Loading

ADDI Alzheimers Detection Challenge

ADDI Alzhemiers columns Reference

Feature Explorations

siddharth

Feature Explorations

ADDI Alzhemiers columns Reference

In [7]:
from IPython.display import HTML

HTML(''' <script>
code_show=true;
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }

 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
Code hidden , to toggle: <a href="javascript:code_toggle()">here</a>.''')
Out[7]:
Code hidden , to toggle: here.
In [1]:
import pandas as pd
In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import HTML
import warnings
warnings.filterwarnings("ignore")

INPUT_DIR = '../data/'
COLS_TO_SHOW = 130


def h(content):
    display(HTML(content))
    
def target_hist(col):
    tr[tr['diagnosis'] == 'normal'][col].plot(style='.', title='Hist ' + col, figsize=(15, 3))
    tr[tr['diagnosis'] == 'post_alzheimer'][col].plot(style='.', title='Hist ' + col, figsize=(15, 3))
    tr[tr['diagnosis'] == 'pre_alzheimer'][col].plot(style='.',figsize=(15, 3),
                                                     title=col + ' (blue=normal, orange=post_alzheimer, green=pre_alzheimer)')
    plt.show()
    
    
def _desc(data, col, label):
    d0 = data.describe().reset_index()
    d0.columns = [col, label]
    return d0.append({col:'unique values', label:data.unique().shape[0]}, ignore_index=True) \
             .append({col:'unique values / count', label:np.round(data.unique().shape[0] / data.shape[0], 4)}, ignore_index=True) \
             .append({col:'NaNs', label:data.isnull().sum()}, ignore_index=True) \
    
def desc1(col):
    d0 = _desc(tr[col], col, 'Train')
    d3 = _desc(te[col], col, 'Validation')
    dd = d0.merge(d3)
    display(dd)
    
    if col not in ['row_id']:
        h('<b>Most popular values (NaN = -999):</b>')
        N = 10
        d0 = tr[['row_id',col]].fillna(-999).groupby(col)['row_id'].count().reset_index()
        d1 = te[['row_id',col]].fillna(-999).groupby(col)['row_id'].count().reset_index()
        dd = d0.merge(d1, how='left', on=col)
        dd['Share in train'] = np.round(dd['row_id_x'] / dd['row_id_x'].sum(), 5)
        dd['Share in validation'] = np.round(dd['row_id_y'] / dd['row_id_y'].sum(), 5)
        dd = dd.sort_values('row_id_x', ascending=False).head(N).fillna(0).reset_index(drop=True)
        dd = dd.rename({'row_id_x':'Count in train (desc)','row_id_y':'Count in validation'}, axis=1)
        display(dd)

def hist1(col):
    plt.figure(figsize=(15, 3))
    plt.subplot(121)
    plt.hist(tr[col], bins=70);
    plt.title('Train histogram: ' + col);
    plt.subplot(122)
    plt.hist(te[col], bins=70);
    plt.title('Validation histogram: ' + col);
    plt.show()

def barh1(col):
    if col not in ['row_id']:
        plt.figure(figsize=(15, 3))
        plt.subplot(121)
        tr[col].value_counts().sort_values().plot(kind = 'barh')
        plt.title('Train value counts: ' + col);
        plt.subplot(122)
        te[col].value_counts().sort_values().plot(kind = 'barh')
        plt.title('Validation value counts: ' + col);
        plt.show()
        
def corr1(col):
    N = None #10000
    num_vars = [f for f in tr.columns if tr[f].dtype != 'object']
    trx = tr.head(N) if N is not None else tr.copy()
    corrs = trx[num_vars].corrwith(trx[col]).reset_index().sort_values(0, ascending=False).reset_index(drop=True).rename({'index':'Column',0:'Correlation with ' + col}, axis=1)
    h('<b>Most correlated values with ' + col + ':</b>')
    trx = pd.concat([corrs.head(6), corrs.dropna().tail(5)])
    def linkx(val):
        return '<a href="#c_{}">{}</a>'.format(val, val) if val in included_cols else val
    trx['Column'] = trx['Column'].apply(linkx)
    h(trx.to_html(escape=False))
    
def numeric(col):
    target_hist(col)
    hist1(col)
    desc1(col)
    corr1(col) 
    
def categorical(col):
    barh1(col)
    desc1(col)

def proc(col):
    h('<h3 id="c_' + col + '">' + col + '</h3>' + '<a style="font-size:11px" href="#home">(Jump to top)</a>')
    categorical(col) if tr[col].dtype == 'object' or tr[col].nunique()<=10 else numeric(col)
        
tr = pd.read_csv(INPUT_DIR + 'train.csv')
te = pd.read_csv(INPUT_DIR + 'validation.csv')

included_cols = list(tr.columns.values[:COLS_TO_SHOW])
split_on = ['row_id','missing_digit_1','1 dist from cen','diagnosis','euc_dist_digit_1','area_digit_1','height_digit_1','width_digit_1','variance_width']
h('<b>Links to column info:</b> ' + ', '.join([('<li>' if col in split_on else '') + '<a href="#c_' + col + '">' + col + '</a>' for col in included_cols]))

h('Train features shape: <b>' + str(tr.shape) + '</b>' + 
  '<br>Validation features shape: <b>' + str(te.shape) + '</b>')
h('Train features preview:')
display(tr.head(10))

for col in included_cols:
    if "diagnosis" not in col:
        proc(col)
Links to column info:
  • row_id, number_of_digits,
  • missing_digit_1, missing_digit_2, missing_digit_3, missing_digit_4, missing_digit_5, missing_digit_6, missing_digit_7, missing_digit_8, missing_digit_9, missing_digit_10, missing_digit_11, missing_digit_12,
  • 1 dist from cen, 10 dist from cen, 11 dist from cen, 12 dist from cen, 2 dist from cen, 3 dist from cen, 4 dist from cen, 5 dist from cen, 6 dist from cen, 7 dist from cen, 8 dist from cen, 9 dist from cen,
  • euc_dist_digit_1, euc_dist_digit_2, euc_dist_digit_3, euc_dist_digit_4, euc_dist_digit_5, euc_dist_digit_6, euc_dist_digit_7, euc_dist_digit_8, euc_dist_digit_9, euc_dist_digit_10, euc_dist_digit_11, euc_dist_digit_12,
  • area_digit_1, area_digit_2, area_digit_3, area_digit_4, area_digit_5, area_digit_6, area_digit_7, area_digit_8, area_digit_9, area_digit_10, area_digit_11, area_digit_12,
  • height_digit_1, height_digit_2, height_digit_3, height_digit_4, height_digit_5, height_digit_6, height_digit_7, height_digit_8, height_digit_9, height_digit_10, height_digit_11, height_digit_12,
  • width_digit_1, width_digit_2, width_digit_3, width_digit_4, width_digit_5, width_digit_6, width_digit_7, width_digit_8, width_digit_9, width_digit_10, width_digit_11, width_digit_12,
  • variance_width, variance_height, variance_area, deviation_dist_from_mid_axis, between_axis_digits_angle_sum, between_axis_digits_angle_var, between_digits_angle_cw_sum, between_digits_angle_cw_var, between_digits_angle_ccw_sum, between_digits_angle_ccw_var, sequence_flag_cw, sequence_flag_ccw, number_of_hands, hand_count_dummy, hour_hand_length, minute_hand_length, single_hand_length, clockhand_ratio, clockhand_diff, angle_between_hands, deviation_from_centre, intersection_pos_rel_centre, hour_proximity_from_11, minute_proximity_from_2, hour_pointing_digit, actual_hour_digit, minute_pointing_digit, actual_minute_digit, final_rotation_angle, ellipse_circle_ratio, count_defects, percentage_inside_ellipse, pred_tremor, double_major, double_minor, vertical_dist, horizontal_dist, top_area_perc, bottom_area_perc, left_area_perc, right_area_perc, hor_count, vert_count, eleven_ten_error, other_error, time_diff, centre_dot_detect,
  • diagnosis
  • Train features shape: (32777, 122)
    Validation features shape: (362, 121)
    Train features preview:
    row_id number_of_digits missing_digit_1 missing_digit_2 missing_digit_3 missing_digit_4 missing_digit_5 missing_digit_6 missing_digit_7 missing_digit_8 ... bottom_area_perc left_area_perc right_area_perc hor_count vert_count eleven_ten_error other_error time_diff centre_dot_detect diagnosis
    0 S0CIXBKIUEOUBNURP 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.526170 0.524975 0.474667 0 0 0 1 -105.0 0.0 normal
    1 IW1Z4Z3H720OPW8LL 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000810 0.516212 0.483330 0 1 0 1 NaN NaN normal
    2 PVUGU14JRSU44ZADT 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.488109 0.550606 0.449042 0 0 0 0 0.0 0.0 normal
    3 RW5UTGMB9H67LWJHX 7.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 ... NaN NaN NaN 1 0 0 1 NaN NaN normal
    4 W0IM2V6F6UP5LYS3E 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.512818 0.511865 0.487791 0 1 0 0 0.0 1.0 normal
    5 IR9A4R5TTZJR78ZC8 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.513425 0.482235 0.517410 0 0 0 1 495.0 0.0 normal
    6 LS1R4PFJUOVEU0K0E 2.0 1.0 0.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 0.510611 0.527788 0.471864 1 0 0 1 NaN NaN post_alzheimer
    7 OQLC2VXVZUNWI31P9 11.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 ... 0.501412 0.532871 0.466693 1 0 0 1 NaN NaN normal
    8 N0KKCFX9FJG0NSQ1E 4.0 1.0 0.0 0.0 1.0 1.0 0.0 1.0 1.0 ... NaN NaN NaN 1 0 0 1 540.0 0.0 normal
    9 3LDA1Z7RH2HXAKRR1 10.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.491031 0.530640 0.468971 1 1 0 1 NaN NaN normal

    10 rows × 122 columns

    row_id Train Validation
    0 count 32777 362
    1 unique 32777 362
    2 top Q97OXAMNLSONJNDM2 NTZ3OMMX8MDOJZXWS
    3 freq 1 1
    4 unique values 32777 362
    5 unique values / count 1.0 1.0
    6 NaNs 0 0

    number_of_digits

    (Jump to top)
    number_of_digits Train Validation
    0 count 32703.000000 359.000000
    1 mean 10.299422 9.520891
    2 std 2.345710 3.132811
    3 min 1.000000 1.000000
    4 25% 10.000000 8.500000
    5 50% 11.000000 11.000000
    6 75% 12.000000 12.000000
    7 max 17.000000 13.000000
    8 unique values 18.000000 14.000000
    9 unique values / count 0.000500 0.038700
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    number_of_digits Count in train (desc) Count in validation Share in train Share in validation
    0 12.0 12818 122.0 0.39107 0.33702
    1 11.0 7535 74.0 0.22989 0.20442
    2 10.0 4416 42.0 0.13473 0.11602
    3 9.0 2541 27.0 0.07752 0.07459
    4 8.0 1564 12.0 0.04772 0.03315
    5 7.0 932 12.0 0.02843 0.03315
    6 6.0 678 14.0 0.02069 0.03867
    7 5.0 507 11.0 0.01547 0.03039
    8 4.0 460 13.0 0.01403 0.03591
    9 1.0 390 13.0 0.01190 0.03591
    Most correlated values with number_of_digits:
    Column Correlation with number_of_digits
    0 number_of_digits 1.000000
    1 between_digits_angle_cw_sum 0.450802
    2 between_axis_digits_angle_sum 0.398671
    3 between_digits_angle_ccw_sum 0.384744
    4 count_defects 0.337459
    5 sequence_flag_cw 0.304220
    112 missing_digit_3 -0.564346
    113 missing_digit_6 -0.579419
    114 missing_digit_5 -0.597470
    115 missing_digit_4 -0.601174
    116 missing_digit_7 -0.617619

    missing_digit_1

    (Jump to top)
    missing_digit_1 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.221845 0.289694
    2 std 0.415494 0.454253
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 25448 255 0.77640 0.70442
    1 1.0 7255 104 0.22134 0.28729
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_2

    (Jump to top)
    missing_digit_2 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.148243 0.178273
    2 std 0.355346 0.383277
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27855 295 0.84983 0.81492
    1 1.0 4848 64 0.14791 0.17680
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_3

    (Jump to top)
    missing_digit_3 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.125096 0.172702
    2 std 0.330832 0.378517
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28612 297 0.87293 0.82044
    1 1.0 4091 62 0.12481 0.17127
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_4

    (Jump to top)
    missing_digit_4 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.166713 0.250696
    2 std 0.372725 0.434019
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.500000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27251 269 0.83141 0.74309
    1 1.0 5452 90 0.16634 0.24862
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_5

    (Jump to top)
    missing_digit_5 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.202153 0.261838
    2 std 0.401612 0.440249
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 26092 265 0.79605 0.73204
    1 1.0 6611 94 0.20170 0.25967
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_6

    (Jump to top)
    missing_digit_6 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.131364 0.192201
    2 std 0.337803 0.394580
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28407 290 0.86667 0.80110
    1 1.0 4296 69 0.13107 0.19061
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_7

    (Jump to top)
    missing_digit_7 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.126839 0.197772
    2 std 0.332797 0.398875
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28555 288 0.87119 0.79558
    1 1.0 4148 71 0.12655 0.19613
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_8

    (Jump to top)
    missing_digit_8 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.120723 0.186630
    2 std 0.325810 0.390158
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28755 292 0.87729 0.80663
    1 1.0 3948 67 0.12045 0.18508
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_9

    (Jump to top)
    missing_digit_9 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.175183 0.289694
    2 std 0.380129 0.454253
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 26974 255 0.82296 0.70442
    1 1.0 5729 104 0.17479 0.28729
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_10

    (Jump to top)
    missing_digit_10 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.147418 0.236769
    2 std 0.354527 0.425693
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27882 274 0.85066 0.75691
    1 1.0 4821 85 0.14708 0.23481
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_11

    (Jump to top)
    missing_digit_11 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.168241 0.222841
    2 std 0.374086 0.416733
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 27201 279 0.82988 0.77072
    1 1.0 5502 80 0.16786 0.22099
    2 -999.0 74 3 0.00226 0.00829

    missing_digit_12

    (Jump to top)
    missing_digit_12 Train Validation
    0 count 32703.000000 359.000000
    1 mean 0.115158 0.189415
    2 std 0.319217 0.392385
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    missing_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 28937 291 0.88284 0.80387
    1 1.0 3766 68 0.11490 0.18785
    2 -999.0 74 3 0.00226 0.00829

    1 dist from cen

    (Jump to top)
    1 dist from cen Train Validation
    0 count 25448.000000 255.000000
    1 mean 361.869732 354.339930
    2 std 50.310698 57.701010
    3 min 3.354102 51.983170
    4 25% 336.580321 330.244742
    5 50% 367.434688 368.160970
    6 75% 393.898464 388.853105
    7 max 618.025889 492.941426
    8 unique values 21148.000000 255.000000
    9 unique values / count 0.645200 0.704400
    10 NaNs 7329.000000 107.000000
    Most popular values (NaN = -999):
    1 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 7329 107.0 0.22360 0.54872
    1 380.304155 6 0.0 0.00018 0.00000
    2 383.020887 6 0.0 0.00018 0.00000
    3 400.707187 6 0.0 0.00018 0.00000
    4 362.286144 6 0.0 0.00018 0.00000
    5 353.022662 6 0.0 0.00018 0.00000
    6 371.875315 5 0.0 0.00015 0.00000
    7 393.817851 5 0.0 0.00015 0.00000
    8 391.651950 5 0.0 0.00015 0.00000
    9 370.961588 5 0.0 0.00015 0.00000
    Most correlated values with 1 dist from cen:
    Column Correlation with 1 dist from cen
    0 1 dist from cen 1.000000
    1 2 dist from cen 0.709005
    2 12 dist from cen 0.658284
    3 3 dist from cen 0.542965
    4 11 dist from cen 0.435573
    5 4 dist from cen 0.358795
    111 area_digit_2 -0.269999
    112 height_digit_3 -0.275796
    113 width_digit_12 -0.292792
    114 height_digit_12 -0.301515
    115 area_digit_12 -0.346086

    10 dist from cen

    (Jump to top)
    10 dist from cen Train Validation
    0 count 27882.000000 274.000000
    1 mean 367.418424 362.651154
    2 std 48.060878 61.588089
    3 min 5.852350 37.643060
    4 25% 343.945581 338.489183
    5 50% 372.683512 373.309857
    6 75% 397.112940 401.833946
    7 max 628.776988 505.421853
    8 unique values 22765.000000 275.000000
    9 unique values / count 0.694500 0.759700
    10 NaNs 4895.000000 88.000000
    Most popular values (NaN = -999):
    10 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4895 88.0 0.14934 0.50575
    1 358.793116 6 0.0 0.00018 0.00000
    2 365.735560 6 0.0 0.00018 0.00000
    3 369.040987 6 1.0 0.00018 0.00575
    4 404.515142 5 0.0 0.00015 0.00000
    5 413.559246 5 0.0 0.00015 0.00000
    6 389.403711 5 0.0 0.00015 0.00000
    7 395.501264 5 0.0 0.00015 0.00000
    8 379.692310 5 0.0 0.00015 0.00000
    9 372.320091 5 0.0 0.00015 0.00000
    Most correlated values with 10 dist from cen:
    Column Correlation with 10 dist from cen
    0 10 dist from cen 1.000000
    1 11 dist from cen 0.792786
    2 9 dist from cen 0.700233
    3 12 dist from cen 0.493608
    4 8 dist from cen 0.488509
    5 6 dist from cen 0.424319
    111 between_digits_angle_ccw_sum -0.250501
    112 height_digit_12 -0.267602
    113 area_digit_10 -0.276785
    114 width_digit_10 -0.295011
    115 area_digit_12 -0.296367

    11 dist from cen

    (Jump to top)
    11 dist from cen Train Validation
    0 count 27201.000000 279.000000
    1 mean 368.235873 368.939471
    2 std 48.425983 48.268612
    3 min 11.335784 215.352037
    4 25% 342.212288 343.152948
    5 50% 372.667412 371.481157
    6 75% 399.011278 403.296699
    7 max 613.843832 495.745903
    8 unique values 22258.000000 279.000000
    9 unique values / count 0.679100 0.770700
    10 NaNs 5576.000000 83.000000
    Most popular values (NaN = -999):
    11 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5576 83.0 0.17012 0.47159
    1 350.089274 8 0.0 0.00024 0.00000
    2 392.698994 7 0.0 0.00021 0.00000
    3 410.766661 6 1.0 0.00018 0.00568
    4 378.123326 6 0.0 0.00018 0.00000
    5 371.726042 6 0.0 0.00018 0.00000
    6 397.109871 5 0.0 0.00015 0.00000
    7 382.382335 5 0.0 0.00015 0.00000
    8 373.915097 5 0.0 0.00015 0.00000
    9 348.500359 5 0.0 0.00015 0.00000
    Most correlated values with 11 dist from cen:
    Column Correlation with 11 dist from cen
    0 11 dist from cen 1.000000
    1 10 dist from cen 0.792786
    2 12 dist from cen 0.649639
    3 9 dist from cen 0.534832
    4 1 dist from cen 0.435573
    5 2 dist from cen 0.416683
    111 area_digit_11 -0.281544
    112 area_digit_10 -0.282686
    113 width_digit_10 -0.297822
    114 height_digit_12 -0.302116
    115 area_digit_12 -0.329968

    12 dist from cen

    (Jump to top)
    12 dist from cen Train Validation
    0 count 28937.000000 291.000000
    1 mean 370.796838 370.891134
    2 std 48.005863 56.899557
    3 min 22.102036 94.366308
    4 25% 348.353987 342.058650
    5 50% 377.180328 381.878580
    6 75% 401.186366 406.138700
    7 max 659.571073 571.075520
    8 unique values 21357.000000 292.000000
    9 unique values / count 0.651600 0.806600
    10 NaNs 3840.000000 71.000000
    Most popular values (NaN = -999):
    12 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 3840 71.0 0.11716 0.355
    1 386.505175 9 0.0 0.00027 0.000
    2 380.573909 8 0.0 0.00024 0.000
    3 386.000324 7 0.0 0.00021 0.000
    4 370.005405 7 0.0 0.00021 0.000
    5 383.720276 7 0.0 0.00021 0.000
    6 398.130946 7 0.0 0.00021 0.000
    7 360.699667 7 0.0 0.00021 0.000
    8 381.878580 6 1.0 0.00018 0.005
    9 402.657423 6 0.0 0.00018 0.000
    Most correlated values with 12 dist from cen:
    Column Correlation with 12 dist from cen
    0 12 dist from cen 1.000000
    1 1 dist from cen 0.658284
    2 11 dist from cen 0.649639
    3 2 dist from cen 0.561495
    4 10 dist from cen 0.493608
    5 3 dist from cen 0.491578
    111 height_digit_3 -0.319467
    112 area_digit_3 -0.322698
    113 width_digit_12 -0.351274
    114 height_digit_12 -0.412872
    115 area_digit_12 -0.440424

    2 dist from cen

    (Jump to top)
    2 dist from cen Train Validation
    0 count 27855.000000 295.000000
    1 mean 349.116177 340.248534
    2 std 53.313076 58.972478
    3 min 7.905694 95.630800
    4 25% 320.153479 307.196385
    5 50% 353.802911 348.680728
    6 75% 383.428285 379.130664
    7 max 568.624876 486.444498
    8 unique values 22905.000000 295.000000
    9 unique values / count 0.698800 0.814900
    10 NaNs 4922.000000 67.000000
    Most popular values (NaN = -999):
    2 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4922 67.0 0.15017 0.42405
    1 317.539368 7 0.0 0.00021 0.00000
    2 389.168986 6 0.0 0.00018 0.00000
    3 350.708212 6 0.0 0.00018 0.00000
    4 347.940369 6 0.0 0.00018 0.00000
    5 357.220170 6 0.0 0.00018 0.00000
    6 356.863069 6 0.0 0.00018 0.00000
    7 340.009191 5 0.0 0.00015 0.00000
    8 364.900671 5 0.0 0.00015 0.00000
    9 350.058924 5 0.0 0.00015 0.00000
    Most correlated values with 2 dist from cen:
    Column Correlation with 2 dist from cen
    0 2 dist from cen 1.000000
    1 3 dist from cen 0.725259
    2 1 dist from cen 0.709005
    3 12 dist from cen 0.561495
    4 4 dist from cen 0.509722
    5 minute_hand_length 0.441396
    111 height_digit_2 -0.307129
    112 width_digit_12 -0.308594
    113 area_digit_2 -0.309652
    114 height_digit_3 -0.312967
    115 area_digit_12 -0.342764

    3 dist from cen

    (Jump to top)
    3 dist from cen Train Validation
    0 count 28612.000000 297.000000
    1 mean 337.542587 328.683359
    2 std 51.175381 57.556793
    3 min 15.206906 109.592427
    4 25% 308.950947 297.228027
    5 50% 343.432854 338.913337
    6 75% 371.767737 363.792867
    7 max 611.333379 456.444137
    8 unique values 23065.000000 298.000000
    9 unique values / count 0.703700 0.823200
    10 NaNs 4165.000000 65.000000
    Most popular values (NaN = -999):
    3 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4165 65.0 0.12707 0.3869
    1 380.644848 7 0.0 0.00021 0.0000
    2 350.160677 7 0.0 0.00021 0.0000
    3 296.459525 6 0.0 0.00018 0.0000
    4 349.017550 6 0.0 0.00018 0.0000
    5 342.806797 6 0.0 0.00018 0.0000
    6 339.214165 6 0.0 0.00018 0.0000
    7 388.538930 6 0.0 0.00018 0.0000
    8 296.095002 5 0.0 0.00015 0.0000
    9 376.158544 5 0.0 0.00015 0.0000
    Most correlated values with 3 dist from cen:
    Column Correlation with 3 dist from cen
    0 3 dist from cen 1.000000
    1 4 dist from cen 0.761953
    2 2 dist from cen 0.725259
    3 5 dist from cen 0.547068
    4 1 dist from cen 0.542965
    5 12 dist from cen 0.491578
    111 area_digit_3 -0.355528
    112 height_digit_6 -0.373873
    113 area_digit_12 -0.381927
    114 width_digit_12 -0.389002
    115 height_digit_3 -0.416407

    4 dist from cen

    (Jump to top)
    4 dist from cen Train Validation
    0 count 27251.000000 269.000000
    1 mean 336.085919 327.209271
    2 std 47.456872 55.793071
    3 min 6.519202 111.803399
    4 25% 309.714788 298.724371
    5 50% 340.306480 336.473625
    6 75% 367.008855 364.111247
    7 max 580.975473 437.433424
    8 unique values 22437.000000 269.000000
    9 unique values / count 0.684500 0.743100
    10 NaNs 5526.000000 93.000000
    Most popular values (NaN = -999):
    4 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5526 93.0 0.16859 0.5407
    1 319.330550 8 0.0 0.00024 0.0000
    2 362.913557 6 0.0 0.00018 0.0000
    3 337.662924 6 0.0 0.00018 0.0000
    4 352.709867 6 0.0 0.00018 0.0000
    5 359.336194 6 0.0 0.00018 0.0000
    6 367.355754 5 0.0 0.00015 0.0000
    7 318.814209 5 0.0 0.00015 0.0000
    8 340.648279 5 0.0 0.00015 0.0000
    9 323.235595 5 0.0 0.00015 0.0000
    Most correlated values with 4 dist from cen:
    Column Correlation with 4 dist from cen
    0 4 dist from cen 1.000000
    1 5 dist from cen 0.776762
    2 3 dist from cen 0.761953
    3 2 dist from cen 0.509722
    4 6 dist from cen 0.471918
    5 9 dist from cen 0.408276
    111 height_digit_4 -0.258472
    112 area_digit_12 -0.272054
    113 width_digit_12 -0.273842
    114 height_digit_6 -0.282919
    115 height_digit_3 -0.307230

    5 dist from cen

    (Jump to top)
    5 dist from cen Train Validation
    0 count 26092.000000 265.000000
    1 mean 335.550313 329.190015
    2 std 46.910977 53.567646
    3 min 7.826238 129.468143
    4 25% 309.358914 302.278431
    5 50% 339.694716 334.228963
    6 75% 366.316888 366.500341
    7 max 520.454849 478.221967
    8 unique values 21245.000000 266.000000
    9 unique values / count 0.648200 0.734800
    10 NaNs 6685.000000 97.000000
    Most popular values (NaN = -999):
    5 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 6685 97.0 0.20395 0.53005
    1 350.615530 8 0.0 0.00024 0.00000
    2 361.749222 7 0.0 0.00021 0.00000
    3 344.372618 6 0.0 0.00018 0.00000
    4 328.506088 6 0.0 0.00018 0.00000
    5 359.584830 6 0.0 0.00018 0.00000
    6 340.424735 6 0.0 0.00018 0.00000
    7 329.639955 6 0.0 0.00018 0.00000
    8 346.375880 6 0.0 0.00018 0.00000
    9 346.301891 6 0.0 0.00018 0.00000
    Most correlated values with 5 dist from cen:
    Column Correlation with 5 dist from cen
    0 5 dist from cen 1.000000
    1 4 dist from cen 0.776762
    2 6 dist from cen 0.635754
    3 3 dist from cen 0.547068
    4 9 dist from cen 0.415726
    5 10 dist from cen 0.412986
    111 height_digit_12 -0.243856
    112 area_digit_4 -0.252993
    113 height_digit_3 -0.258554
    114 height_digit_5 -0.270411
    115 area_digit_12 -0.278165

    6 dist from cen

    (Jump to top)
    6 dist from cen Train Validation
    0 count 28407.000000 290.000000
    1 mean 353.017822 346.825546
    2 std 47.096105 55.305610
    3 min 3.535534 61.241326
    4 25% 328.613374 321.629391
    5 50% 358.558224 355.854575
    6 75% 384.157520 382.021114
    7 max 586.950168 535.724276
    8 unique values 21664.000000 290.000000
    9 unique values / count 0.661000 0.801100
    10 NaNs 4370.000000 72.000000
    Most popular values (NaN = -999):
    6 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4370 72.0 0.13333 0.4186
    1 336.544945 11 0.0 0.00034 0.0000
    2 355.431076 8 0.0 0.00024 0.0000
    3 373.505355 8 0.0 0.00024 0.0000
    4 357.022408 8 0.0 0.00024 0.0000
    5 362.500000 8 0.0 0.00024 0.0000
    6 351.569694 7 0.0 0.00021 0.0000
    7 386.369318 7 0.0 0.00021 0.0000
    8 365.041436 6 0.0 0.00018 0.0000
    9 370.178673 6 0.0 0.00018 0.0000
    Most correlated values with 6 dist from cen:
    Column Correlation with 6 dist from cen
    0 6 dist from cen 1.000000
    1 7 dist from cen 0.683981
    2 5 dist from cen 0.635754
    3 8 dist from cen 0.550448
    4 9 dist from cen 0.507077
    5 4 dist from cen 0.471918
    111 width_digit_10 -0.274481
    112 area_digit_7 -0.280505
    113 area_digit_6 -0.304178
    114 area_digit_12 -0.320258
    115 height_digit_12 -0.326151

    7 dist from cen

    (Jump to top)
    7 dist from cen Train Validation
    0 count 28555.000000 288.000000
    1 mean 368.547709 364.170914
    2 std 50.956366 61.554328
    3 min 14.422205 36.674242
    4 25% 342.429190 337.497126
    5 50% 372.874309 372.334472
    6 75% 400.218222 399.518538
    7 max 666.132119 658.555427
    8 unique values 23415.000000 289.000000
    9 unique values / count 0.714400 0.798300
    10 NaNs 4222.000000 74.000000
    Most popular values (NaN = -999):
    7 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4222 74.0 0.12881 0.45399
    1 388.417623 7 0.0 0.00021 0.00000
    2 373.283404 7 0.0 0.00021 0.00000
    3 357.424817 6 0.0 0.00018 0.00000
    4 375.016333 6 0.0 0.00018 0.00000
    5 360.695509 6 0.0 0.00018 0.00000
    6 389.546210 6 0.0 0.00018 0.00000
    7 396.011364 5 0.0 0.00015 0.00000
    8 386.589252 5 0.0 0.00015 0.00000
    9 366.034493 5 0.0 0.00015 0.00000
    Most correlated values with 7 dist from cen:
    Column Correlation with 7 dist from cen
    0 7 dist from cen 1.000000
    1 8 dist from cen 0.753724
    2 6 dist from cen 0.683981
    3 9 dist from cen 0.573894
    4 10 dist from cen 0.398767
    5 5 dist from cen 0.396155
    111 final_rotation_angle -0.209483
    112 width_digit_9 -0.213543
    113 width_digit_10 -0.214479
    114 width_digit_8 -0.229184
    115 height_digit_12 -0.236198

    8 dist from cen

    (Jump to top)
    8 dist from cen Train Validation
    0 count 28755.000000 292.000000
    1 mean 370.329200 362.324910
    2 std 51.562665 64.947323
    3 min 8.139410 72.090221
    4 25% 345.253711 336.208249
    5 50% 375.153635 371.319385
    6 75% 401.983364 399.285841
    7 max 608.481717 529.761503
    8 unique values 23604.000000 292.000000
    9 unique values / count 0.720100 0.806600
    10 NaNs 4022.000000 70.000000
    Most popular values (NaN = -999):
    8 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4022 70.0 0.12271 0.39773
    1 403.133043 7 0.0 0.00021 0.00000
    2 334.680818 6 0.0 0.00018 0.00000
    3 368.730932 6 0.0 0.00018 0.00000
    4 395.823509 6 0.0 0.00018 0.00000
    5 402.852951 6 0.0 0.00018 0.00000
    6 395.379565 6 0.0 0.00018 0.00000
    7 358.650944 6 0.0 0.00018 0.00000
    8 383.038510 6 0.0 0.00018 0.00000
    9 382.638864 5 0.0 0.00015 0.00000
    Most correlated values with 8 dist from cen:
    Column Correlation with 8 dist from cen
    0 8 dist from cen 1.000000
    1 7 dist from cen 0.753724
    2 9 dist from cen 0.718492
    3 6 dist from cen 0.550448
    4 10 dist from cen 0.488509
    5 5 dist from cen 0.368272
    111 width_digit_8 -0.218300
    112 height_digit_12 -0.221123
    113 width_digit_10 -0.228457
    114 area_digit_8 -0.229836
    115 between_digits_angle_ccw_sum -0.358946

    9 dist from cen

    (Jump to top)
    9 dist from cen Train Validation
    0 count 26974.000000 255.000000
    1 mean 375.631690 376.453994
    2 std 45.795291 45.449431
    3 min 14.115594 146.069333
    4 25% 353.542784 351.260403
    5 50% 381.106284 382.400641
    6 75% 404.633476 403.347755
    7 max 620.016935 486.066868
    8 unique values 20996.000000 256.000000
    9 unique values / count 0.640600 0.707200
    10 NaNs 5803.000000 107.000000
    Most popular values (NaN = -999):
    9 dist from cen Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5803 107.0 0.17704 0.50711
    1 367.268635 10 0.0 0.00031 0.00000
    2 392.436301 8 0.0 0.00024 0.00000
    3 397.724779 7 0.0 0.00021 0.00000
    4 400.078117 6 0.0 0.00018 0.00000
    5 411.024634 6 0.0 0.00018 0.00000
    6 394.325373 6 0.0 0.00018 0.00000
    7 392.183630 6 0.0 0.00018 0.00000
    8 405.007716 6 0.0 0.00018 0.00000
    9 397.570748 6 0.0 0.00018 0.00000
    Most correlated values with 9 dist from cen:
    Column Correlation with 9 dist from cen
    0 9 dist from cen 1.000000
    1 8 dist from cen 0.718492
    2 10 dist from cen 0.700233
    3 7 dist from cen 0.573894
    4 11 dist from cen 0.534832
    5 6 dist from cen 0.507077
    111 height_digit_12 -0.245396
    112 width_digit_12 -0.248441
    113 area_digit_8 -0.258230
    114 area_digit_12 -0.280400
    115 width_digit_10 -0.281388

    euc_dist_digit_1

    (Jump to top)
    euc_dist_digit_1 Train Validation
    0 count 25400.000000 255.000000
    1 mean 30.287315 28.712415
    2 std 33.877417 31.931081
    3 min 0.000760 0.136073
    4 25% 6.680591 6.780543
    5 50% 14.935799 15.059592
    6 75% 37.094044 32.842662
    7 max 119.957644 112.333641
    8 unique values 23913.000000 256.000000
    9 unique values / count 0.729600 0.707200
    10 NaNs 7377.000000 107.000000
    Most popular values (NaN = -999):
    euc_dist_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 7377 107.0 0.22507 0.72297
    1 3.595678 4 0.0 0.00012 0.00000
    2 5.710511 4 0.0 0.00012 0.00000
    3 4.864260 4 0.0 0.00012 0.00000
    4 7.281675 4 0.0 0.00012 0.00000
    5 12.124343 4 0.0 0.00012 0.00000
    6 0.787928 4 0.0 0.00012 0.00000
    7 8.216260 4 0.0 0.00012 0.00000
    8 9.112259 4 0.0 0.00012 0.00000
    9 1.896679 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_1:
    Column Correlation with euc_dist_digit_1
    0 euc_dist_digit_1 1.000000
    1 euc_dist_digit_12 0.895557
    2 deviation_dist_from_mid_axis 0.883553
    3 euc_dist_digit_3 0.865873
    4 euc_dist_digit_11 0.852942
    5 euc_dist_digit_10 0.833996
    111 height_digit_8 -0.474169
    112 height_digit_9 -0.479460
    113 height_digit_7 -0.483864
    114 height_digit_6 -0.499671
    115 height_digit_1 -0.566982

    euc_dist_digit_2

    (Jump to top)
    euc_dist_digit_2 Train Validation
    0 count 27800.000000 295.000000
    1 mean 32.834984 35.414626
    2 std 31.828580 31.027786
    3 min 0.003261 0.383323
    4 25% 8.930655 10.757488
    5 50% 20.012485 25.436234
    6 75% 46.669563 50.958705
    7 max 119.906309 116.282230
    8 unique values 26417.000000 296.000000
    9 unique values / count 0.806000 0.817700
    10 NaNs 4977.000000 67.000000
    Most popular values (NaN = -999):
    euc_dist_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4977 67.0 0.15184 0.78824
    1 26.861567 4 0.0 0.00012 0.00000
    2 0.973594 4 0.0 0.00012 0.00000
    3 15.903987 3 0.0 0.00009 0.00000
    4 9.786820 3 0.0 0.00009 0.00000
    5 8.713237 3 0.0 0.00009 0.00000
    6 19.036402 3 0.0 0.00009 0.00000
    7 9.794903 3 0.0 0.00009 0.00000
    8 11.994654 3 0.0 0.00009 0.00000
    9 21.202235 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_2:
    Column Correlation with euc_dist_digit_2
    0 euc_dist_digit_2 1.000000
    1 euc_dist_digit_3 0.817747
    2 euc_dist_digit_1 0.806897
    3 deviation_dist_from_mid_axis 0.780278
    4 euc_dist_digit_4 0.779112
    5 euc_dist_digit_5 0.775221
    111 height_digit_8 -0.393057
    112 height_digit_9 -0.400821
    113 height_digit_7 -0.415490
    114 height_digit_6 -0.418058
    115 height_digit_1 -0.497848

    euc_dist_digit_3

    (Jump to top)
    euc_dist_digit_3 Train Validation
    0 count 28603.000000 297.000000
    1 mean 33.031035 34.606700
    2 std 33.060628 31.404319
    3 min 0.000000 0.260000
    4 25% 8.190000 10.010000
    5 50% 18.980000 22.490000
    6 75% 47.840000 51.350000
    7 max 119.860000 114.400000
    8 unique values 912.000000 227.000000
    9 unique values / count 0.027800 0.627100
    10 NaNs 4174.000000 65.000000
    Most popular values (NaN = -999):
    euc_dist_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 4174 65.0 0.12735 0.17956
    1 1.69 145 0.0 0.00442 0.00000
    2 1.43 135 2.0 0.00412 0.00552
    3 3.12 134 2.0 0.00409 0.00552
    4 3.38 131 1.0 0.00400 0.00276
    5 5.46 129 1.0 0.00394 0.00276
    6 3.51 129 2.0 0.00394 0.00552
    7 0.52 129 1.0 0.00394 0.00276
    8 0.65 128 0.0 0.00391 0.00000
    9 0.26 128 1.0 0.00391 0.00276
    Most correlated values with euc_dist_digit_3:
    Column Correlation with euc_dist_digit_3
    0 euc_dist_digit_3 1.000000
    1 deviation_dist_from_mid_axis 0.938323
    2 euc_dist_digit_4 0.905836
    3 euc_dist_digit_5 0.876202
    4 euc_dist_digit_1 0.865873
    5 euc_dist_digit_12 0.856718
    111 height_digit_8 -0.471607
    112 height_digit_9 -0.484704
    113 height_digit_7 -0.489826
    114 height_digit_6 -0.496467
    115 height_digit_1 -0.586916

    euc_dist_digit_4

    (Jump to top)
    euc_dist_digit_4 Train Validation
    0 count 27238.000000 269.000000
    1 mean 32.049520 32.086552
    2 std 31.662544 31.279015
    3 min 0.000760 0.068572
    4 25% 8.141702 7.743508
    5 50% 19.097071 18.212570
    6 75% 45.860710 48.230000
    7 max 119.937391 113.624643
    8 unique values 25887.000000 269.000000
    9 unique values / count 0.789800 0.743100
    10 NaNs 5539.000000 93.000000
    Most popular values (NaN = -999):
    euc_dist_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5539 93.0 0.16899 0.78151
    1 4.045676 5 0.0 0.00015 0.00000
    2 22.375006 4 0.0 0.00012 0.00000
    3 0.119260 3 0.0 0.00009 0.00000
    4 1.320989 3 0.0 0.00009 0.00000
    5 0.155009 3 0.0 0.00009 0.00000
    6 10.977007 3 0.0 0.00009 0.00000
    7 0.294989 3 0.0 0.00009 0.00000
    8 88.566730 3 0.0 0.00009 0.00000
    9 0.485927 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_4:
    Column Correlation with euc_dist_digit_4
    0 euc_dist_digit_4 1.000000
    1 euc_dist_digit_3 0.905836
    2 euc_dist_digit_5 0.898449
    3 deviation_dist_from_mid_axis 0.874681
    4 euc_dist_digit_7 0.840853
    5 euc_dist_digit_8 0.828104
    111 height_digit_8 -0.447081
    112 height_digit_9 -0.454875
    113 height_digit_6 -0.462422
    114 height_digit_7 -0.466630
    115 height_digit_1 -0.548491

    euc_dist_digit_5

    (Jump to top)
    euc_dist_digit_5 Train Validation
    0 count 26082.000000 264.000000
    1 mean 30.724226 30.474200
    2 std 30.055328 29.734478
    3 min 0.002345 0.197991
    4 25% 7.908380 7.617593
    5 50% 18.527444 18.515487
    6 75% 44.763078 44.525574
    7 max 119.643227 108.472561
    8 unique values 24739.000000 265.000000
    9 unique values / count 0.754800 0.732000
    10 NaNs 6695.000000 98.000000
    Most popular values (NaN = -999):
    euc_dist_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 6695 98.0 0.20426 0.79032
    1 0.377406 5 0.0 0.00015 0.00000
    2 2.595489 4 0.0 0.00012 0.00000
    3 1.444156 4 0.0 0.00012 0.00000
    4 5.466073 4 0.0 0.00012 0.00000
    5 0.457656 4 0.0 0.00012 0.00000
    6 14.191904 4 0.0 0.00012 0.00000
    7 0.112428 4 0.0 0.00012 0.00000
    8 12.389925 3 0.0 0.00009 0.00000
    9 8.774174 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_5:
    Column Correlation with euc_dist_digit_5
    0 euc_dist_digit_5 1.000000
    1 euc_dist_digit_4 0.898449
    2 euc_dist_digit_3 0.876202
    3 deviation_dist_from_mid_axis 0.861845
    4 euc_dist_digit_6 0.836368
    5 euc_dist_digit_8 0.830755
    111 height_digit_8 -0.429745
    112 height_digit_9 -0.438334
    113 height_digit_7 -0.442409
    114 height_digit_6 -0.443804
    115 height_digit_1 -0.527828

    euc_dist_digit_6

    (Jump to top)
    euc_dist_digit_6 Train Validation
    0 count 28394.000000 290.000000
    1 mean 28.135344 27.565379
    2 std 31.245333 30.533193
    3 min 0.000000 0.000000
    4 25% 6.240000 5.492500
    5 50% 14.300000 13.975000
    6 75% 35.100000 39.227500
    7 max 119.730000 115.700000
    8 unique values 889.000000 209.000000
    9 unique values / count 0.027100 0.577300
    10 NaNs 4383.000000 72.000000
    Most popular values (NaN = -999):
    euc_dist_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 4383 72.0 0.13372 0.19890
    1 4.55 173 3.0 0.00528 0.00829
    2 1.43 172 0.0 0.00525 0.00000
    3 3.64 170 3.0 0.00519 0.00829
    4 0.78 168 0.0 0.00513 0.00000
    5 3.51 168 1.0 0.00513 0.00276
    6 2.47 167 4.0 0.00510 0.01105
    7 4.68 164 1.0 0.00500 0.00276
    8 0.91 163 0.0 0.00497 0.00000
    9 1.04 162 1.0 0.00494 0.00276
    Most correlated values with euc_dist_digit_6:
    Column Correlation with euc_dist_digit_6
    0 euc_dist_digit_6 1.000000
    1 deviation_dist_from_mid_axis 0.930912
    2 euc_dist_digit_7 0.880315
    3 euc_dist_digit_9 0.866827
    4 euc_dist_digit_8 0.859128
    5 euc_dist_digit_5 0.836368
    111 height_digit_8 -0.453401
    112 height_digit_7 -0.465866
    113 height_digit_9 -0.475632
    114 height_digit_6 -0.504996
    115 height_digit_1 -0.534603

    euc_dist_digit_7

    (Jump to top)
    euc_dist_digit_7 Train Validation
    0 count 28491.000000 286.000000
    1 mean 30.886070 29.746576
    2 std 33.028061 31.199643
    3 min 0.000491 0.015987
    4 25% 6.903923 5.957112
    5 50% 15.724508 16.274192
    6 75% 44.079657 42.165667
    7 max 119.997479 115.930144
    8 unique values 26646.000000 287.000000
    9 unique values / count 0.812900 0.792800
    10 NaNs 4286.000000 76.000000
    Most popular values (NaN = -999):
    euc_dist_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4286 76.0 0.13076 0.7037
    1 1.007512 4 0.0 0.00012 0.0000
    2 3.442655 4 0.0 0.00012 0.0000
    3 5.199820 4 0.0 0.00012 0.0000
    4 0.532594 4 0.0 0.00012 0.0000
    5 0.641761 4 0.0 0.00012 0.0000
    6 8.888319 4 0.0 0.00012 0.0000
    7 0.062679 4 0.0 0.00012 0.0000
    8 3.947404 4 0.0 0.00012 0.0000
    9 6.066569 4 0.0 0.00012 0.0000
    Most correlated values with euc_dist_digit_7:
    Column Correlation with euc_dist_digit_7
    0 euc_dist_digit_7 1.000000
    1 euc_dist_digit_8 0.922580
    2 deviation_dist_from_mid_axis 0.891760
    3 euc_dist_digit_6 0.880315
    4 euc_dist_digit_9 0.873219
    5 euc_dist_digit_10 0.860499
    111 height_digit_8 -0.467701
    112 height_digit_9 -0.477758
    113 height_digit_7 -0.483906
    114 height_digit_6 -0.488399
    115 height_digit_1 -0.542421

    euc_dist_digit_8

    (Jump to top)
    euc_dist_digit_8 Train Validation
    0 count 28641.000000 290.000000
    1 mean 32.250843 31.948840
    2 std 33.840305 32.820881
    3 min 0.000515 0.075919
    4 25% 7.413237 7.508427
    5 50% 16.984068 18.110081
    6 75% 46.748061 43.715447
    7 max 119.838808 118.501563
    8 unique values 27017.000000 290.000000
    9 unique values / count 0.824300 0.801100
    10 NaNs 4136.000000 72.000000
    Most popular values (NaN = -999):
    euc_dist_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4136 72.0 0.12619 0.63717
    1 1.745511 5 0.0 0.00015 0.00000
    2 2.333012 4 0.0 0.00012 0.00000
    3 1.573821 4 0.0 0.00012 0.00000
    4 7.865760 4 0.0 0.00012 0.00000
    5 1.499511 4 0.0 0.00012 0.00000
    6 1.247594 4 0.0 0.00012 0.00000
    7 11.754236 4 0.0 0.00012 0.00000
    8 1.809596 4 0.0 0.00012 0.00000
    9 8.127321 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_8:
    Column Correlation with euc_dist_digit_8
    0 euc_dist_digit_8 1.000000
    1 euc_dist_digit_7 0.922580
    2 euc_dist_digit_9 0.919703
    3 deviation_dist_from_mid_axis 0.905678
    4 euc_dist_digit_10 0.899548
    5 euc_dist_digit_11 0.871677
    111 height_digit_8 -0.467785
    112 height_digit_9 -0.478848
    113 height_digit_7 -0.478994
    114 height_digit_6 -0.497087
    115 height_digit_1 -0.556503

    euc_dist_digit_9

    (Jump to top)
    euc_dist_digit_9 Train Validation
    0 count 2.693600e+04 254.000000
    1 mean 3.125026e+01 27.349134
    2 std 3.440961e+01 30.559390
    3 min 2.960000e-14 0.130000
    4 25% 6.890000e+00 6.727500
    5 50% 1.586000e+01 15.145000
    6 75% 4.085250e+01 31.427500
    7 max 1.199900e+02 111.410000
    8 unique values 9.190000e+02 192.000000
    9 unique values / count 2.800000e-02 0.530400
    10 NaNs 5.841000e+03 108.000000
    Most popular values (NaN = -999):
    euc_dist_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 5841 108.0 0.17820 0.29917
    1 0.39 157 1.0 0.00479 0.00277
    2 2.73 150 1.0 0.00458 0.00277
    3 0.91 149 0.0 0.00455 0.00000
    4 4.42 148 1.0 0.00452 0.00277
    5 0.65 145 2.0 0.00442 0.00554
    6 3.12 143 1.0 0.00436 0.00277
    7 1.82 143 1.0 0.00436 0.00277
    8 4.68 142 0.0 0.00433 0.00000
    9 1.95 141 2.0 0.00430 0.00554
    Most correlated values with euc_dist_digit_9:
    Column Correlation with euc_dist_digit_9
    0 euc_dist_digit_9 1.000000
    1 deviation_dist_from_mid_axis 0.946361
    2 euc_dist_digit_10 0.933982
    3 euc_dist_digit_8 0.919703
    4 euc_dist_digit_11 0.894293
    5 euc_dist_digit_7 0.873219
    111 height_digit_8 -0.451736
    112 height_digit_7 -0.460248
    113 height_digit_6 -0.471743
    114 height_digit_9 -0.473352
    115 height_digit_1 -0.537054

    euc_dist_digit_10

    (Jump to top)
    euc_dist_digit_10 Train Validation
    0 count 27838.000000 274.000000
    1 mean 33.247571 31.575679
    2 std 34.375507 32.721449
    3 min 0.002010 0.096238
    4 25% 7.541077 7.954299
    5 50% 17.561152 17.099009
    6 75% 50.597531 43.558049
    7 max 119.915642 114.565815
    8 unique values 26196.000000 275.000000
    9 unique values / count 0.799200 0.759700
    10 NaNs 4939.000000 88.000000
    Most popular values (NaN = -999):
    euc_dist_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 4939 88.0 0.15068 0.74576
    1 1.787177 4 0.0 0.00012 0.00000
    2 17.805758 4 0.0 0.00012 0.00000
    3 10.184927 4 0.0 0.00012 0.00000
    4 15.407902 3 0.0 0.00009 0.00000
    5 0.116179 3 0.0 0.00009 0.00000
    6 17.705925 3 0.0 0.00009 0.00000
    7 27.263671 3 0.0 0.00009 0.00000
    8 5.303094 3 0.0 0.00009 0.00000
    9 1.868678 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_10:
    Column Correlation with euc_dist_digit_10
    0 euc_dist_digit_10 1.000000
    1 euc_dist_digit_11 0.935773
    2 euc_dist_digit_9 0.933982
    3 euc_dist_digit_8 0.899548
    4 deviation_dist_from_mid_axis 0.898446
    5 euc_dist_digit_7 0.860499
    111 height_digit_8 -0.471268
    112 height_digit_9 -0.480029
    113 height_digit_7 -0.484636
    114 height_digit_6 -0.489475
    115 height_digit_1 -0.555484

    euc_dist_digit_11

    (Jump to top)
    euc_dist_digit_11 Train Validation
    0 count 27151.000000 277.000000
    1 mean 32.644335 33.387692
    2 std 34.165306 33.296209
    3 min 0.001071 0.095011
    4 25% 7.369695 7.795154
    5 50% 17.209259 18.661318
    6 75% 48.329409 54.821571
    7 max 119.855309 116.061975
    8 unique values 25628.000000 278.000000
    9 unique values / count 0.781900 0.768000
    10 NaNs 5626.000000 85.000000
    Most popular values (NaN = -999):
    euc_dist_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 5626 85.0 0.17164 0.71429
    1 2.607928 4 0.0 0.00012 0.00000
    2 7.643070 4 0.0 0.00012 0.00000
    3 6.260653 4 0.0 0.00012 0.00000
    4 2.876322 4 0.0 0.00012 0.00000
    5 23.674066 3 0.0 0.00009 0.00000
    6 7.437821 3 0.0 0.00009 0.00000
    7 95.135146 3 0.0 0.00009 0.00000
    8 1.217428 3 0.0 0.00009 0.00000
    9 37.107898 3 0.0 0.00009 0.00000
    Most correlated values with euc_dist_digit_11:
    Column Correlation with euc_dist_digit_11
    0 euc_dist_digit_11 1.000000
    1 euc_dist_digit_10 0.935773
    2 euc_dist_digit_9 0.894293
    3 deviation_dist_from_mid_axis 0.893389
    4 euc_dist_digit_12 0.872921
    5 euc_dist_digit_8 0.871677
    111 height_digit_8 -0.467289
    112 height_digit_9 -0.479624
    113 height_digit_7 -0.488127
    114 height_digit_6 -0.491141
    115 height_digit_1 -0.560891

    euc_dist_digit_12

    (Jump to top)
    euc_dist_digit_12 Train Validation
    0 count 28909.000000 290.000000
    1 mean 28.629239 28.940690
    2 std 35.018626 34.119917
    3 min 0.000000 0.130000
    4 25% 5.200000 5.915000
    5 50% 11.960000 13.000000
    6 75% 31.330000 35.847500
    7 max 119.990000 119.600000
    8 unique values 912.000000 194.000000
    9 unique values / count 0.027800 0.535900
    10 NaNs 3868.000000 72.000000
    Most popular values (NaN = -999):
    euc_dist_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 3868 72.0 0.11801 0.19890
    1 2.60 230 4.0 0.00702 0.01105
    2 1.56 209 2.0 0.00638 0.00552
    3 4.42 203 3.0 0.00619 0.00829
    4 2.34 199 2.0 0.00607 0.00552
    5 0.13 197 1.0 0.00601 0.00276
    6 1.04 194 1.0 0.00592 0.00276
    7 1.82 193 2.0 0.00589 0.00552
    8 3.38 190 3.0 0.00580 0.00829
    9 0.52 189 3.0 0.00577 0.00829
    Most correlated values with euc_dist_digit_12:
    Column Correlation with euc_dist_digit_12
    0 euc_dist_digit_12 1.000000
    1 deviation_dist_from_mid_axis 0.942951
    2 euc_dist_digit_1 0.895557
    3 euc_dist_digit_11 0.872921
    4 euc_dist_digit_3 0.856718
    5 euc_dist_digit_10 0.851229
    111 height_digit_8 -0.477431
    112 height_digit_9 -0.490168
    113 height_digit_7 -0.501306
    114 height_digit_6 -0.514303
    115 height_digit_1 -0.571832

    area_digit_1

    (Jump to top)
    area_digit_1 Train Validation
    0 count 25448.000000 255.000000
    1 mean 2308.107671 2388.235294
    2 std 1070.213451 1198.736244
    3 min 640.000000 735.000000
    4 25% 1537.000000 1539.000000
    5 50% 2065.000000 2112.000000
    6 75% 2816.000000 2928.000000
    7 max 9870.000000 8890.000000
    8 unique values 1966.000000 223.000000
    9 unique values / count 0.060000 0.616000
    10 NaNs 7329.000000 107.000000
    Most popular values (NaN = -999):
    area_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 7329 107.0 0.22360 0.29972
    1 1512.0 143 0.0 0.00436 0.00000
    2 1456.0 128 0.0 0.00391 0.00000
    3 1350.0 117 3.0 0.00357 0.00840
    4 1680.0 112 0.0 0.00342 0.00000
    5 1560.0 109 1.0 0.00333 0.00280
    6 1300.0 109 3.0 0.00333 0.00840
    7 1740.0 105 1.0 0.00320 0.00280
    8 1400.0 104 2.0 0.00317 0.00560
    9 1728.0 103 2.0 0.00314 0.00560
    Most correlated values with area_digit_1:
    Column Correlation with area_digit_1
    0 area_digit_1 1.000000
    1 height_digit_1 0.626056
    2 area_digit_2 0.557006
    3 area_digit_3 0.544482
    4 area_digit_4 0.541271
    5 area_digit_12 0.539211
    111 11 dist from cen -0.219733
    112 2 dist from cen -0.228585
    113 3 dist from cen -0.242782
    114 1 dist from cen -0.261462
    115 12 dist from cen -0.274356

    area_digit_2

    (Jump to top)
    area_digit_2 Train Validation
    0 count 27855.000000 295.000000
    1 mean 4616.101562 4151.271186
    2 std 2365.657591 1909.520134
    3 min 768.000000 1188.000000
    4 25% 2942.000000 2756.000000
    5 50% 4104.000000 3721.000000
    6 75% 5716.000000 5045.000000
    7 max 25088.000000 12330.000000
    8 unique values 3201.000000 263.000000
    9 unique values / count 0.097700 0.726500
    10 NaNs 4922.000000 67.000000
    Most popular values (NaN = -999):
    area_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4922 67.0 0.15017 0.18715
    1 2640.0 68 0.0 0.00207 0.00000
    2 5040.0 65 1.0 0.00198 0.00279
    3 4200.0 63 0.0 0.00192 0.00000
    4 3120.0 61 1.0 0.00186 0.00279
    5 3024.0 57 3.0 0.00174 0.00838
    6 3360.0 57 1.0 0.00174 0.00279
    7 3960.0 56 1.0 0.00171 0.00279
    8 2880.0 56 0.0 0.00171 0.00000
    9 2520.0 56 0.0 0.00171 0.00000
    Most correlated values with area_digit_2:
    Column Correlation with area_digit_2
    0 area_digit_2 1.000000
    1 height_digit_2 0.849204
    2 width_digit_2 0.826914
    3 area_digit_3 0.646747
    4 area_digit_4 0.634894
    5 area_digit_12 0.600113
    111 11 dist from cen -0.233081
    112 1 dist from cen -0.269999
    113 12 dist from cen -0.306253
    114 3 dist from cen -0.307171
    115 2 dist from cen -0.309652

    area_digit_3

    (Jump to top)
    area_digit_3 Train Validation
    0 count 28612.000000 297.000000
    1 mean 5046.115231 4978.585859
    2 std 2569.549735 2535.028759
    3 min 828.000000 1369.000000
    4 25% 3240.000000 3315.000000
    5 50% 4508.000000 4453.000000
    6 75% 6231.000000 6237.000000
    7 max 31482.000000 16380.000000
    8 unique values 3624.000000 280.000000
    9 unique values / count 0.110600 0.773500
    10 NaNs 4165.000000 65.000000
    Most popular values (NaN = -999):
    area_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4165 65.0 0.12707 0.18362
    1 3600.0 61 1.0 0.00186 0.00282
    2 4080.0 60 0.0 0.00183 0.00000
    3 2520.0 57 1.0 0.00174 0.00282
    4 4620.0 55 0.0 0.00168 0.00000
    5 5040.0 54 1.0 0.00165 0.00282
    6 4200.0 53 0.0 0.00162 0.00000
    7 4050.0 53 0.0 0.00162 0.00000
    8 4320.0 51 0.0 0.00156 0.00000
    9 3900.0 51 0.0 0.00156 0.00000
    Most correlated values with area_digit_3:
    Column Correlation with area_digit_3
    0 area_digit_3 1.000000
    1 width_digit_3 0.807082
    2 height_digit_3 0.798223
    3 area_digit_4 0.680886
    4 area_digit_2 0.646747
    5 area_digit_6 0.639321
    111 1 dist from cen -0.264121
    112 2 dist from cen -0.280373
    113 between_digits_angle_ccw_sum -0.306574
    114 12 dist from cen -0.322698
    115 3 dist from cen -0.355528

    area_digit_4

    (Jump to top)
    area_digit_4 Train Validation
    0 count 27251.000000 269.000000
    1 mean 5793.115665 5627.163569
    2 std 2641.521129 2561.650984
    3 min 1036.000000 1620.000000
    4 25% 3915.000000 3780.000000
    5 50% 5270.000000 5070.000000
    6 75% 7105.500000 6820.000000
    7 max 29946.000000 19314.000000
    8 unique values 3816.000000 249.000000
    9 unique values / count 0.116400 0.687800
    10 NaNs 5526.000000 93.000000
    Most popular values (NaN = -999):
    area_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5526 93.0 0.16859 0.26124
    1 5040.0 70 0.0 0.00214 0.00000
    2 4680.0 58 0.0 0.00177 0.00000
    3 3696.0 55 1.0 0.00168 0.00281
    4 4536.0 53 0.0 0.00162 0.00000
    5 6720.0 52 2.0 0.00159 0.00562
    6 4320.0 50 2.0 0.00153 0.00562
    7 3600.0 50 1.0 0.00153 0.00281
    8 5940.0 49 0.0 0.00149 0.00000
    9 5100.0 46 1.0 0.00140 0.00281
    Most correlated values with area_digit_4:
    Column Correlation with area_digit_4
    0 area_digit_4 1.000000
    1 height_digit_4 0.766798
    2 width_digit_4 0.691913
    3 area_digit_3 0.680886
    4 area_digit_5 0.660022
    5 area_digit_7 0.656411
    111 5 dist from cen -0.252993
    112 1 dist from cen -0.262208
    113 2 dist from cen -0.272024
    114 12 dist from cen -0.318134
    115 3 dist from cen -0.325382

    area_digit_5

    (Jump to top)
    area_digit_5 Train Validation
    0 count 26092.000000 265.000000
    1 mean 7214.179250 6691.577358
    2 std 3474.474015 3008.717328
    3 min 1152.000000 1599.000000
    4 25% 4758.000000 4636.000000
    5 50% 6525.000000 6318.000000
    6 75% 8840.000000 8349.000000
    7 max 32200.000000 22912.000000
    8 unique values 4275.000000 252.000000
    9 unique values / count 0.130400 0.696100
    10 NaNs 6685.000000 97.000000
    Most popular values (NaN = -999):
    area_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 6685 97.0 0.20395 0.27324
    1 5040.0 59 1.0 0.00180 0.00282
    2 4620.0 52 0.0 0.00159 0.00000
    3 5460.0 42 0.0 0.00128 0.00000
    4 7560.0 42 0.0 0.00128 0.00000
    5 6300.0 41 1.0 0.00125 0.00282
    6 5760.0 39 0.0 0.00119 0.00000
    7 6480.0 39 0.0 0.00119 0.00000
    8 4536.0 39 0.0 0.00119 0.00000
    9 5544.0 38 0.0 0.00116 0.00000
    Most correlated values with area_digit_5:
    Column Correlation with area_digit_5
    0 area_digit_5 1.000000
    1 width_digit_5 0.830252
    2 height_digit_5 0.775551
    3 variance_area 0.678825
    4 area_digit_4 0.660022
    5 area_digit_8 0.603508
    111 2 dist from cen -0.212595
    112 6 dist from cen -0.229845
    113 5 dist from cen -0.235901
    114 3 dist from cen -0.253795
    115 12 dist from cen -0.266963

    area_digit_6

    (Jump to top)
    area_digit_6 Train Validation
    0 count 28407.000000 290.000000
    1 mean 6035.063259 6095.472414
    2 std 2742.576668 2732.864358
    3 min 805.000000 1584.000000
    4 25% 4041.000000 4002.500000
    5 50% 5580.000000 5678.500000
    6 75% 7560.000000 7717.500000
    7 max 27378.000000 19608.000000
    8 unique values 3960.000000 272.000000
    9 unique values / count 0.120800 0.751400
    10 NaNs 4370.000000 72.000000
    Most popular values (NaN = -999):
    area_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4370 72.0 0.13333 0.20571
    1 5040.0 64 0.0 0.00195 0.00000
    2 4050.0 58 0.0 0.00177 0.00000
    3 5280.0 52 1.0 0.00159 0.00286
    4 4368.0 50 0.0 0.00153 0.00000
    5 5460.0 49 0.0 0.00149 0.00000
    6 6720.0 48 0.0 0.00146 0.00000
    7 6930.0 47 0.0 0.00143 0.00000
    8 5544.0 47 1.0 0.00143 0.00286
    9 4704.0 46 0.0 0.00140 0.00000
    Most correlated values with area_digit_6:
    Column Correlation with area_digit_6
    0 area_digit_6 1.000000
    1 height_digit_6 0.781435
    2 area_digit_7 0.684934
    3 area_digit_9 0.669600
    4 area_digit_4 0.655595
    5 area_digit_8 0.653968
    111 5 dist from cen -0.242708
    112 2 dist from cen -0.250784
    113 12 dist from cen -0.291534
    114 6 dist from cen -0.304178
    115 3 dist from cen -0.326111

    area_digit_7

    (Jump to top)
    area_digit_7 Train Validation
    0 count 28555.000000 288.000000
    1 mean 4942.821748 4876.631944
    2 std 2221.963276 2252.685580
    3 min 777.000000 1054.000000
    4 25% 3312.000000 3243.750000
    5 50% 4559.000000 4469.000000
    6 75% 6120.000000 6102.500000
    7 max 22866.000000 17030.000000
    8 unique values 3400.000000 264.000000
    9 unique values / count 0.103700 0.729300
    10 NaNs 4222.000000 74.000000
    Most popular values (NaN = -999):
    area_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4222 74.0 0.12881 0.20845
    1 3600.0 59 1.0 0.00180 0.00282
    2 5040.0 57 0.0 0.00174 0.00000
    3 4050.0 56 0.0 0.00171 0.00000
    4 4704.0 54 0.0 0.00165 0.00000
    5 4536.0 54 0.0 0.00165 0.00000
    6 2520.0 54 0.0 0.00165 0.00000
    7 3150.0 53 0.0 0.00162 0.00000
    8 3360.0 53 0.0 0.00162 0.00000
    9 4800.0 51 0.0 0.00156 0.00000
    Most correlated values with area_digit_7:
    Column Correlation with area_digit_7
    0 area_digit_7 1.000000
    1 height_digit_7 0.746753
    2 area_digit_8 0.697879
    3 area_digit_6 0.684934
    4 width_digit_7 0.665595
    5 area_digit_9 0.660227
    111 5 dist from cen -0.222527
    112 9 dist from cen -0.227945
    113 12 dist from cen -0.265251
    114 6 dist from cen -0.280505
    115 3 dist from cen -0.281658

    area_digit_8

    (Jump to top)
    area_digit_8 Train Validation
    0 count 28755.000000 292.000000
    1 mean 5697.203373 5814.003425
    2 std 2741.527329 2932.203603
    3 min 1054.000000 1505.000000
    4 25% 3760.000000 3986.250000
    5 50% 5145.000000 5342.500000
    6 75% 7000.000000 6720.750000
    7 max 29503.000000 25542.000000
    8 unique values 3884.000000 268.000000
    9 unique values / count 0.118500 0.740300
    10 NaNs 4022.000000 70.000000
    Most popular values (NaN = -999):
    area_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4022 70.0 0.12271 0.20115
    1 3600.0 65 1.0 0.00198 0.00287
    2 4320.0 60 3.0 0.00183 0.00862
    3 4368.0 58 0.0 0.00177 0.00000
    4 3696.0 58 0.0 0.00177 0.00000
    5 3150.0 51 0.0 0.00156 0.00000
    6 3360.0 51 0.0 0.00156 0.00000
    7 5096.0 50 0.0 0.00153 0.00000
    8 4080.0 50 0.0 0.00153 0.00000
    9 5940.0 49 0.0 0.00149 0.00000
    Most correlated values with area_digit_8:
    Column Correlation with area_digit_8
    0 area_digit_8 1.000000
    1 height_digit_8 0.731817
    2 width_digit_8 0.698923
    3 area_digit_7 0.697879
    4 area_digit_9 0.689404
    5 area_digit_6 0.653968
    111 11 dist from cen -0.229847
    112 9 dist from cen -0.258230
    113 6 dist from cen -0.263622
    114 3 dist from cen -0.263660
    115 12 dist from cen -0.270084

    area_digit_9

    (Jump to top)
    area_digit_9 Train Validation
    0 count 26974.000000 255.000000
    1 mean 5678.539964 5577.784314
    2 std 2563.651971 2316.266827
    3 min 870.000000 1628.000000
    4 25% 3848.000000 4022.000000
    5 50% 5280.000000 5184.000000
    6 75% 7038.000000 6856.000000
    7 max 34524.000000 14522.000000
    8 unique values 3707.000000 239.000000
    9 unique values / count 0.113100 0.660200
    10 NaNs 5803.000000 107.000000
    Most popular values (NaN = -999):
    area_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5803 107.0 0.17704 0.30141
    1 3600.0 65 1.0 0.00198 0.00282
    2 5940.0 59 1.0 0.00180 0.00282
    3 4368.0 55 0.0 0.00168 0.00000
    4 4320.0 54 0.0 0.00165 0.00000
    5 4590.0 52 0.0 0.00159 0.00000
    6 4536.0 52 1.0 0.00159 0.00282
    7 5096.0 50 0.0 0.00153 0.00000
    8 4080.0 49 0.0 0.00149 0.00000
    9 5040.0 48 0.0 0.00146 0.00000
    Most correlated values with area_digit_9:
    Column Correlation with area_digit_9
    0 area_digit_9 1.000000
    1 height_digit_9 0.748285
    2 area_digit_8 0.689404
    3 area_digit_6 0.669600
    4 area_digit_7 0.660227
    5 width_digit_9 0.633842
    111 9 dist from cen -0.226936
    112 2 dist from cen -0.230707
    113 6 dist from cen -0.262340
    114 3 dist from cen -0.264134
    115 12 dist from cen -0.276428

    area_digit_10

    (Jump to top)
    area_digit_10 Train Validation
    0 count 27882.000000 274.000000
    1 mean 6647.253927 6535.178832
    2 std 3161.975614 3025.937766
    3 min 888.000000 1435.000000
    4 25% 4380.000000 4366.500000
    5 50% 6075.000000 5913.500000
    6 75% 8232.000000 7945.750000
    7 max 35280.000000 18810.000000
    8 unique values 3733.000000 257.000000
    9 unique values / count 0.113900 0.709900
    10 NaNs 4895.000000 88.000000
    Most popular values (NaN = -999):
    area_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4895 88.0 0.14934 0.25287
    1 6160.0 54 1.0 0.00165 0.00287
    2 5544.0 51 1.0 0.00156 0.00287
    3 5040.0 50 2.0 0.00153 0.00575
    4 6480.0 48 0.0 0.00146 0.00000
    5 4680.0 48 0.0 0.00146 0.00000
    6 4620.0 47 0.0 0.00143 0.00000
    7 5460.0 45 1.0 0.00137 0.00287
    8 4800.0 44 0.0 0.00134 0.00000
    9 4158.0 41 1.0 0.00125 0.00287
    Most correlated values with area_digit_10:
    Column Correlation with area_digit_10
    0 area_digit_10 1.000000
    1 width_digit_10 0.856659
    2 height_digit_10 0.854912
    3 area_digit_11 0.646333
    4 area_digit_9 0.627542
    5 variance_area 0.626882
    111 between_digits_angle_ccw_sum -0.268909
    112 10 dist from cen -0.276785
    113 11 dist from cen -0.282686
    114 3 dist from cen -0.285387
    115 12 dist from cen -0.288583

    area_digit_11

    (Jump to top)
    area_digit_11 Train Validation
    0 count 27201.000000 279.000000
    1 mean 5393.460167 5250.641577
    2 std 2633.392241 2676.965577
    3 min 780.000000 1364.000000
    4 25% 3540.000000 3476.000000
    5 50% 4872.000000 4697.000000
    6 75% 6640.000000 6221.000000
    7 max 30338.000000 22490.000000
    8 unique values 3450.000000 266.000000
    9 unique values / count 0.105300 0.734800
    10 NaNs 5576.000000 83.000000
    Most popular values (NaN = -999):
    area_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5576 83.0 0.17012 0.23380
    1 5544.0 59 0.0 0.00180 0.00000
    2 4200.0 57 0.0 0.00174 0.00000
    3 4536.0 55 0.0 0.00168 0.00000
    4 5460.0 53 1.0 0.00162 0.00282
    5 3360.0 51 0.0 0.00156 0.00000
    6 3600.0 50 0.0 0.00153 0.00000
    7 4680.0 50 0.0 0.00153 0.00000
    8 4620.0 48 0.0 0.00146 0.00000
    9 5760.0 48 0.0 0.00146 0.00000
    Most correlated values with area_digit_11:
    Column Correlation with area_digit_11
    0 area_digit_11 1.000000
    1 width_digit_11 0.825194
    2 height_digit_11 0.813478
    3 area_digit_10 0.646333
    4 area_digit_9 0.578697
    5 width_digit_10 0.577713
    111 10 dist from cen -0.236584
    112 6 dist from cen -0.238165
    113 3 dist from cen -0.255290
    114 12 dist from cen -0.260157
    115 11 dist from cen -0.281544

    area_digit_12

    (Jump to top)
    area_digit_12 Train Validation
    0 count 28937.000000 291.000000
    1 mean 6998.064450 6944.563574
    2 std 3525.529979 3800.925618
    3 min 1089.000000 1748.000000
    4 25% 4473.000000 4552.000000
    5 50% 6240.000000 5916.000000
    6 75% 8701.000000 8414.000000
    7 max 28362.000000 23406.000000
    8 unique values 4332.000000 271.000000
    9 unique values / count 0.132200 0.748600
    10 NaNs 3840.000000 71.000000
    Most popular values (NaN = -999):
    area_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 3840 71.0 0.11716 0.20461
    1 5040.0 67 0.0 0.00204 0.00000
    2 5544.0 60 2.0 0.00183 0.00576
    3 4620.0 54 2.0 0.00165 0.00576
    4 6480.0 51 0.0 0.00156 0.00000
    5 4800.0 48 2.0 0.00146 0.00576
    6 5460.0 46 1.0 0.00140 0.00288
    7 4290.0 46 0.0 0.00140 0.00000
    8 6720.0 42 0.0 0.00128 0.00000
    9 3780.0 40 1.0 0.00122 0.00288
    Most correlated values with area_digit_12:
    Column Correlation with area_digit_12
    0 area_digit_12 1.000000
    1 width_digit_12 0.857415
    2 height_digit_12 0.832087
    3 variance_area 0.622523
    4 area_digit_3 0.619287
    5 area_digit_4 0.604102
    111 11 dist from cen -0.329968
    112 2 dist from cen -0.342764
    113 1 dist from cen -0.346086
    114 3 dist from cen -0.381927
    115 12 dist from cen -0.440424

    height_digit_1

    (Jump to top)
    height_digit_1 Train Validation
    0 count 25448.000000 255.000000
    1 mean 59.880541 60.486275
    2 std 20.742269 21.068664
    3 min 19.000000 22.000000
    4 25% 45.000000 47.000000
    5 50% 59.000000 59.000000
    6 75% 73.000000 75.000000
    7 max 143.000000 127.000000
    8 unique values 124.000000 82.000000
    9 unique values / count 0.003800 0.226500
    10 NaNs 7329.000000 107.000000
    Most popular values (NaN = -999):
    height_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 7329 107.0 0.22360 0.29558
    1 60.0 556 5.0 0.01696 0.01381
    2 59.0 555 5.0 0.01693 0.01381
    3 56.0 548 5.0 0.01672 0.01381
    4 61.0 538 6.0 0.01641 0.01657
    5 53.0 530 3.0 0.01617 0.00829
    6 62.0 523 2.0 0.01596 0.00552
    7 58.0 520 4.0 0.01586 0.01105
    8 63.0 514 4.0 0.01568 0.01105
    9 57.0 499 8.0 0.01522 0.02210
    Most correlated values with height_digit_1:
    Column Correlation with height_digit_1
    0 height_digit_1 1.000000
    1 height_digit_6 0.674511
    2 height_digit_4 0.674388
    3 height_digit_7 0.665798
    4 height_digit_8 0.664121
    5 height_digit_9 0.652534
    111 euc_dist_digit_11 -0.560891
    112 euc_dist_digit_1 -0.566982
    113 euc_dist_digit_12 -0.571832
    114 euc_dist_digit_3 -0.586916
    115 deviation_dist_from_mid_axis -0.589626

    height_digit_2

    (Jump to top)
    height_digit_2 Train Validation
    0 count 27855.000000 295.000000
    1 mean 70.994184 67.623729
    2 std 21.127381 18.071027
    3 min 21.000000 31.000000
    4 25% 56.000000 54.500000
    5 50% 68.000000 65.000000
    6 75% 83.000000 79.000000
    7 max 213.000000 122.000000
    8 unique values 163.000000 77.000000
    9 unique values / count 0.005000 0.212700
    10 NaNs 4922.000000 67.000000
    Most popular values (NaN = -999):
    height_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4922 67.0 0.15017 0.18508
    1 62.0 593 5.0 0.01809 0.01381
    2 61.0 593 10.0 0.01809 0.02762
    3 68.0 585 4.0 0.01785 0.01105
    4 63.0 576 11.0 0.01757 0.03039
    5 70.0 568 4.0 0.01733 0.01105
    6 64.0 568 8.0 0.01733 0.02210
    7 60.0 567 3.0 0.01730 0.00829
    8 69.0 564 6.0 0.01721 0.01657
    9 58.0 559 10.0 0.01705 0.02762
    Most correlated values with height_digit_2:
    Column Correlation with height_digit_2
    0 height_digit_2 1.000000
    1 area_digit_2 0.849204
    2 height_digit_3 0.635569
    3 area_digit_3 0.587931
    4 area_digit_4 0.577538
    5 height_digit_4 0.575405
    111 4 dist from cen -0.235030
    112 1 dist from cen -0.248394
    113 12 dist from cen -0.281569
    114 2 dist from cen -0.307129
    115 3 dist from cen -0.328162

    height_digit_3

    (Jump to top)
    height_digit_3 Train Validation
    0 count 28612.000000 297.000000
    1 mean 80.247973 78.521886
    2 std 23.071334 20.069820
    3 min 29.000000 37.000000
    4 25% 63.000000 64.000000
    5 50% 78.000000 76.000000
    6 75% 94.000000 91.000000
    7 max 212.000000 133.000000
    8 unique values 170.000000 82.000000
    9 unique values / count 0.005200 0.226500
    10 NaNs 4165.000000 65.000000
    Most popular values (NaN = -999):
    height_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4165 65.0 0.12707 0.17956
    1 80.0 538 5.0 0.01641 0.01381
    2 68.0 527 6.0 0.01608 0.01657
    3 70.0 523 4.0 0.01596 0.01105
    4 66.0 514 3.0 0.01568 0.00829
    5 67.0 513 3.0 0.01565 0.00829
    6 74.0 512 2.0 0.01562 0.00552
    7 75.0 511 6.0 0.01559 0.01657
    8 65.0 510 5.0 0.01556 0.01381
    9 81.0 508 2.0 0.01550 0.00552
    Most correlated values with height_digit_3:
    Column Correlation with height_digit_3
    0 height_digit_3 1.000000
    1 area_digit_3 0.798223
    2 height_digit_4 0.691319
    3 height_digit_6 0.678212
    4 height_digit_8 0.662582
    5 height_digit_5 0.648401
    111 euc_dist_digit_1 -0.338886
    112 euc_dist_digit_12 -0.340179
    113 euc_dist_digit_3 -0.348751
    114 deviation_dist_from_mid_axis -0.352680
    115 3 dist from cen -0.416407

    height_digit_4

    (Jump to top)
    height_digit_4 Train Validation
    0 count 27251.000000 269.000000
    1 mean 87.709479 86.037175
    2 std 26.389729 24.542503
    3 min 31.000000 36.000000
    4 25% 69.000000 69.000000
    5 50% 86.000000 84.000000
    6 75% 104.000000 103.000000
    7 max 241.000000 167.000000
    8 unique values 178.000000 92.000000
    9 unique values / count 0.005400 0.254100
    10 NaNs 5526.000000 93.000000
    Most popular values (NaN = -999):
    height_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5526 93.0 0.16859 0.25691
    1 83.0 450 3.0 0.01373 0.00829
    2 79.0 429 1.0 0.01309 0.00276
    3 77.0 428 6.0 0.01306 0.01657
    4 94.0 423 3.0 0.01291 0.00829
    5 87.0 421 4.0 0.01284 0.01105
    6 90.0 417 5.0 0.01272 0.01381
    7 84.0 415 4.0 0.01266 0.01105
    8 80.0 413 5.0 0.01260 0.01381
    9 81.0 413 4.0 0.01260 0.01105
    Most correlated values with height_digit_4:
    Column Correlation with height_digit_4
    0 height_digit_4 1.000000
    1 area_digit_4 0.766798
    2 height_digit_8 0.703974
    3 height_digit_6 0.702549
    4 height_digit_7 0.701881
    5 height_digit_3 0.691319
    111 euc_dist_digit_8 -0.435663
    112 euc_dist_digit_1 -0.438502
    113 euc_dist_digit_3 -0.443880
    114 euc_dist_digit_12 -0.445360
    115 deviation_dist_from_mid_axis -0.457691

    height_digit_5

    (Jump to top)
    height_digit_5 Train Validation
    0 count 26092.000000 265.000000
    1 mean 88.637130 86.283019
    2 std 23.653629 22.055523
    3 min 30.000000 37.000000
    4 25% 72.000000 70.000000
    5 50% 87.000000 85.000000
    6 75% 103.000000 100.000000
    7 max 218.000000 154.000000
    8 unique values 171.000000 85.000000
    9 unique values / count 0.005200 0.234800
    10 NaNs 6685.000000 97.000000
    Most popular values (NaN = -999):
    height_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 6685 97.0 0.20395 0.26796
    1 84.0 498 4.0 0.01519 0.01105
    2 90.0 475 5.0 0.01449 0.01381
    3 87.0 475 9.0 0.01449 0.02486
    4 89.0 471 2.0 0.01437 0.00552
    5 81.0 470 8.0 0.01434 0.02210
    6 83.0 457 6.0 0.01394 0.01657
    7 78.0 455 3.0 0.01388 0.00829
    8 88.0 450 6.0 0.01373 0.01657
    9 80.0 448 3.0 0.01367 0.00829
    Most correlated values with height_digit_5:
    Column Correlation with height_digit_5
    0 height_digit_5 1.000000
    1 area_digit_5 0.775551
    2 height_digit_4 0.665415
    3 height_digit_3 0.648401
    4 height_digit_8 0.629027
    5 height_digit_6 0.617553
    111 4 dist from cen -0.229819
    112 2 dist from cen -0.246729
    113 12 dist from cen -0.270386
    114 5 dist from cen -0.270411
    115 3 dist from cen -0.311705

    height_digit_6

    (Jump to top)
    height_digit_6 Train Validation
    0 count 28407.000000 290.000000
    1 mean 88.011054 89.400000
    2 std 27.994521 27.312342
    3 min 28.000000 33.000000
    4 25% 67.000000 70.000000
    5 50% 86.000000 88.500000
    6 75% 107.000000 107.750000
    7 max 225.000000 167.000000
    8 unique values 177.000000 103.000000
    9 unique values / count 0.005400 0.284500
    10 NaNs 4370.000000 72.000000
    Most popular values (NaN = -999):
    height_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4370 72.0 0.13333 0.19890
    1 86.0 429 5.0 0.01309 0.01381
    2 84.0 424 3.0 0.01294 0.00829
    3 93.0 411 4.0 0.01254 0.01105
    4 85.0 403 4.0 0.01230 0.01105
    5 79.0 401 2.0 0.01223 0.00552
    6 81.0 400 3.0 0.01220 0.00829
    7 83.0 397 2.0 0.01211 0.00552
    8 78.0 394 3.0 0.01202 0.00829
    9 91.0 394 2.0 0.01202 0.00552
    Most correlated values with height_digit_6:
    Column Correlation with height_digit_6
    0 height_digit_6 1.000000
    1 area_digit_6 0.781435
    2 height_digit_7 0.765061
    3 height_digit_8 0.734542
    4 height_digit_9 0.723395
    5 height_digit_4 0.702549
    111 euc_dist_digit_8 -0.497087
    112 euc_dist_digit_1 -0.499671
    113 euc_dist_digit_6 -0.504996
    114 euc_dist_digit_12 -0.514303
    115 deviation_dist_from_mid_axis -0.536082

    height_digit_7

    (Jump to top)
    height_digit_7 Train Validation
    0 count 28555.000000 288.000000
    1 mean 81.457468 81.940972
    2 std 24.236242 24.020461
    3 min 24.000000 30.000000
    4 25% 63.000000 63.750000
    5 50% 80.000000 82.000000
    6 75% 98.000000 97.000000
    7 max 195.000000 152.000000
    8 unique values 157.000000 98.000000
    9 unique values / count 0.004800 0.270700
    10 NaNs 4222.000000 74.000000
    Most popular values (NaN = -999):
    height_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4222 74.0 0.12881 0.20442
    1 80.0 470 5.0 0.01434 0.01381
    2 83.0 451 7.0 0.01376 0.01934
    3 86.0 451 1.0 0.01376 0.00276
    4 79.0 449 3.0 0.01370 0.00829
    5 78.0 446 3.0 0.01361 0.00829
    6 74.0 445 4.0 0.01358 0.01105
    7 85.0 442 6.0 0.01349 0.01657
    8 88.0 436 6.0 0.01330 0.01657
    9 75.0 434 2.0 0.01324 0.00552
    Most correlated values with height_digit_7:
    Column Correlation with height_digit_7
    0 height_digit_7 1.000000
    1 height_digit_6 0.765061
    2 height_digit_8 0.759517
    3 area_digit_7 0.746753
    4 height_digit_9 0.723819
    5 height_digit_4 0.701881
    111 euc_dist_digit_10 -0.484636
    112 euc_dist_digit_11 -0.488127
    113 euc_dist_digit_3 -0.489826
    114 euc_dist_digit_12 -0.501306
    115 deviation_dist_from_mid_axis -0.514352

    height_digit_8

    (Jump to top)
    height_digit_8 Train Validation
    0 count 28755.000000 292.000000
    1 mean 85.024761 86.708904
    2 std 25.677814 26.211685
    3 min 28.000000 31.000000
    4 25% 67.000000 70.000000
    5 50% 84.000000 84.500000
    6 75% 101.000000 102.000000
    7 max 256.000000 214.000000
    8 unique values 185.000000 96.000000
    9 unique values / count 0.005600 0.265200
    10 NaNs 4022.000000 70.000000
    Most popular values (NaN = -999):
    height_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4022 70.0 0.12271 0.19391
    1 82.0 504 2.0 0.01538 0.00554
    2 88.0 475 2.0 0.01449 0.00554
    3 83.0 458 3.0 0.01397 0.00831
    4 85.0 457 6.0 0.01394 0.01662
    5 76.0 455 7.0 0.01388 0.01939
    6 78.0 453 1.0 0.01382 0.00277
    7 89.0 452 5.0 0.01379 0.01385
    8 77.0 446 8.0 0.01361 0.02216
    9 84.0 445 7.0 0.01358 0.01939
    Most correlated values with height_digit_8:
    Column Correlation with height_digit_8
    0 height_digit_8 1.000000
    1 height_digit_7 0.759517
    2 height_digit_9 0.744959
    3 height_digit_6 0.734542
    4 area_digit_8 0.731817
    5 height_digit_4 0.703974
    111 euc_dist_digit_10 -0.471268
    112 euc_dist_digit_3 -0.471607
    113 euc_dist_digit_1 -0.474169
    114 euc_dist_digit_12 -0.477431
    115 deviation_dist_from_mid_axis -0.489259

    height_digit_9

    (Jump to top)
    height_digit_9 Train Validation
    0 count 26974.000000 255.000000
    1 mean 89.002818 90.662745
    2 std 27.629884 24.658055
    3 min 24.000000 37.000000
    4 25% 69.000000 74.000000
    5 50% 88.000000 91.000000
    6 75% 107.000000 106.000000
    7 max 271.000000 165.000000
    8 unique values 195.000000 92.000000
    9 unique values / count 0.005900 0.254100
    10 NaNs 5803.000000 107.000000
    Most popular values (NaN = -999):
    height_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5803 107.0 0.17704 0.29558
    1 91.0 428 8.0 0.01306 0.02210
    2 88.0 420 2.0 0.01281 0.00552
    3 90.0 404 3.0 0.01233 0.00829
    4 84.0 401 0.0 0.01223 0.00000
    5 97.0 393 3.0 0.01199 0.00829
    6 85.0 392 2.0 0.01196 0.00552
    7 83.0 382 4.0 0.01165 0.01105
    8 96.0 382 6.0 0.01165 0.01657
    9 75.0 381 3.0 0.01162 0.00829
    Most correlated values with height_digit_9:
    Column Correlation with height_digit_9
    0 height_digit_9 1.000000
    1 area_digit_9 0.748285
    2 height_digit_8 0.744959
    3 height_digit_7 0.723819
    4 height_digit_6 0.723395
    5 height_digit_4 0.680362
    111 euc_dist_digit_11 -0.479624
    112 euc_dist_digit_10 -0.480029
    113 euc_dist_digit_3 -0.484704
    114 euc_dist_digit_12 -0.490168
    115 deviation_dist_from_mid_axis -0.508935

    height_digit_10

    (Jump to top)
    height_digit_10 Train Validation
    0 count 27882.000000 274.000000
    1 mean 81.400330 80.317518
    2 std 21.714878 20.292312
    3 min 31.000000 35.000000
    4 25% 66.000000 67.000000
    5 50% 79.000000 79.000000
    6 75% 94.000000 89.750000
    7 max 213.000000 165.000000
    8 unique values 162.000000 86.000000
    9 unique values / count 0.004900 0.237600
    10 NaNs 4895.000000 88.000000
    Most popular values (NaN = -999):
    height_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4895 88.0 0.14934 0.24309
    1 76.0 597 7.0 0.01821 0.01934
    2 75.0 588 6.0 0.01794 0.01657
    3 71.0 548 7.0 0.01672 0.01934
    4 69.0 545 6.0 0.01663 0.01657
    5 73.0 543 6.0 0.01657 0.01657
    6 78.0 540 7.0 0.01647 0.01934
    7 82.0 537 5.0 0.01638 0.01381
    8 79.0 535 6.0 0.01632 0.01657
    9 70.0 534 9.0 0.01629 0.02486
    Most correlated values with height_digit_10:
    Column Correlation with height_digit_10
    0 height_digit_10 1.000000
    1 area_digit_10 0.854912
    2 height_digit_11 0.680040
    3 height_digit_8 0.571600
    4 area_digit_11 0.565602
    5 height_digit_5 0.562692
    111 10 dist from cen -0.191549
    112 5 dist from cen -0.194954
    113 11 dist from cen -0.201434
    114 12 dist from cen -0.226314
    115 3 dist from cen -0.251285

    height_digit_11

    (Jump to top)
    height_digit_11 Train Validation
    0 count 27201.000000 279.000000
    1 mean 75.654792 74.444444
    2 std 20.775003 18.839428
    3 min 26.000000 28.000000
    4 25% 61.000000 62.000000
    5 50% 73.000000 73.000000
    6 75% 88.000000 86.000000
    7 max 241.000000 173.000000
    8 unique values 151.000000 77.000000
    9 unique values / count 0.004600 0.212700
    10 NaNs 5576.000000 83.000000
    Most popular values (NaN = -999):
    height_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5576 83.0 0.17012 0.22992
    1 67.0 595 6.0 0.01815 0.01662
    2 71.0 588 8.0 0.01794 0.02216
    3 69.0 585 10.0 0.01785 0.02770
    4 68.0 557 4.0 0.01699 0.01108
    5 72.0 553 3.0 0.01687 0.00831
    6 65.0 553 8.0 0.01687 0.02216
    7 64.0 547 6.0 0.01669 0.01662
    8 75.0 545 5.0 0.01663 0.01385
    9 73.0 544 7.0 0.01660 0.01939
    Most correlated values with height_digit_11:
    Column Correlation with height_digit_11
    0 height_digit_11 1.000000
    1 area_digit_11 0.813478
    2 height_digit_10 0.680040
    3 height_digit_8 0.619594
    4 height_digit_9 0.615626
    5 height_digit_6 0.604576
    111 euc_dist_digit_3 -0.278515
    112 deviation_dist_from_mid_axis -0.282200
    113 euc_dist_digit_1 -0.288908
    114 euc_dist_digit_12 -0.290975
    115 3 dist from cen -0.305179

    height_digit_12

    (Jump to top)
    height_digit_12 Train Validation
    0 count 28937.000000 291.000000
    1 mean 77.071742 77.474227
    2 std 21.834246 22.736311
    3 min 28.000000 35.000000
    4 25% 62.000000 62.500000
    5 50% 74.000000 73.000000
    6 75% 89.000000 88.000000
    7 max 287.000000 157.000000
    8 unique values 169.000000 92.000000
    9 unique values / count 0.005200 0.254100
    10 NaNs 3840.000000 71.000000
    Most popular values (NaN = -999):
    height_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 3840 71.0 0.11716 0.19613
    1 70.0 647 5.0 0.01974 0.01381
    2 71.0 631 2.0 0.01925 0.00552
    3 66.0 624 9.0 0.01904 0.02486
    4 67.0 623 11.0 0.01901 0.03039
    5 72.0 622 10.0 0.01898 0.02762
    6 69.0 610 6.0 0.01861 0.01657
    7 73.0 594 5.0 0.01812 0.01381
    8 65.0 583 8.0 0.01779 0.02210
    9 64.0 577 8.0 0.01760 0.02210
    Most correlated values with height_digit_12:
    Column Correlation with height_digit_12
    0 height_digit_12 1.000000
    1 area_digit_12 0.832087
    2 area_digit_3 0.558984
    3 area_digit_2 0.543378
    4 width_digit_3 0.531169
    5 area_digit_4 0.530821
    111 2 dist from cen -0.280830
    112 1 dist from cen -0.301515
    113 11 dist from cen -0.302116
    114 6 dist from cen -0.326151
    115 12 dist from cen -0.412872

    width_digit_1

    (Jump to top)
    width_digit_1 Train Validation
    0 count 25448.000000 255.000000
    1 mean 40.342109 40.905882
    2 std 17.562823 17.562682
    3 min 18.000000 21.000000
    4 25% 29.000000 30.000000
    5 50% 34.000000 35.000000
    6 75% 47.000000 48.000000
    7 max 164.000000 127.000000
    8 unique values 127.000000 65.000000
    9 unique values / count 0.003900 0.179600
    10 NaNs 7329.000000 107.000000
    Most popular values (NaN = -999):
    width_digit_1 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 7329 107.0 0.22360 0.29558
    1 31.0 1287 13.0 0.03927 0.03591
    2 30.0 1237 17.0 0.03774 0.04696
    3 32.0 1189 16.0 0.03628 0.04420
    4 28.0 1165 9.0 0.03554 0.02486
    5 29.0 1161 9.0 0.03542 0.02486
    6 27.0 1064 6.0 0.03246 0.01657
    7 33.0 1063 4.0 0.03243 0.01105
    8 26.0 1056 9.0 0.03222 0.02486
    9 34.0 1011 12.0 0.03084 0.03315
    Most correlated values with width_digit_1:
    Column Correlation with width_digit_1
    0 width_digit_1 1.000000
    1 deviation_dist_from_mid_axis 0.661077
    2 width_digit_9 0.658588
    3 euc_dist_digit_12 0.657694
    4 euc_dist_digit_1 0.647471
    5 euc_dist_digit_11 0.639541
    111 height_digit_7 -0.244595
    112 sequence_flag_cw -0.250314
    113 height_digit_6 -0.252969
    114 number_of_digits -0.271456
    115 height_digit_1 -0.295377

    width_digit_2

    (Jump to top)
    width_digit_2 Train Validation
    0 count 27855.000000 295.000000
    1 mean 62.648717 59.725424
    2 std 17.698763 16.272839
    3 min 26.000000 33.000000
    4 25% 50.000000 49.000000
    5 50% 60.000000 56.000000
    6 75% 72.000000 69.000000
    7 max 193.000000 137.000000
    8 unique values 141.000000 68.000000
    9 unique values / count 0.004300 0.187800
    10 NaNs 4922.000000 67.000000
    Most popular values (NaN = -999):
    width_digit_2 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4922 67.0 0.15017 0.18508
    1 53.0 758 11.0 0.02313 0.03039
    2 57.0 734 9.0 0.02239 0.02486
    3 51.0 726 8.0 0.02215 0.02210
    4 54.0 718 6.0 0.02191 0.01657
    5 59.0 716 5.0 0.02184 0.01381
    6 55.0 711 9.0 0.02169 0.02486
    7 50.0 711 12.0 0.02169 0.03315
    8 49.0 705 8.0 0.02151 0.02210
    9 56.0 695 7.0 0.02120 0.01934
    Most correlated values with width_digit_2:
    Column Correlation with width_digit_2
    0 width_digit_2 1.000000
    1 area_digit_2 0.826914
    2 width_digit_3 0.555155
    3 area_digit_3 0.525885
    4 area_digit_4 0.520994
    5 width_digit_4 0.516818
    111 6 dist from cen -0.201849
    112 3 dist from cen -0.208258
    113 1 dist from cen -0.217314
    114 2 dist from cen -0.234016
    115 12 dist from cen -0.256382

    width_digit_3

    (Jump to top)
    width_digit_3 Train Validation
    0 count 28612.000000 297.000000
    1 mean 61.032259 61.508418
    2 std 19.128573 21.684020
    3 min 23.000000 30.000000
    4 25% 47.000000 47.000000
    5 50% 57.000000 55.000000
    6 75% 71.000000 71.000000
    7 max 185.000000 163.000000
    8 unique values 147.000000 80.000000
    9 unique values / count 0.004500 0.221000
    10 NaNs 4165.000000 65.000000
    Most popular values (NaN = -999):
    width_digit_3 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4165 65.0 0.12707 0.18006
    1 52.0 757 10.0 0.02310 0.02770
    2 49.0 741 8.0 0.02261 0.02216
    3 47.0 736 8.0 0.02245 0.02216
    4 48.0 736 6.0 0.02245 0.01662
    5 51.0 734 5.0 0.02239 0.01385
    6 54.0 724 8.0 0.02209 0.02216
    7 53.0 717 10.0 0.02188 0.02770
    8 50.0 708 11.0 0.02160 0.03047
    9 55.0 691 9.0 0.02108 0.02493
    Most correlated values with width_digit_3:
    Column Correlation with width_digit_3
    0 width_digit_3 1.000000
    1 area_digit_3 0.807082
    2 width_digit_4 0.665628
    3 width_digit_6 0.636762
    4 width_digit_9 0.628038
    5 width_digit_7 0.627355
    111 3 dist from cen -0.183947
    112 11 dist from cen -0.185231
    113 6 dist from cen -0.224250
    114 12 dist from cen -0.242644
    115 between_digits_angle_ccw_sum -0.322870

    width_digit_4

    (Jump to top)
    width_digit_4 Train Validation
    0 count 27251.000000 269.000000
    1 mean 65.411545 64.591078
    2 std 19.661431 19.690991
    3 min 26.000000 35.000000
    4 25% 52.000000 51.000000
    5 50% 61.000000 59.000000
    6 75% 75.000000 74.000000
    7 max 204.000000 174.000000
    8 unique values 154.000000 70.000000
    9 unique values / count 0.004700 0.193400
    10 NaNs 5526.000000 93.000000
    Most popular values (NaN = -999):
    width_digit_4 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5526 93.0 0.16859 0.25691
    1 57.0 756 16.0 0.02306 0.04420
    2 55.0 739 12.0 0.02255 0.03315
    3 56.0 736 9.0 0.02245 0.02486
    4 51.0 723 10.0 0.02206 0.02762
    5 54.0 706 8.0 0.02154 0.02210
    6 58.0 690 4.0 0.02105 0.01105
    7 52.0 686 6.0 0.02093 0.01657
    8 53.0 680 7.0 0.02075 0.01934
    9 60.0 670 3.0 0.02044 0.00829
    Most correlated values with width_digit_4:
    Column Correlation with width_digit_4
    0 width_digit_4 1.000000
    1 width_digit_7 0.692726
    2 area_digit_4 0.691913
    3 width_digit_9 0.686043
    4 width_digit_6 0.682627
    5 width_digit_8 0.678608
    111 11 dist from cen -0.191231
    112 number_of_digits -0.199790
    113 7 dist from cen -0.205595
    114 12 dist from cen -0.240505
    115 6 dist from cen -0.243615

    width_digit_5

    (Jump to top)
    width_digit_5 Train Validation
    0 count 26092.000000 265.000000
    1 mean 79.223402 75.630189
    2 std 24.229453 21.386926
    3 min 32.000000 35.000000
    4 25% 61.000000 60.000000
    5 50% 75.000000 74.000000
    6 75% 93.000000 88.000000
    7 max 220.000000 179.000000
    8 unique values 177.000000 84.000000
    9 unique values / count 0.005400 0.232000
    10 NaNs 6685.000000 97.000000
    Most popular values (NaN = -999):
    width_digit_5 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 6685 97.0 0.20395 0.26796
    1 71.0 522 1.0 0.01593 0.00276
    2 69.0 498 3.0 0.01519 0.00829
    3 67.0 493 3.0 0.01504 0.00829
    4 62.0 482 6.0 0.01471 0.01657
    5 73.0 475 4.0 0.01449 0.01105
    6 61.0 474 8.0 0.01446 0.02210
    7 64.0 474 6.0 0.01446 0.01657
    8 68.0 470 5.0 0.01434 0.01381
    9 70.0 469 7.0 0.01431 0.01934
    Most correlated values with width_digit_5:
    Column Correlation with width_digit_5
    0 width_digit_5 1.000000
    1 area_digit_5 0.830252
    2 width_digit_4 0.578648
    3 width_digit_3 0.568414
    4 width_digit_8 0.548907
    5 width_digit_6 0.545802
    111 9 dist from cen -0.147804
    112 10 dist from cen -0.151580
    113 11 dist from cen -0.160539
    114 6 dist from cen -0.184585
    115 12 dist from cen -0.193835

    width_digit_6

    (Jump to top)
    width_digit_6 Train Validation
    0 count 28407.000000 290.000000
    1 mean 68.243391 67.565517
    2 std 21.233585 20.254715
    3 min 23.000000 33.000000
    4 25% 54.000000 54.250000
    5 50% 64.000000 64.000000
    6 75% 78.000000 76.000000
    7 max 209.000000 167.000000
    8 unique values 169.000000 79.000000
    9 unique values / count 0.005200 0.218200
    10 NaNs 4370.000000 72.000000
    Most popular values (NaN = -999):
    width_digit_6 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4370 72.0 0.13333 0.19890
    1 59.0 707 8.0 0.02157 0.02210
    2 61.0 697 9.0 0.02126 0.02486
    3 58.0 671 6.0 0.02047 0.01657
    4 60.0 669 8.0 0.02041 0.02210
    5 57.0 663 5.0 0.02023 0.01381
    6 64.0 660 9.0 0.02014 0.02486
    7 66.0 652 9.0 0.01989 0.02486
    8 63.0 651 4.0 0.01986 0.01105
    9 65.0 648 6.0 0.01977 0.01657
    Most correlated values with width_digit_6:
    Column Correlation with width_digit_6
    0 width_digit_6 1.000000
    1 width_digit_9 0.720412
    2 width_digit_7 0.713691
    3 width_digit_8 0.691108
    4 width_digit_4 0.682627
    5 width_digit_3 0.636762
    111 vert_count -0.165468
    112 11 dist from cen -0.179207
    113 7 dist from cen -0.194884
    114 12 dist from cen -0.215129
    115 6 dist from cen -0.262488

    width_digit_7

    (Jump to top)
    width_digit_7 Train Validation
    0 count 28555.000000 288.000000
    1 mean 60.447242 58.857639
    2 std 19.571260 18.916206
    3 min 21.000000 27.000000
    4 25% 47.000000 45.000000
    5 50% 57.000000 56.000000
    6 75% 70.000000 68.000000
    7 max 206.000000 139.000000
    8 unique values 148.000000 76.000000
    9 unique values / count 0.004500 0.209900
    10 NaNs 4222.000000 74.000000
    Most popular values (NaN = -999):
    width_digit_7 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4222 74.0 0.12881 0.20442
    1 52.0 778 7.0 0.02374 0.01934
    2 50.0 763 6.0 0.02328 0.01657
    3 49.0 744 7.0 0.02270 0.01934
    4 53.0 724 7.0 0.02209 0.01934
    5 51.0 722 4.0 0.02203 0.01105
    6 46.0 719 7.0 0.02194 0.01934
    7 56.0 698 8.0 0.02130 0.02210
    8 54.0 694 3.0 0.02117 0.00829
    9 47.0 689 5.0 0.02102 0.01381
    Most correlated values with width_digit_7:
    Column Correlation with width_digit_7
    0 width_digit_7 1.000000
    1 width_digit_8 0.742913
    2 width_digit_9 0.727928
    3 width_digit_6 0.713691
    4 width_digit_4 0.692726
    5 area_digit_7 0.665595
    111 8 dist from cen -0.187548
    112 12 dist from cen -0.196933
    113 7 dist from cen -0.206359
    114 number_of_digits -0.224091
    115 6 dist from cen -0.259507

    width_digit_8

    (Jump to top)
    width_digit_8 Train Validation
    0 count 28755.000000 292.000000
    1 mean 66.558233 66.359589
    2 std 22.255969 22.241045
    3 min 24.000000 31.000000
    4 25% 50.000000 51.000000
    5 50% 62.000000 61.000000
    6 75% 78.000000 75.000000
    7 max 277.000000 198.000000
    8 unique values 166.000000 80.000000
    9 unique values / count 0.005100 0.221000
    10 NaNs 4022.000000 70.000000
    Most popular values (NaN = -999):
    width_digit_8 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4022 70.0 0.12271 0.19391
    1 53.0 692 6.0 0.02111 0.01662
    2 50.0 682 11.0 0.02081 0.03047
    3 51.0 671 12.0 0.02047 0.03324
    4 56.0 665 9.0 0.02029 0.02493
    5 49.0 644 7.0 0.01965 0.01939
    6 55.0 644 5.0 0.01965 0.01385
    7 58.0 643 4.0 0.01962 0.01108
    8 48.0 643 6.0 0.01962 0.01662
    9 57.0 635 16.0 0.01937 0.04432
    Most correlated values with width_digit_8:
    Column Correlation with width_digit_8
    0 width_digit_8 1.000000
    1 width_digit_9 0.750364
    2 width_digit_7 0.742913
    3 area_digit_8 0.698923
    4 width_digit_6 0.691108
    5 width_digit_4 0.678608
    111 sequence_flag_cw -0.221696
    112 7 dist from cen -0.229184
    113 6 dist from cen -0.241545
    114 number_of_digits -0.265076
    115 between_digits_angle_ccw_sum -0.320684

    width_digit_9

    (Jump to top)
    width_digit_9 Train Validation
    0 count 26974.000000 255.000000
    1 mean 63.786943 60.925490
    2 std 21.164594 17.780329
    3 min 25.000000 33.000000
    4 25% 50.000000 50.000000
    5 50% 59.000000 58.000000
    6 75% 72.000000 69.000000
    7 max 207.000000 145.000000
    8 unique values 161.000000 67.000000
    9 unique values / count 0.004900 0.185100
    10 NaNs 5803.000000 107.000000
    Most popular values (NaN = -999):
    width_digit_9 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5803 107.0 0.17704 0.29558
    1 54.0 780 10.0 0.02380 0.02762
    2 52.0 743 14.0 0.02267 0.03867
    3 51.0 711 8.0 0.02169 0.02210
    4 56.0 708 7.0 0.02160 0.01934
    5 53.0 703 9.0 0.02145 0.02486
    6 57.0 699 2.0 0.02133 0.00552
    7 55.0 690 4.0 0.02105 0.01105
    8 58.0 683 12.0 0.02084 0.03315
    9 59.0 682 7.0 0.02081 0.01934
    Most correlated values with width_digit_9:
    Column Correlation with width_digit_9
    0 width_digit_9 1.000000
    1 width_digit_8 0.750364
    2 width_digit_7 0.727928
    3 width_digit_6 0.720412
    4 width_digit_4 0.686043
    5 width_digit_1 0.658588
    111 8 dist from cen -0.177001
    112 sequence_flag_cw -0.177594
    113 12 dist from cen -0.204242
    114 7 dist from cen -0.213543
    115 6 dist from cen -0.232948

    width_digit_10

    (Jump to top)
    width_digit_10 Train Validation
    0 count 27882.000000 274.000000
    1 mean 78.795890 78.572993
    2 std 21.110245 20.855955
    3 min 24.000000 40.000000
    4 25% 64.000000 63.000000
    5 50% 76.000000 77.500000
    6 75% 91.000000 89.000000
    7 max 219.000000 145.000000
    8 unique values 159.000000 85.000000
    9 unique values / count 0.004900 0.234800
    10 NaNs 4895.000000 88.000000
    Most popular values (NaN = -999):
    width_digit_10 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 4895 88.0 0.14934 0.24309
    1 73.0 597 2.0 0.01821 0.00552
    2 70.0 595 8.0 0.01815 0.02210
    3 67.0 584 6.0 0.01782 0.01657
    4 71.0 584 3.0 0.01782 0.00829
    5 74.0 566 3.0 0.01727 0.00829
    6 72.0 557 4.0 0.01699 0.01105
    7 78.0 553 5.0 0.01687 0.01381
    8 66.0 551 7.0 0.01681 0.01934
    9 76.0 549 7.0 0.01675 0.01934
    Most correlated values with width_digit_10:
    Column Correlation with width_digit_10
    0 width_digit_10 1.000000
    1 area_digit_10 0.856659
    2 width_digit_11 0.629837
    3 area_digit_11 0.577713
    4 area_digit_9 0.561415
    5 area_digit_8 0.559530
    111 9 dist from cen -0.281388
    112 12 dist from cen -0.290839
    113 10 dist from cen -0.295011
    114 11 dist from cen -0.297822
    115 between_digits_angle_ccw_sum -0.419332

    width_digit_11

    (Jump to top)
    width_digit_11 Train Validation
    0 count 27201.000000 279.000000
    1 mean 69.117569 68.064516
    2 std 20.374876 20.990820
    3 min 24.000000 31.000000
    4 25% 54.000000 52.000000
    5 50% 66.000000 65.000000
    6 75% 80.000000 79.000000
    7 max 197.000000 142.000000
    8 unique values 162.000000 82.000000
    9 unique values / count 0.004900 0.226500
    10 NaNs 5576.000000 83.000000
    Most popular values (NaN = -999):
    width_digit_11 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 5576 83.0 0.17012 0.22928
    1 58.0 651 6.0 0.01986 0.01657
    2 57.0 620 7.0 0.01892 0.01934
    3 61.0 618 6.0 0.01885 0.01657
    4 59.0 608 4.0 0.01855 0.01105
    5 65.0 605 5.0 0.01846 0.01381
    6 60.0 602 6.0 0.01837 0.01657
    7 62.0 596 5.0 0.01818 0.01381
    8 54.0 587 3.0 0.01791 0.00829
    9 64.0 586 6.0 0.01788 0.01657
    Most correlated values with width_digit_11:
    Column Correlation with width_digit_11
    0 width_digit_11 1.000000
    1 area_digit_11 0.825194
    2 width_digit_10 0.629837
    3 width_digit_9 0.576718
    4 width_digit_8 0.572796
    5 width_digit_7 0.567187
    111 9 dist from cen -0.199445
    112 10 dist from cen -0.213663
    113 12 dist from cen -0.223063
    114 6 dist from cen -0.232563
    115 11 dist from cen -0.250970

    width_digit_12

    (Jump to top)
    width_digit_12 Train Validation
    0 count 28937.000000 291.000000
    1 mean 87.386426 86.006873
    2 std 25.649241 28.171763
    3 min 30.000000 39.000000
    4 25% 69.000000 67.000000
    5 50% 84.000000 81.000000
    6 75% 102.000000 98.000000
    7 max 220.000000 249.000000
    8 unique values 180.000000 95.000000
    9 unique values / count 0.005500 0.262400
    10 NaNs 3840.000000 71.000000
    Most popular values (NaN = -999):
    width_digit_12 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.0 3840 71.0 0.11716 0.19668
    1 76.0 543 4.0 0.01657 0.01108
    2 77.0 535 6.0 0.01632 0.01662
    3 78.0 519 7.0 0.01583 0.01939
    4 73.0 498 12.0 0.01519 0.03324
    5 70.0 497 6.0 0.01516 0.01662
    6 79.0 494 0.0 0.01507 0.00000
    7 74.0 493 8.0 0.01504 0.02216
    8 75.0 487 5.0 0.01486 0.01385
    9 80.0 480 6.0 0.01464 0.01662
    Most correlated values with width_digit_12:
    Column Correlation with width_digit_12
    0 width_digit_12 1.000000
    1 area_digit_12 0.857415
    2 variance_width 0.659845
    3 height_digit_3 0.552388
    4 area_digit_6 0.527116
    5 area_digit_4 0.523080
    111 2 dist from cen -0.308594
    112 euc_dist_digit_3 -0.318233
    113 12 dist from cen -0.351274
    114 3 dist from cen -0.389002
    115 between_digits_angle_ccw_sum -0.392715

    variance_width

    (Jump to top)
    variance_width Train Validation
    0 count 32313.000000 346.000000
    1 mean 363.578878 350.985762
    2 std 306.449113 318.828425
    3 min 0.000000 3.000000
    4 25% 171.515152 138.458081
    5 50% 282.787879 256.117424
    6 75% 456.363636 438.815404
    7 max 5408.000000 2714.636364
    8 unique values 24756.000000 347.000000
    9 unique values / count 0.755300 0.958600
    10 NaNs 464.000000 16.000000
    Most popular values (NaN = -999):
    variance_width Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 464 16.0 0.01416 0.1
    1 60.500000 12 0.0 0.00037 0.0
    2 40.500000 12 0.0 0.00037 0.0
    3 0.000000 11 0.0 0.00034 0.0
    4 98.000000 10 0.0 0.00031 0.0
    5 0.500000 10 0.0 0.00031 0.0
    6 242.000000 9 0.0 0.00027 0.0
    7 204.628788 9 0.0 0.00027 0.0
    8 180.500000 9 0.0 0.00027 0.0
    9 12.500000 9 0.0 0.00027 0.0
    Most correlated values with variance_width:
    Column Correlation with variance_width
    0 variance_width 1.000000
    1 variance_area 0.698479
    2 width_digit_12 0.659845
    3 area_digit_12 0.597185
    4 area_digit_5 0.550318
    5 width_digit_10 0.546107
    112 9 dist from cen -0.209059
    113 11 dist from cen -0.213018
    114 10 dist from cen -0.217875
    115 12 dist from cen -0.239418
    116 3 dist from cen -0.255525

    variance_height

    (Jump to top)
    variance_height Train Validation
    0 count 32313.000000 346.000000
    1 mean 324.115546 333.866598
    2 std 302.509846 315.365341
    3 min 0.000000 16.333333
    4 25% 148.277778 143.956061
    5 50% 246.386364 247.450000
    6 75% 404.100000 411.765909
    7 max 6844.500000 2840.333333
    8 unique values 24131.000000 341.000000
    9 unique values / count 0.736200 0.942000
    10 NaNs 464.000000 16.000000
    Most popular values (NaN = -999):
    variance_height Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 464 16.0 0.01416 0.09697
    1 84.500000 14 0.0 0.00043 0.00000
    2 128.000000 13 1.0 0.00040 0.00606
    3 2.000000 11 0.0 0.00034 0.00000
    4 212.810606 10 0.0 0.00031 0.00000
    5 98.000000 10 0.0 0.00031 0.00000
    6 50.000000 10 0.0 0.00031 0.00000
    7 0.500000 9 0.0 0.00027 0.00000
    8 18.000000 9 3.0 0.00027 0.01818
    9 144.500000 9 0.0 0.00027 0.00000
    Most correlated values with variance_height:
    Column Correlation with variance_height
    0 variance_height 1.000000
    1 variance_area 0.647750
    2 area_digit_9 0.510021
    3 area_digit_8 0.495640
    4 height_digit_5 0.493978
    5 area_digit_5 0.488242
    112 11 dist from cen -0.144312
    113 12 dist from cen -0.162829
    114 5 dist from cen -0.169988
    115 6 dist from cen -0.195627
    116 number_of_digits -0.221174

    variance_area

    (Jump to top)
    variance_area Train Validation
    0 count 3.231300e+04 3.460000e+02
    1 mean 5.148403e+06 4.784651e+06
    2 std 6.805541e+06 5.348901e+06
    3 min 0.000000e+00 8.401530e+04
    4 25% 1.575504e+06 1.389025e+06
    5 50% 3.114518e+06 2.984926e+06
    6 75% 6.090066e+06 6.274740e+06
    7 max 1.530196e+08 3.639078e+07
    8 unique values 3.229700e+04 3.470000e+02
    9 unique values / count 9.854000e-01 9.586000e-01
    10 NaNs 4.640000e+02 1.600000e+01
    Most popular values (NaN = -999):
    variance_area Count in train (desc) Count in validation Share in train Share in validation
    0 -9.990000e+02 464 16.0 0.01416 1.0
    1 0.000000e+00 7 0.0 0.00021 0.0
    2 1.620000e+06 2 0.0 0.00006 0.0
    3 1.980050e+04 2 0.0 0.00006 0.0
    4 1.367858e+06 2 0.0 0.00006 0.0
    5 1.729800e+04 2 0.0 0.00006 0.0
    6 1.229312e+06 2 0.0 0.00006 0.0
    7 7.372800e+04 2 0.0 0.00006 0.0
    8 9.331888e+05 2 0.0 0.00006 0.0
    9 1.891740e+07 2 0.0 0.00006 0.0
    Most correlated values with variance_area:
    Column Correlation with variance_area
    0 variance_area 1.000000
    1 variance_width 0.698479
    2 area_digit_5 0.678825
    3 variance_height 0.647750
    4 area_digit_10 0.626882
    5 area_digit_12 0.622523
    112 11 dist from cen -0.236975
    113 number_of_digits -0.239228
    114 6 dist from cen -0.243035
    115 3 dist from cen -0.257365
    116 12 dist from cen -0.267011

    deviation_dist_from_mid_axis

    (Jump to top)
    deviation_dist_from_mid_axis Train Validation
    0 count 32198.000000 342.000000
    1 mean 32.202820 33.825500
    2 std 32.276635 31.551045
    3 min 0.000000 1.072500
    4 25% 9.880000 10.530000
    5 50% 16.163333 18.286667
    6 75% 44.200000 54.979167
    7 max 125.710000 114.205000
    8 unique values 4337.000000 307.000000
    9 unique values / count 0.132300 0.848100
    10 NaNs 579.000000 20.000000
    Most popular values (NaN = -999):
    deviation_dist_from_mid_axis Count in train (desc) Count in validation Share in train Share in validation
    0 -999.00 579 20.0 0.01766 0.05731
    1 10.27 74 0.0 0.00226 0.00000
    2 10.01 70 2.0 0.00214 0.00573
    3 8.58 69 0.0 0.00211 0.00000
    4 9.36 68 0.0 0.00207 0.00000
    5 10.40 67 0.0 0.00204 0.00000
    6 8.32 63 0.0 0.00192 0.00000
    7 9.10 63 1.0 0.00192 0.00287
    8 7.80 61 0.0 0.00186 0.00000
    9 8.97 61 2.0 0.00186 0.00573
    Most correlated values with deviation_dist_from_mid_axis:
    Column Correlation with deviation_dist_from_mid_axis
    0 deviation_dist_from_mid_axis 1.000000
    1 euc_dist_digit_9 0.946361
    2 euc_dist_digit_12 0.942951
    3 euc_dist_digit_3 0.938323
    4 euc_dist_digit_6 0.930912
    5 euc_dist_digit_8 0.905678
    112 height_digit_8 -0.489259
    113 height_digit_9 -0.508935
    114 height_digit_7 -0.514352
    115 height_digit_6 -0.536082
    116 height_digit_1 -0.589626

    between_axis_digits_angle_sum

    (Jump to top)
    between_axis_digits_angle_sum Train Validation
    0 count 29141.000000 305.000000
    1 mean 352.139508 344.655738
    2 std 52.263430 72.841502
    3 min 0.000000 0.000000
    4 25% 360.000000 360.000000
    5 50% 360.000000 360.000000
    6 75% 360.000000 360.000000
    7 max 360.000000 360.000000
    8 unique values 75.000000 3.000000
    9 unique values / count 0.002300 0.008300
    10 NaNs 3636.000000 57.000000
    Most popular values (NaN = -999):
    between_axis_digits_angle_sum Count in train (desc) Count in validation Share in train Share in validation
    0 3.600000e+02 28431 292.0 0.86741 0.80663
    1 -9.990000e+02 3636 57.0 0.11093 0.15746
    2 0.000000e+00 621 13.0 0.01895 0.03591
    3 1.800000e+02 18 0.0 0.00055 0.00000
    4 1.420000e-14 1 0.0 0.00003 0.00000
    5 3.527213e+02 1 0.0 0.00003 0.00000
    6 3.546852e+02 1 0.0 0.00003 0.00000
    7 3.546193e+02 1 0.0 0.00003 0.00000
    8 3.540898e+02 1 0.0 0.00003 0.00000
    9 3.539915e+02 1 0.0 0.00003 0.00000
    Most correlated values with between_axis_digits_angle_sum:
    Column Correlation with between_axis_digits_angle_sum
    0 between_axis_digits_angle_sum 1.000000
    1 between_digits_angle_ccw_sum 0.550978
    2 between_digits_angle_cw_sum 0.520155
    3 number_of_digits 0.398671
    4 count_defects 0.137624
    5 vert_count 0.135043
    112 missing_digit_9 -0.261707
    113 between_digits_angle_cw_var -0.278970
    114 between_digits_angle_ccw_var -0.279401
    115 missing_digit_3 -0.340519
    116 missing_digit_6 -0.342443

    between_axis_digits_angle_var

    (Jump to top)
    between_axis_digits_angle_var Train Validation
    0 count 30870.000000 314.000000
    1 mean 2587.128279 3628.997025
    2 std 5675.602203 7408.944854
    3 min 0.000000 0.000000
    4 25% 102.884207 137.498073
    5 50% 296.051705 448.817129
    6 75% 2402.861831 2978.269854
    7 max 63116.006320 46608.017880
    8 unique values 30817.000000 313.000000
    9 unique values / count 0.940200 0.864600
    10 NaNs 1907.000000 48.000000
    Most popular values (NaN = -999):
    between_axis_digits_angle_var Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 1907 48.0 0.05818 0.94118
    1 0.000000 55 3.0 0.00168 0.05882
    2 61525.541310 1 0.0 0.00003 0.00000
    3 1070.440496 1 0.0 0.00003 0.00000
    4 1113.573476 1 0.0 0.00003 0.00000
    5 1113.411462 1 0.0 0.00003 0.00000
    6 1113.350526 1 0.0 0.00003 0.00000
    7 1112.569319 1 0.0 0.00003 0.00000
    8 1112.049188 1 0.0 0.00003 0.00000
    9 1110.499685 1 0.0 0.00003 0.00000
    Most correlated values with between_axis_digits_angle_var:
    Column Correlation with between_axis_digits_angle_var
    0 between_axis_digits_angle_var 1.000000
    1 between_digits_angle_cw_var 0.486934
    2 between_digits_angle_ccw_var 0.425766
    3 missing_digit_6 0.371018
    4 missing_digit_9 0.366502
    5 missing_digit_3 0.285999
    112 number_of_hands -0.164113
    113 hand_count_dummy -0.165964
    114 vert_count -0.192770
    115 sequence_flag_cw -0.376472
    116 number_of_digits -0.440209

    between_digits_angle_cw_sum

    (Jump to top)
    between_digits_angle_cw_sum Train Validation
    0 count 20027.000000 202.000000
    1 mean 355.100767 345.742574
    2 std 41.688130 70.384114
    3 min 0.000000 0.000000
    4 25% 360.000000 360.000000
    5 50% 360.000000 360.000000
    6 75% 360.000000 360.000000
    7 max 360.000000 360.000000
    8 unique values 5.000000 3.000000
    9 unique values / count 0.000200 0.008300
    10 NaNs 12750.000000 160.000000
    Most popular values (NaN = -999):
    between_digits_angle_cw_sum Count in train (desc) Count in validation Share in train Share in validation
    0 360.000000 19753 194.0 0.60265 0.53591
    1 -999.000000 12750 160.0 0.38899 0.44199
    2 0.000000 272 8.0 0.00830 0.02210
    3 180.000000 1 0.0 0.00003 0.00000
    4 343.050876 1 0.0 0.00003 0.00000

    between_digits_angle_cw_var

    (Jump to top)
    between_digits_angle_cw_var Train Validation
    0 count 32085.000000 338.000000
    1 mean 3081.777480 4724.892924
    2 std 5648.419057 7387.263597
    3 min 0.000000 8.742291
    4 25% 51.690940 60.350992
    5 50% 165.733916 266.831753
    6 75% 5215.235174 8875.680382
    7 max 63259.726600 58155.070340
    8 unique values 32080.000000 339.000000
    9 unique values / count 0.978700 0.936500
    10 NaNs 692.000000 24.000000
    Most popular values (NaN = -999):
    between_digits_angle_cw_var Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 692 24.0 0.02111 1.0
    1 0.000000 7 0.0 0.00021 0.0
    2 727.278396 1 0.0 0.00003 0.0
    3 543.282446 1 0.0 0.00003 0.0
    4 547.365426 1 0.0 0.00003 0.0
    5 547.013243 1 0.0 0.00003 0.0
    6 546.532744 1 0.0 0.00003 0.0
    7 546.069384 1 0.0 0.00003 0.0
    8 545.314838 1 0.0 0.00003 0.0
    9 545.094328 1 0.0 0.00003 0.0
    Most correlated values with between_digits_angle_cw_var:
    Column Correlation with between_digits_angle_cw_var
    0 between_digits_angle_cw_var 1.000000
    1 between_digits_angle_ccw_var 0.822345
    2 between_axis_digits_angle_var 0.486934
    3 missing_digit_9 0.381017
    4 missing_digit_5 0.376451
    5 missing_digit_6 0.365358
    112 8 dist from cen -0.200091
    113 7 dist from cen -0.207636
    114 between_axis_digits_angle_sum -0.278970
    115 number_of_digits -0.522303
    116 sequence_flag_cw -0.749706

    between_digits_angle_ccw_sum

    (Jump to top)
    between_digits_angle_ccw_sum Train Validation
    0 count 844.000000 22.000000
    1 mean 243.825427 229.090909
    2 std 168.298221 177.251747
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 360.000000 360.000000
    6 75% 360.000000 360.000000
    7 max 360.000000 360.000000
    8 unique values 4.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 31933.000000 340.000000
    Most popular values (NaN = -999):
    between_digits_angle_ccw_sum Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 31933 340.0 0.97425 0.93923
    1 360.000000 571 14.0 0.01742 0.03867
    2 0.000000 272 8.0 0.00830 0.02210
    3 228.660189 1 0.0 0.00003 0.00000

    between_digits_angle_ccw_var

    (Jump to top)
    between_digits_angle_ccw_var Train Validation
    0 count 32085.000000 338.000000
    1 mean 3157.421099 4427.317347
    2 std 5784.787525 7397.081229
    3 min 0.000000 8.742291
    4 25% 52.068825 60.350992
    5 50% 167.238094 259.500649
    6 75% 5224.038548 8310.911603
    7 max 63259.726600 58155.070340
    8 unique values 32080.000000 339.000000
    9 unique values / count 0.978700 0.936500
    10 NaNs 692.000000 24.000000
    Most popular values (NaN = -999):
    between_digits_angle_ccw_var Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 692 24.0 0.02111 1.0
    1 0.000000 7 0.0 0.00021 0.0
    2 740.556958 1 0.0 0.00003 0.0
    3 550.997328 1 0.0 0.00003 0.0
    4 553.806018 1 0.0 0.00003 0.0
    5 553.654116 1 0.0 0.00003 0.0
    6 553.118798 1 0.0 0.00003 0.0
    7 552.857677 1 0.0 0.00003 0.0
    8 552.686185 1 0.0 0.00003 0.0
    9 552.664877 1 0.0 0.00003 0.0
    Most correlated values with between_digits_angle_ccw_var:
    Column Correlation with between_digits_angle_ccw_var
    0 between_digits_angle_ccw_var 1.000000
    1 between_digits_angle_cw_var 0.822345
    2 between_axis_digits_angle_var 0.425766
    3 missing_digit_6 0.366339
    4 missing_digit_9 0.366297
    5 missing_digit_5 0.362640
    112 7 dist from cen -0.182829
    113 vert_count -0.191866
    114 between_axis_digits_angle_sum -0.279401
    115 number_of_digits -0.508269
    116 sequence_flag_cw -0.581017

    sequence_flag_cw

    (Jump to top)
    sequence_flag_cw Train Validation
    0 count 32474.000000 351.000000
    1 mean 0.750385 0.658120
    2 std 0.432797 0.475017
    3 min 0.000000 0.000000
    4 25% 1.000000 0.000000
    5 50% 1.000000 1.000000
    6 75% 1.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 303.000000 11.000000
    Most popular values (NaN = -999):
    sequence_flag_cw Count in train (desc) Count in validation Share in train Share in validation
    0 1.0 24368 231 0.74345 0.63812
    1 0.0 8106 120 0.24731 0.33149
    2 -999.0 303 11 0.00924 0.03039

    sequence_flag_ccw

    (Jump to top)
    sequence_flag_ccw Train Validation
    0 count 32474.000000 351.000000
    1 mean 0.018538 0.039886
    2 std 0.134888 0.195971
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 303.000000 11.000000
    Most popular values (NaN = -999):
    sequence_flag_ccw Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 31872 337 0.97239 0.93094
    1 1.0 602 14 0.01837 0.03867
    2 -999.0 303 11 0.00924 0.03039

    number_of_hands

    (Jump to top)
    number_of_hands Train Validation
    0 count 30623.000000 307.000000
    1 mean 1.772132 1.723127
    2 std 0.457020 0.483268
    3 min 1.000000 1.000000
    4 25% 2.000000 1.000000
    5 50% 2.000000 2.000000
    6 75% 2.000000 2.000000
    7 max 8.000000 3.000000
    8 unique values 8.000000 4.000000
    9 unique values / count 0.000200 0.011000
    10 NaNs 2154.000000 55.000000
    Most popular values (NaN = -999):
    number_of_hands Count in train (desc) Count in validation Share in train Share in validation
    0 2.0 22861 212.0 0.69747 0.58564
    1 1.0 7397 90.0 0.22568 0.24862
    2 -999.0 2154 55.0 0.06572 0.15193
    3 3.0 329 5.0 0.01004 0.01381
    4 4.0 25 0.0 0.00076 0.00000
    5 5.0 8 0.0 0.00024 0.00000
    6 8.0 2 0.0 0.00006 0.00000
    7 6.0 1 0.0 0.00003 0.00000

    hand_count_dummy

    (Jump to top)
    hand_count_dummy Train Validation
    0 count 30623.000000 307.000000
    1 mean 1.770369 1.723127
    2 std 0.448046 0.483268
    3 min 1.000000 1.000000
    4 25% 2.000000 1.000000
    5 50% 2.000000 2.000000
    6 75% 2.000000 2.000000
    7 max 3.000000 3.000000
    8 unique values 4.000000 4.000000
    9 unique values / count 0.000100 0.011000
    10 NaNs 2154.000000 55.000000
    Most popular values (NaN = -999):
    hand_count_dummy Count in train (desc) Count in validation Share in train Share in validation
    0 2.0 22861 212 0.69747 0.58564
    1 1.0 7397 90 0.22568 0.24862
    2 -999.0 2154 55 0.06572 0.15193
    3 3.0 365 5 0.01114 0.01381

    hour_hand_length

    (Jump to top)
    hour_hand_length Train Validation
    0 count 22861.000000 212.000000
    1 mean 60.538409 60.135352
    2 std 14.191507 13.826877
    3 min 23.164818 30.895178
    4 25% 50.270850 50.049918
    5 50% 59.852542 58.839900
    6 75% 70.048169 69.983105
    7 max 123.519704 102.329077
    8 unique values 11932.000000 210.000000
    9 unique values / count 0.364000 0.580100
    10 NaNs 9916.000000 150.000000
    Most popular values (NaN = -999):
    hour_hand_length Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 9916 150.0 0.30253 0.4886
    1 55.815285 12 0.0 0.00037 0.0000
    2 60.118500 11 0.0 0.00034 0.0000
    3 58.730656 11 0.0 0.00034 0.0000
    4 56.093757 11 0.0 0.00034 0.0000
    5 59.852542 11 0.0 0.00034 0.0000
    6 44.099710 10 0.0 0.00031 0.0000
    7 67.562490 10 0.0 0.00031 0.0000
    8 47.905661 10 0.0 0.00031 0.0000
    9 43.794374 10 0.0 0.00031 0.0000
    Most correlated values with hour_hand_length:
    Column Correlation with hour_hand_length
    0 hour_hand_length 1.000000
    1 minute_hand_length 0.520242
    2 11 dist from cen 0.295982
    3 10 dist from cen 0.250122
    4 2 dist from cen 0.236779
    5 12 dist from cen 0.232295
    109 area_digit_12 -0.134570
    110 deviation_from_centre -0.147130
    111 hour_pointing_digit -0.198477
    112 clockhand_diff -0.536956
    113 clockhand_ratio -0.680213

    minute_hand_length

    (Jump to top)
    minute_hand_length Train Validation
    0 count 22861.000000 212.000000
    1 mean 80.874117 79.570952
    2 std 13.311371 13.429312
    3 min 33.186431 47.984618
    4 25% 71.700628 71.510371
    5 50% 80.769660 78.795649
    6 75% 89.989302 89.283438
    7 max 133.691585 116.749554
    8 unique values 13481.000000 213.000000
    9 unique values / count 0.411300 0.588400
    10 NaNs 9916.000000 150.000000
    Most popular values (NaN = -999):
    minute_hand_length Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 9916 150.0 0.30253 0.53957
    1 81.982498 12 0.0 0.00037 0.00000
    2 75.956053 10 0.0 0.00031 0.00000
    3 73.270657 10 0.0 0.00031 0.00000
    4 78.263887 10 0.0 0.00031 0.00000
    5 70.368317 10 0.0 0.00031 0.00000
    6 74.271311 9 0.0 0.00027 0.00000
    7 79.880954 9 0.0 0.00027 0.00000
    8 82.686541 9 0.0 0.00027 0.00000
    9 78.728927 9 0.0 0.00027 0.00000
    Most correlated values with minute_hand_length:
    Column Correlation with minute_hand_length
    0 minute_hand_length 1.000000
    1 hour_hand_length 0.520242
    2 2 dist from cen 0.441396
    3 clockhand_diff 0.433287
    4 3 dist from cen 0.404336
    5 1 dist from cen 0.358070
    109 width_digit_10 -0.196827
    110 width_digit_12 -0.199699
    111 area_digit_2 -0.200065
    112 height_digit_3 -0.212409
    113 area_digit_12 -0.215315

    single_hand_length

    (Jump to top)
    single_hand_length Train Validation
    0 count 7741.000000 95.000000
    1 mean 74.602333 74.529233
    2 std 35.873579 35.973685
    3 min 18.017081 26.658560
    4 25% 53.825024 52.406221
    5 50% 68.324616 68.494581
    6 75% 85.473987 86.451251
    7 max 292.853059 241.175192
    8 unique values 6679.000000 96.000000
    9 unique values / count 0.203800 0.265200
    10 NaNs 25036.000000 267.000000
    Most popular values (NaN = -999):
    single_hand_length Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 25036 267.0 0.76383 0.90816
    1 64.543396 5 0.0 0.00015 0.00000
    2 53.864571 5 0.0 0.00015 0.00000
    3 68.683812 5 0.0 0.00015 0.00000
    4 62.920537 4 0.0 0.00012 0.00000
    5 44.826443 4 0.0 0.00012 0.00000
    6 62.048552 4 0.0 0.00012 0.00000
    7 46.033953 4 0.0 0.00012 0.00000
    8 52.572103 4 0.0 0.00012 0.00000
    9 52.667170 4 0.0 0.00012 0.00000
    Most correlated values with single_hand_length:
    Column Correlation with single_hand_length
    0 single_hand_length 1.000000
    1 number_of_hands 0.799684
    2 hand_count_dummy 0.796532
    3 2 dist from cen 0.205183
    4 10 dist from cen 0.204625
    5 11 dist from cen 0.198807
    98 height_digit_2 -0.136282
    99 area_digit_10 -0.136522
    100 area_digit_2 -0.136554
    101 height_digit_12 -0.143108
    102 area_digit_12 -0.150008

    clockhand_ratio

    (Jump to top)
    clockhand_ratio Train Validation
    0 count 22650.000000 210.000000
    1 mean 1.375478 1.357921
    2 std 0.299056 0.283834
    3 min 1.000000 1.001226
    4 25% 1.138048 1.128084
    5 50% 1.307708 1.264944
    6 75% 1.542887 1.521516
    7 max 2.498143 2.318983
    8 unique values 22643.000000 211.000000
    9 unique values / count 0.690800 0.582900
    10 NaNs 10127.000000 152.000000
    Most popular values (NaN = -999):
    clockhand_ratio Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 10127 152.0 0.30897 1.0
    1 1.081122 2 0.0 0.00006 0.0
    2 1.296263 2 0.0 0.00006 0.0
    3 1.500000 2 0.0 0.00006 0.0
    4 1.374369 2 0.0 0.00006 0.0
    5 1.331005 2 0.0 0.00006 0.0
    6 1.166190 2 0.0 0.00006 0.0
    7 1.313761 2 0.0 0.00006 0.0
    8 1.242335 2 0.0 0.00006 0.0
    9 1.450622 1 0.0 0.00003 0.0
    Most correlated values with clockhand_ratio:
    Column Correlation with clockhand_ratio
    0 clockhand_ratio 1.000000
    1 clockhand_diff 0.942971
    2 hour_pointing_digit 0.257833
    3 minute_hand_length 0.206275
    4 3 dist from cen 0.089609
    5 2 dist from cen 0.088841
    109 hour_proximity_from_11 -0.239052
    110 minute_proximity_from_2 -0.242428
    111 time_diff -0.249547
    112 minute_pointing_digit -0.270242
    113 hour_hand_length -0.680213

    clockhand_diff

    (Jump to top)
    clockhand_diff Train Validation
    0 count 22835.000000 212.000000
    1 mean 20.270851 19.435599
    2 std 13.359890 12.867100
    3 min 0.000000 0.087289
    4 25% 9.375855 7.997486
    5 50% 18.837195 16.676536
    6 75% 29.315735 28.337904
    7 max 69.872251 57.496215
    8 unique values 22835.000000 213.000000
    9 unique values / count 0.696700 0.588400
    10 NaNs 9942.000000 150.000000
    Most popular values (NaN = -999):
    clockhand_diff Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 9942 150.0 0.30332 1.0
    1 16.378449 2 0.0 0.00006 0.0
    2 25.423720 1 0.0 0.00003 0.0
    3 25.448239 1 0.0 0.00003 0.0
    4 25.445561 1 0.0 0.00003 0.0
    5 25.440034 1 0.0 0.00003 0.0
    6 25.432853 1 0.0 0.00003 0.0
    7 25.431207 1 0.0 0.00003 0.0
    8 25.429854 1 0.0 0.00003 0.0
    9 25.427989 1 0.0 0.00003 0.0
    Most correlated values with clockhand_diff:
    Column Correlation with clockhand_diff
    0 clockhand_diff 1.000000
    1 clockhand_ratio 0.942971
    2 minute_hand_length 0.433287
    3 hour_pointing_digit 0.272398
    4 2 dist from cen 0.180215
    5 3 dist from cen 0.174845
    109 minute_proximity_from_2 -0.253699
    110 time_diff -0.263345
    111 hour_proximity_from_11 -0.266208
    112 minute_pointing_digit -0.288317
    113 hour_hand_length -0.536956

    angle_between_hands

    (Jump to top)
    angle_between_hands Train Validation
    0 count 22861.000000 212.000000
    1 mean 90.170001 90.239117
    2 std 23.522379 26.164275
    3 min 0.029409 6.751041
    4 25% 82.023132 80.093143
    5 50% 91.649346 91.296687
    6 75% 101.621967 103.125832
    7 max 179.518624 179.918115
    8 unique values 22839.000000 213.000000
    9 unique values / count 0.696800 0.588400
    10 NaNs 9916.000000 150.000000
    Most popular values (NaN = -999):
    angle_between_hands Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 9916 150.0 0.30253 1.0
    1 90.000000 7 0.0 0.00021 0.0
    2 108.434949 2 0.0 0.00006 0.0
    3 101.915147 2 0.0 0.00006 0.0
    4 96.047537 2 0.0 0.00006 0.0
    5 87.888017 2 0.0 0.00006 0.0
    6 80.371844 2 0.0 0.00006 0.0
    7 95.022591 2 0.0 0.00006 0.0
    8 94.635463 2 0.0 0.00006 0.0
    9 101.475144 2 0.0 0.00006 0.0
    Most correlated values with angle_between_hands:
    Column Correlation with angle_between_hands
    0 angle_between_hands 1.000000
    1 hour_proximity_from_11 0.091000
    2 clockhand_ratio 0.081459
    3 number_of_digits 0.075117
    4 clockhand_diff 0.072300
    5 5 dist from cen 0.065082
    109 missing_digit_1 -0.080816
    110 hour_hand_length -0.090534
    111 minute_proximity_from_2 -0.142554
    112 minute_pointing_digit -0.217342
    113 eleven_ten_error -0.484725

    deviation_from_centre

    (Jump to top)
    deviation_from_centre Train Validation
    0 count 22793.000000 210.000000
    1 mean 17.420096 17.049692
    2 std 19.001146 17.908754
    3 min 0.100852 1.399172
    4 25% 8.079066 8.068298
    5 50% 13.104333 12.563232
    6 75% 20.451729 20.540030
    7 max 298.723197 166.550485
    8 unique values 22794.000000 211.000000
    9 unique values / count 0.695400 0.582900
    10 NaNs 9984.000000 152.000000
    Most popular values (NaN = -999):
    deviation_from_centre Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 9984 152.0 0.30460 1.0
    1 17.310650 1 0.0 0.00003 0.0
    2 17.338296 1 0.0 0.00003 0.0
    3 17.332467 1 0.0 0.00003 0.0
    4 17.331895 1 0.0 0.00003 0.0
    5 17.328964 1 0.0 0.00003 0.0
    6 17.328468 1 0.0 0.00003 0.0
    7 17.327137 1 0.0 0.00003 0.0
    8 17.326445 1 0.0 0.00003 0.0
    9 17.325690 1 0.0 0.00003 0.0
    Most correlated values with deviation_from_centre:
    Column Correlation with deviation_from_centre
    0 deviation_from_centre 1.000000
    1 eleven_ten_error 0.195836
    2 minute_proximity_from_2 0.130198
    3 between_digits_angle_cw_var 0.123580
    4 between_axis_digits_angle_var 0.120486
    5 minute_pointing_digit 0.114182
    109 5 dist from cen -0.135548
    110 number_of_digits -0.136215
    111 4 dist from cen -0.145001
    112 hour_hand_length -0.147130
    113 3 dist from cen -0.149239

    intersection_pos_rel_centre

    (Jump to top)
    intersection_pos_rel_centre Train Validation
    0 count 22861 212
    1 unique 4 4
    2 top TL TL
    3 freq 10264 98
    4 unique values 5 5
    5 unique values / count 0.0002 0.0138
    6 NaNs 9916 150
    Most popular values (NaN = -999):
    intersection_pos_rel_centre Count in train (desc) Count in validation Share in train Share in validation
    0 TL 10264 98 0.31315 0.27072
    1 -999 9916 150 0.30253 0.41436
    2 BL 5243 40 0.15996 0.11050
    3 TR 4910 51 0.14980 0.14088
    4 BR 2444 23 0.07456 0.06354

    hour_proximity_from_11

    (Jump to top)
    hour_proximity_from_11 Train Validation
    0 count 20191.000000 186.000000
    1 mean 24.922338 29.999736
    2 std 39.291724 42.547637
    3 min 0.000000 0.088822
    4 25% 2.334051 2.553865
    5 50% 5.189904 5.840832
    6 75% 16.677090 41.478602
    7 max 179.774464 172.292277
    8 unique values 20154.000000 187.000000
    9 unique values / count 0.614900 0.516600
    10 NaNs 12586.000000 176.000000
    Most popular values (NaN = -999):
    hour_proximity_from_11 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 12586 176.0 0.38399 1.0
    1 1.224456 3 0.0 0.00009 0.0
    2 5.792796 3 0.0 0.00009 0.0
    3 0.800488 3 0.0 0.00009 0.0
    4 2.935673 2 0.0 0.00006 0.0
    5 0.838200 2 0.0 0.00006 0.0
    6 1.116010 2 0.0 0.00006 0.0
    7 1.561338 2 0.0 0.00006 0.0
    8 8.325650 2 0.0 0.00006 0.0
    9 3.964655 2 0.0 0.00006 0.0
    Most correlated values with hour_proximity_from_11:
    Column Correlation with hour_proximity_from_11
    0 hour_proximity_from_11 1.000000
    1 time_diff 0.891423
    2 minute_pointing_digit 0.796875
    3 minute_proximity_from_2 0.743305
    4 other_error 0.645633
    5 hour_hand_length 0.145876
    108 2 dist from cen -0.112809
    109 minute_hand_length -0.116291
    110 clockhand_ratio -0.239052
    111 clockhand_diff -0.266208
    112 hour_pointing_digit -0.901003

    minute_proximity_from_2

    (Jump to top)
    minute_proximity_from_2 Train Validation
    0 count 19919.000000 183.000000
    1 mean 33.267558 36.490160
    2 std 46.534051 47.163293
    3 min 0.000000 0.031815
    4 25% 1.728408 1.662054
    5 50% 4.258455 4.510643
    6 75% 77.721980 80.294444
    7 max 179.928116 178.954617
    8 unique values 19900.000000 184.000000
    9 unique values / count 0.607100 0.508300
    10 NaNs 12858.000000 179.000000
    Most popular values (NaN = -999):
    minute_proximity_from_2 Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 12858 179.0 0.39229 1.0
    1 0.344459 2 0.0 0.00006 0.0
    2 4.184916 2 0.0 0.00006 0.0
    3 3.570477 2 0.0 0.00006 0.0
    4 107.452832 2 0.0 0.00006 0.0
    5 6.340192 2 0.0 0.00006 0.0
    6 0.655530 2 0.0 0.00006 0.0
    7 1.709814 2 0.0 0.00006 0.0
    8 5.599339 2 0.0 0.00006 0.0
    9 1.478273 2 0.0 0.00006 0.0
    Most correlated values with minute_proximity_from_2:
    Column Correlation with minute_proximity_from_2
    0 minute_proximity_from_2 1.000000
    1 minute_pointing_digit 0.824663
    2 hour_proximity_from_11 0.743305
    3 other_error 0.688956
    4 time_diff 0.661584
    5 eleven_ten_error 0.279171
    108 number_of_digits -0.240229
    109 clockhand_ratio -0.242428
    110 clockhand_diff -0.253699
    111 sequence_flag_cw -0.268027
    112 hour_pointing_digit -0.689805

    hour_pointing_digit

    (Jump to top)
    hour_pointing_digit Train Validation
    0 count 22677.000000 209.000000
    1 mean 9.047449 8.956938
    2 std 3.661448 3.727892
    3 min 1.000000 1.000000
    4 25% 10.000000 9.000000
    5 50% 11.000000 11.000000
    6 75% 11.000000 11.000000
    7 max 12.000000 12.000000
    8 unique values 13.000000 11.000000
    9 unique values / count 0.000400 0.030400
    10 NaNs 10100.000000 153.000000
    Most popular values (NaN = -999):
    hour_pointing_digit Count in train (desc) Count in validation Share in train Share in validation
    0 11.0 15230 129.0 0.46466 0.35635
    1 -999.0 10100 153.0 0.30814 0.42265
    2 2.0 3695 36.0 0.11273 0.09945
    3 10.0 1257 12.0 0.03835 0.03315
    4 12.0 971 15.0 0.02962 0.04144
    5 1.0 554 5.0 0.01690 0.01381
    6 3.0 332 2.0 0.01013 0.00552
    7 9.0 183 2.0 0.00558 0.00552
    8 8.0 147 4.0 0.00448 0.01105
    9 4.0 113 2.0 0.00345 0.00552
    Most correlated values with hour_pointing_digit:
    Column Correlation with hour_pointing_digit
    0 hour_pointing_digit 1.000000
    1 clockhand_diff 0.272398
    2 clockhand_ratio 0.257833
    3 eleven_ten_error 0.084089
    4 2 dist from cen 0.075340
    5 8 dist from cen 0.069877
    109 other_error -0.594034
    110 minute_proximity_from_2 -0.689805
    111 minute_pointing_digit -0.741958
    112 hour_proximity_from_11 -0.901003
    113 time_diff -0.997903

    actual_hour_digit

    (Jump to top)
    actual_hour_digit Train Validation
    0 count 32777.0 362.0000
    1 mean 11.0 11.0000
    2 std 0.0 0.0000
    3 min 11.0 11.0000
    4 25% 11.0 11.0000
    5 50% 11.0 11.0000
    6 75% 11.0 11.0000
    7 max 11.0 11.0000
    8 unique values 1.0 1.0000
    9 unique values / count 0.0 0.0028
    10 NaNs 0.0 0.0000
    Most popular values (NaN = -999):
    actual_hour_digit Count in train (desc) Count in validation Share in train Share in validation
    0 11 32777 362 1.0 1.0

    minute_pointing_digit

    (Jump to top)
    minute_pointing_digit Train Validation
    0 count 22678.000000 208.000000
    1 mean 4.393377 4.769231
    2 std 3.965286 4.093664
    3 min 1.000000 1.000000
    4 25% 2.000000 2.000000
    5 50% 2.000000 2.000000
    6 75% 10.000000 11.000000
    7 max 12.000000 12.000000
    8 unique values 13.000000 12.000000
    9 unique values / count 0.000400 0.033100
    10 NaNs 10099.000000 154.000000
    Most popular values (NaN = -999):
    minute_pointing_digit Count in train (desc) Count in validation Share in train Share in validation
    0 2.0 13325 113.0 0.40654 0.31215
    1 -999.0 10099 154.0 0.30811 0.42541
    2 11.0 4232 47.0 0.12911 0.12983
    3 1.0 1703 12.0 0.05196 0.03315
    4 3.0 1263 16.0 0.03853 0.04420
    5 10.0 771 7.0 0.02352 0.01934
    6 12.0 762 6.0 0.02325 0.01657
    7 4.0 141 1.0 0.00430 0.00276
    8 9.0 135 2.0 0.00412 0.00552
    9 8.0 108 1.0 0.00329 0.00276
    Most correlated values with minute_pointing_digit:
    Column Correlation with minute_pointing_digit
    0 minute_pointing_digit 1.000000
    1 minute_proximity_from_2 0.824663
    2 hour_proximity_from_11 0.796875
    3 time_diff 0.697008
    4 other_error 0.520950
    5 eleven_ten_error 0.305689
    109 number_of_digits -0.148978
    110 angle_between_hands -0.217342
    111 clockhand_ratio -0.270242
    112 clockhand_diff -0.288317
    113 hour_pointing_digit -0.741958

    actual_minute_digit

    (Jump to top)
    actual_minute_digit Train Validation
    0 count 32777.0 362.0000
    1 mean 2.0 2.0000
    2 std 0.0 0.0000
    3 min 2.0 2.0000
    4 25% 2.0 2.0000
    5 50% 2.0 2.0000
    6 75% 2.0 2.0000
    7 max 2.0 2.0000
    8 unique values 1.0 1.0000
    9 unique values / count 0.0 0.0028
    10 NaNs 0.0 0.0000
    Most popular values (NaN = -999):
    actual_minute_digit Count in train (desc) Count in validation Share in train Share in validation
    0 2 32777 362 1.0 1.0

    final_rotation_angle

    (Jump to top)
    final_rotation_angle Train Validation
    0 count 32703.000000 359.000000
    1 mean 65.737088 65.933148
    2 std 110.472325 106.562102
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 90.000000 90.000000
    7 max 330.000000 330.000000
    8 unique values 13.000000 13.000000
    9 unique values / count 0.000400 0.035900
    10 NaNs 74.000000 3.000000
    Most popular values (NaN = -999):
    final_rotation_angle Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 20966 213 0.63966 0.58840
    1 90.0 2961 30 0.09034 0.08287
    2 270.0 2504 24 0.07640 0.06630
    3 330.0 2096 17 0.06395 0.04696
    4 30.0 1571 29 0.04793 0.08011
    5 60.0 684 12 0.02087 0.03315
    6 240.0 653 6 0.01992 0.01657
    7 300.0 460 10 0.01403 0.02762
    8 150.0 229 5 0.00699 0.01381
    9 210.0 227 2 0.00693 0.00552
    Most correlated values with final_rotation_angle:
    Column Correlation with final_rotation_angle
    0 final_rotation_angle 1.000000
    1 euc_dist_digit_11 0.582144
    2 euc_dist_digit_9 0.575165
    3 euc_dist_digit_10 0.571299
    4 euc_dist_digit_3 0.570184
    5 deviation_dist_from_mid_axis 0.562626
    112 height_digit_8 -0.243142
    113 height_digit_9 -0.244160
    114 height_digit_7 -0.253551
    115 height_digit_6 -0.264293
    116 height_digit_1 -0.311311

    ellipse_circle_ratio

    (Jump to top)
    ellipse_circle_ratio Train Validation
    0 count 3.203900e+04 350.000000
    1 mean 7.911654e+01 78.340194
    2 std 1.453976e+01 14.773874
    3 min 5.060000e-10 12.502368
    4 25% 7.802811e+01 77.227502
    5 50% 8.375303e+01 83.325646
    6 75% 8.742738e+01 86.784052
    7 max 9.997281e+01 99.147283
    8 unique values 3.203900e+04 351.000000
    9 unique values / count 9.775000e-01 0.969600
    10 NaNs 7.380000e+02 12.000000
    Most popular values (NaN = -999):
    ellipse_circle_ratio Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 738 12.0 0.02252 1.0
    1 88.760307 2 0.0 0.00006 0.0
    2 86.232981 1 0.0 0.00003 0.0
    3 86.240018 1 0.0 0.00003 0.0
    4 86.239462 1 0.0 0.00003 0.0
    5 86.239349 1 0.0 0.00003 0.0
    6 86.239173 1 0.0 0.00003 0.0
    7 86.239094 1 0.0 0.00003 0.0
    8 86.237093 1 0.0 0.00003 0.0
    9 86.236842 1 0.0 0.00003 0.0
    Most correlated values with ellipse_circle_ratio:
    Column Correlation with ellipse_circle_ratio
    0 ellipse_circle_ratio 1.000000
    1 count_defects 0.797305
    2 percentage_inside_ellipse 0.418027
    3 double_minor 0.375271
    4 vertical_dist 0.298778
    5 horizontal_dist 0.262587
    112 missing_digit_11 -0.140302
    113 between_digits_angle_cw_var -0.141595
    114 missing_digit_1 -0.158602
    115 missing_digit_6 -0.171639
    116 pred_tremor -0.582673

    count_defects

    (Jump to top)
    count_defects Train Validation
    0 count 32777.000000 362.000000
    1 mean 93.489459 90.502762
    2 std 39.504488 40.329684
    3 min 1.000000 1.000000
    4 25% 78.000000 72.250000
    5 50% 103.000000 99.000000
    6 75% 121.000000 120.000000
    7 max 176.000000 159.000000
    8 unique values 172.000000 119.000000
    9 unique values / count 0.005200 0.328700
    10 NaNs 0.000000 0.000000
    Most popular values (NaN = -999):
    count_defects Count in train (desc) Count in validation Share in train Share in validation
    0 1 1480 19.0 0.04515 0.05249
    1 2 1344 13.0 0.04100 0.03591
    2 105 476 4.0 0.01452 0.01105
    3 114 475 3.0 0.01449 0.00829
    4 109 472 4.0 0.01440 0.01105
    5 108 469 2.0 0.01431 0.00552
    6 104 467 4.0 0.01425 0.01105
    7 111 464 8.0 0.01416 0.02210
    8 123 463 6.0 0.01413 0.01657
    9 122 462 5.0 0.01410 0.01381
    Most correlated values with count_defects:
    Column Correlation with count_defects
    0 count_defects 1.000000
    1 ellipse_circle_ratio 0.797305
    2 percentage_inside_ellipse 0.402677
    3 between_digits_angle_ccw_sum 0.378013
    4 number_of_digits 0.337459
    5 double_minor 0.272497
    112 missing_digit_7 -0.196397
    113 missing_digit_1 -0.214796
    114 missing_digit_3 -0.222940
    115 missing_digit_6 -0.249927
    116 pred_tremor -0.813348

    percentage_inside_ellipse

    (Jump to top)
    percentage_inside_ellipse Train Validation
    0 count 32472.000000 349.000000
    1 mean 0.939555 0.931956
    2 std 0.169569 0.185714
    3 min 0.000000 0.000000
    4 25% 1.000000 1.000000
    5 50% 1.000000 1.000000
    6 75% 1.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 71.000000 27.000000
    9 unique values / count 0.002200 0.074600
    10 NaNs 305.000000 13.000000
    Most popular values (NaN = -999):
    percentage_inside_ellipse Count in train (desc) Count in validation Share in train Share in validation
    0 1.000000 26211 277.0 0.79968 0.76519
    1 0.923077 485 3.0 0.01480 0.00829
    2 0.916667 444 8.0 0.01355 0.02210
    3 0.909091 385 4.0 0.01175 0.01105
    4 -999.000000 305 13.0 0.00931 0.03591
    5 0.900000 291 4.0 0.00888 0.01105
    6 0.833333 290 3.0 0.00885 0.00829
    7 0.666667 274 4.0 0.00836 0.01105
    8 0.750000 269 6.0 0.00821 0.01657
    9 0.500000 260 2.0 0.00793 0.00552
    Most correlated values with percentage_inside_ellipse:
    Column Correlation with percentage_inside_ellipse
    0 percentage_inside_ellipse 1.000000
    1 double_minor 0.525281
    2 ellipse_circle_ratio 0.418027
    3 count_defects 0.402677
    4 vertical_dist 0.370782
    5 horizontal_dist 0.352843
    112 2 dist from cen -0.191669
    113 8 dist from cen -0.194555
    114 9 dist from cen -0.206453
    115 missing_digit_6 -0.209613
    116 pred_tremor -0.377465

    pred_tremor

    (Jump to top)
    pred_tremor Train Validation
    0 count 32777.000000 362.000000
    1 mean 0.317052 0.370166
    2 std 0.465335 0.483517
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 1.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 2.000000 2.000000
    9 unique values / count 0.000100 0.005500
    10 NaNs 0.000000 0.000000
    Most popular values (NaN = -999):
    pred_tremor Count in train (desc) Count in validation Share in train Share in validation
    0 0 22385 228 0.68295 0.62983
    1 1 10392 134 0.31705 0.37017

    double_major

    (Jump to top)
    double_major Train Validation
    0 count 32540.000000 356.000000
    1 mean 120.238950 121.543012
    2 std 19.864539 26.870680
    3 min 9.696612 69.766987
    4 25% 115.425250 115.500773
    5 50% 120.289760 120.663224
    6 75% 124.313176 124.568132
    7 max 499.391604 391.080498
    8 unique values 32329.000000 357.000000
    9 unique values / count 0.986300 0.986200
    10 NaNs 237.000000 6.000000
    Most popular values (NaN = -999):
    double_major Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 237 6.0 0.00723 0.66667
    1 391.080498 44 1.0 0.00134 0.11111
    2 388.485640 10 1.0 0.00031 0.11111
    3 122.105888 2 0.0 0.00006 0.00000
    4 111.703500 2 0.0 0.00006 0.00000
    5 122.537322 2 0.0 0.00006 0.00000
    6 117.943730 2 0.0 0.00006 0.00000
    7 122.437394 2 0.0 0.00006 0.00000
    8 122.435657 2 0.0 0.00006 0.00000
    9 119.954358 2 0.0 0.00006 0.00000
    Most correlated values with double_major:
    Column Correlation with double_major
    0 double_major 1.000000
    1 vertical_dist 0.845080
    2 horizontal_dist 0.598207
    3 double_minor 0.429605
    4 1 dist from cen 0.248186
    5 2 dist from cen 0.204333
    112 area_digit_2 -0.162357
    113 area_digit_4 -0.165312
    114 height_digit_3 -0.170254
    115 between_digits_angle_ccw_sum -0.306606
    116 between_digits_angle_cw_sum -0.321985

    double_minor

    (Jump to top)
    double_minor Train Validation
    0 count 3.277600e+04 362.000000
    1 mean 1.065362e+02 105.856352
    2 std 1.295960e+01 15.527933
    3 min 4.210000e-10 11.273212
    4 25% 1.027451e+02 101.999828
    5 50% 1.093602e+02 109.019512
    6 75% 1.135460e+02 113.820122
    7 max 3.051857e+02 187.195461
    8 unique values 3.247000e+04 361.000000
    9 unique values / count 9.906000e-01 0.997200
    10 NaNs 1.000000e+00 0.000000
    Most popular values (NaN = -999):
    double_minor Count in train (desc) Count in validation Share in train Share in validation
    0 133.725995 82 2.0 0.00250 0.250
    1 123.530429 44 1.0 0.00134 0.125
    2 123.391717 10 1.0 0.00031 0.125
    3 105.605306 3 0.0 0.00009 0.000
    4 108.576351 3 0.0 0.00009 0.000
    5 111.876235 2 0.0 0.00006 0.000
    6 103.811491 2 0.0 0.00006 0.000
    7 109.883095 2 0.0 0.00006 0.000
    8 112.518642 2 0.0 0.00006 0.000
    9 114.214601 2 0.0 0.00006 0.000
    Most correlated values with double_minor:
    Column Correlation with double_minor
    0 double_minor 1.000000
    1 horizontal_dist 0.772740
    2 vertical_dist 0.624012
    3 percentage_inside_ellipse 0.525281
    4 double_major 0.429605
    5 ellipse_circle_ratio 0.375271
    112 height_digit_12 -0.206794
    113 variance_area -0.211386
    114 width_digit_12 -0.211453
    115 area_digit_12 -0.247857
    116 pred_tremor -0.349821

    vertical_dist

    (Jump to top)
    vertical_dist Train Validation
    0 count 3.258900e+04 357.000000
    1 mean 1.115766e+02 111.653382
    2 std 1.900768e+01 25.161019
    3 min 4.210000e-10 35.368530
    4 25% 1.077028e+02 106.100329
    5 50% 1.130389e+02 112.352202
    6 75% 1.163918e+02 116.403149
    7 max 4.993892e+02 390.900352
    8 unique values 3.253500e+04 358.000000
    9 unique values / count 9.926000e-01 0.989000
    10 NaNs 1.880000e+02 5.000000
    Most popular values (NaN = -999):
    vertical_dist Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 188 5.0 0.00574 0.71429
    1 390.900352 44 1.0 0.00134 0.14286
    2 388.331914 10 1.0 0.00031 0.14286
    3 171.375846 2 0.0 0.00006 0.00000
    4 104.052479 2 0.0 0.00006 0.00000
    5 114.312094 2 0.0 0.00006 0.00000
    6 115.289004 1 0.0 0.00003 0.00000
    7 115.288905 1 0.0 0.00003 0.00000
    8 115.288896 1 0.0 0.00003 0.00000
    9 115.287365 1 0.0 0.00003 0.00000
    Most correlated values with vertical_dist:
    Column Correlation with vertical_dist
    0 vertical_dist 1.000000
    1 double_major 0.845080
    2 double_minor 0.624012
    3 horizontal_dist 0.440092
    4 percentage_inside_ellipse 0.370782
    5 ellipse_circle_ratio 0.298778
    112 area_digit_3 -0.171541
    113 area_digit_4 -0.176900
    114 area_digit_2 -0.179014
    115 between_digits_angle_ccw_sum -0.263032
    116 between_digits_angle_cw_sum -0.284873

    horizontal_dist

    (Jump to top)
    horizontal_dist Train Validation
    0 count 3.273500e+04 361.000000
    1 mean 1.139164e+02 113.980217
    2 std 1.467719e+01 16.893973
    3 min 2.600000e-09 11.274980
    4 25% 1.099097e+02 108.622829
    5 50% 1.160801e+02 115.858163
    6 75% 1.201251e+02 120.741595
    7 max 4.928847e+02 258.983248
    8 unique values 3.259800e+04 361.000000
    9 unique values / count 9.945000e-01 0.997200
    10 NaNs 4.200000e+01 1.000000
    Most popular values (NaN = -999):
    horizontal_dist Count in train (desc) Count in validation Share in train Share in validation
    0 133.935995 82 2.0 0.00250 0.4
    1 123.536111 44 1.0 0.00134 0.2
    2 -999.000000 42 1.0 0.00128 0.2
    3 123.396646 10 1.0 0.00031 0.2
    4 181.381500 2 0.0 0.00006 0.0
    5 120.848869 2 0.0 0.00006 0.0
    6 109.965183 2 0.0 0.00006 0.0
    7 114.309487 2 0.0 0.00006 0.0
    8 121.489656 2 0.0 0.00006 0.0
    9 118.728776 1 0.0 0.00003 0.0
    Most correlated values with horizontal_dist:
    Column Correlation with horizontal_dist
    0 horizontal_dist 1.000000
    1 double_minor 0.772740
    2 double_major 0.598207
    3 vertical_dist 0.440092
    4 percentage_inside_ellipse 0.352843
    5 12 dist from cen 0.281426
    112 area_digit_4 -0.192197
    113 width_digit_12 -0.201192
    114 height_digit_3 -0.211234
    115 area_digit_12 -0.215978
    116 pred_tremor -0.242870

    top_area_perc

    (Jump to top)
    top_area_perc Train Validation
    0 count 31218.000000 335.000000
    1 mean 0.519007 0.514543
    2 std 0.180912 0.163277
    3 min 0.000000 0.000198
    4 25% 0.472774 0.474520
    5 50% 0.493087 0.496169
    6 75% 0.516964 0.517550
    7 max 1.000000 0.999766
    8 unique values 31154.000000 336.000000
    9 unique values / count 0.950500 0.928200
    10 NaNs 1559.000000 27.000000
    Most popular values (NaN = -999):
    top_area_perc Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 1559 27.0 0.04756 0.96429
    1 1.000000 42 0.0 0.00128 0.00000
    2 0.000000 21 0.0 0.00064 0.00000
    3 0.500000 2 0.0 0.00006 0.00000
    4 0.494486 2 0.0 0.00006 0.00000
    5 0.502998 2 0.0 0.00006 0.00000
    6 0.496161 2 0.0 0.00006 0.00000
    7 0.507022 1 0.0 0.00003 0.00000
    8 0.507024 1 0.0 0.00003 0.00000
    9 0.507118 1 0.0 0.00003 0.00000
    Most correlated values with top_area_perc:
    Column Correlation with top_area_perc
    0 top_area_perc 1.000000
    1 pred_tremor 0.087678
    2 1 dist from cen 0.079866
    3 12 dist from cen 0.078830
    4 11 dist from cen 0.059145
    5 2 dist from cen 0.052549
    112 euc_dist_digit_3 -0.074778
    113 euc_dist_digit_11 -0.078163
    114 euc_dist_digit_10 -0.080901
    115 euc_dist_digit_9 -0.083978
    116 bottom_area_perc -0.911648

    bottom_area_perc

    (Jump to top)
    bottom_area_perc Train Validation
    0 count 31218.000000 335.000000
    1 mean 0.465878 0.470221
    2 std 0.178807 0.167767
    3 min 0.000000 0.000135
    4 25% 0.480002 0.480422
    5 50% 0.505231 0.502377
    6 75% 0.525196 0.524153
    7 max 1.000000 0.999609
    8 unique values 31150.000000 336.000000
    9 unique values / count 0.950400 0.928200
    10 NaNs 1559.000000 27.000000
    Most popular values (NaN = -999):
    bottom_area_perc Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 1559 27.0 0.04756 1.0
    1 0.000000 42 0.0 0.00128 0.0
    2 1.000000 21 0.0 0.00064 0.0
    3 0.493283 2 0.0 0.00006 0.0
    4 0.499756 2 0.0 0.00006 0.0
    5 0.503140 2 0.0 0.00006 0.0
    6 0.000032 2 0.0 0.00006 0.0
    7 0.000048 2 0.0 0.00006 0.0
    8 0.000049 2 0.0 0.00006 0.0
    9 0.508722 2 0.0 0.00006 0.0
    Most correlated values with bottom_area_perc:
    Column Correlation with bottom_area_perc
    0 bottom_area_perc 1.000000
    1 euc_dist_digit_9 0.088688
    2 euc_dist_digit_10 0.085421
    3 count_defects 0.082633
    4 euc_dist_digit_11 0.081441
    5 deviation_dist_from_mid_axis 0.079396
    112 11 dist from cen -0.059124
    113 1 dist from cen -0.075262
    114 12 dist from cen -0.080935
    115 pred_tremor -0.095444
    116 top_area_perc -0.911648

    left_area_perc

    (Jump to top)
    left_area_perc Train Validation
    0 count 31218.000000 335.000000
    1 mean 0.525769 0.533752
    2 std 0.160916 0.142216
    3 min 0.000000 0.000000
    4 25% 0.502471 0.502961
    5 50% 0.520833 0.522655
    6 75% 0.540867 0.542011
    7 max 1.000000 0.999422
    8 unique values 31164.000000 336.000000
    9 unique values / count 0.950800 0.928200
    10 NaNs 1559.000000 27.000000
    Most popular values (NaN = -999):
    left_area_perc Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 1559 27.0 0.04756 0.96429
    1 0.000000 27 1.0 0.00082 0.03571
    2 1.000000 22 0.0 0.00067 0.00000
    3 0.500000 3 0.0 0.00009 0.00000
    4 0.525501 2 0.0 0.00006 0.00000
    5 0.552809 2 0.0 0.00006 0.00000
    6 0.517642 2 0.0 0.00006 0.00000
    7 0.535794 2 0.0 0.00006 0.00000
    8 0.506130 2 0.0 0.00006 0.00000
    9 0.519284 2 0.0 0.00006 0.00000
    Most correlated values with left_area_perc:
    Column Correlation with left_area_perc
    0 left_area_perc 1.000000
    1 minute_pointing_digit 0.065997
    2 minute_proximity_from_2 0.056346
    3 final_rotation_angle 0.049977
    4 hour_proximity_from_11 0.048263
    5 height_digit_6 0.033625
    112 3 dist from cen -0.057945
    113 2 dist from cen -0.058623
    114 vert_count -0.076923
    115 between_digits_angle_ccw_sum -0.082855
    116 right_area_perc -0.953118

    right_area_perc

    (Jump to top)
    right_area_perc Train Validation
    0 count 31218.000000 335.000000
    1 mean 0.464433 0.461093
    2 std 0.160926 0.144587
    3 min 0.000000 0.000311
    4 25% 0.457726 0.456867
    5 50% 0.478135 0.476417
    6 75% 0.496303 0.496495
    7 max 1.000000 1.000000
    8 unique values 31163.000000 336.000000
    9 unique values / count 0.950800 0.928200
    10 NaNs 1559.000000 27.000000
    Most popular values (NaN = -999):
    right_area_perc Count in train (desc) Count in validation Share in train Share in validation
    0 -999.000000 1559 27.0 0.04756 0.96429
    1 1.000000 27 1.0 0.00082 0.03571
    2 0.000000 22 0.0 0.00067 0.00000
    3 0.489436 2 0.0 0.00006 0.00000
    4 0.483657 2 0.0 0.00006 0.00000
    5 0.504740 2 0.0 0.00006 0.00000
    6 0.464652 2 0.0 0.00006 0.00000
    7 0.485564 2 0.0 0.00006 0.00000
    8 0.477806 2 0.0 0.00006 0.00000
    9 0.484961 2 0.0 0.00006 0.00000
    Most correlated values with right_area_perc:
    Column Correlation with right_area_perc
    0 right_area_perc 1.000000
    1 between_digits_angle_ccw_sum 0.089761
    2 vert_count 0.080314
    3 bottom_area_perc 0.077006
    4 2 dist from cen 0.058974
    5 3 dist from cen 0.055738
    112 hour_proximity_from_11 -0.053852
    113 final_rotation_angle -0.056149
    114 minute_proximity_from_2 -0.062987
    115 minute_pointing_digit -0.070494
    116 left_area_perc -0.953118

    hor_count

    (Jump to top)
    hor_count Train Validation
    0 count 32777.000000 362.000000
    1 mean 0.693230 0.640884
    2 std 0.675787 0.684848
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 1.000000 1.000000
    6 75% 1.000000 1.000000
    7 max 3.000000 3.000000
    8 unique values 4.000000 4.000000
    9 unique values / count 0.000100 0.011000
    10 NaNs 0.000000 0.000000
    Most popular values (NaN = -999):
    hor_count Count in train (desc) Count in validation Share in train Share in validation
    0 1 14730 149 0.44940 0.41160
    1 0 14052 172 0.42872 0.47514
    2 2 3993 40 0.12182 0.11050
    3 3 2 1 0.00006 0.00276

    vert_count

    (Jump to top)
    vert_count Train Validation
    0 count 32777.000000 362.000000
    1 mean 0.762211 0.616022
    2 std 0.699355 0.681426
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 1.000000 1.000000
    6 75% 1.000000 1.000000
    7 max 3.000000 2.000000
    8 unique values 4.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 0.000000 0.000000
    Most popular values (NaN = -999):
    vert_count Count in train (desc) Count in validation Share in train Share in validation
    0 1 14902 141.0 0.45465 0.38950
    1 0 12836 180.0 0.39162 0.49724
    2 2 5036 41.0 0.15364 0.11326
    3 3 3 0.0 0.00009 0.00000

    eleven_ten_error

    (Jump to top)
    eleven_ten_error Train Validation
    0 count 32777.000000 362.000000
    1 mean 0.025231 0.030387
    2 std 0.156829 0.171887
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.000000
    7 max 1.000000 1.000000
    8 unique values 2.000000 2.000000
    9 unique values / count 0.000100 0.005500
    10 NaNs 0.000000 0.000000
    Most popular values (NaN = -999):
    eleven_ten_error Count in train (desc) Count in validation Share in train Share in validation
    0 0 31950 351 0.97477 0.96961
    1 1 827 11 0.02523 0.03039

    other_error

    (Jump to top)
    other_error Train Validation
    0 count 32777.000000 362.000000
    1 mean 0.612655 0.701657
    2 std 0.487151 0.458164
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 1.000000 1.000000
    6 75% 1.000000 1.000000
    7 max 1.000000 1.000000
    8 unique values 2.000000 2.000000
    9 unique values / count 0.000100 0.005500
    10 NaNs 0.000000 0.000000
    Most popular values (NaN = -999):
    other_error Count in train (desc) Count in validation Share in train Share in validation
    0 1 20081 254 0.61266 0.70166
    1 0 12696 108 0.38734 0.29834

    time_diff

    (Jump to top)
    time_diff Train Validation
    0 count 22526.000000 207.000000
    1 mean 105.199325 107.463768
    2 std 205.429390 209.001829
    3 min -110.000000 -110.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 60.000000 90.000000
    7 max 605.000000 600.000000
    8 unique values 141.000000 37.000000
    9 unique values / count 0.004300 0.102200
    10 NaNs 10251.000000 155.000000
    Most popular values (NaN = -999):
    time_diff Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 11869 97.0 0.36211 0.26796
    1 -999.0 10251 155.0 0.31275 0.42818
    2 495.0 2964 32.0 0.09043 0.08840
    3 5.0 1340 11.0 0.04088 0.03039
    4 -5.0 926 9.0 0.02825 0.02486
    5 60.0 633 7.0 0.01931 0.01934
    6 -60.0 393 5.0 0.01199 0.01381
    7 555.0 292 3.0 0.00891 0.00829
    8 500.0 290 0.0 0.00885 0.00000
    9 -45.0 274 5.0 0.00836 0.01381
    Most correlated values with time_diff:
    Column Correlation with time_diff
    0 time_diff 1.000000
    1 hour_proximity_from_11 0.891423
    2 minute_pointing_digit 0.697008
    3 minute_proximity_from_2 0.661584
    4 other_error 0.589530
    5 hour_hand_length 0.194337
    109 2 dist from cen -0.069418
    110 eleven_ten_error -0.119856
    111 clockhand_ratio -0.249547
    112 clockhand_diff -0.263345
    113 hour_pointing_digit -0.997903

    centre_dot_detect

    (Jump to top)
    centre_dot_detect Train Validation
    0 count 22826.000000 212.000000
    1 mean 0.241172 0.250000
    2 std 0.427804 0.434038
    3 min 0.000000 0.000000
    4 25% 0.000000 0.000000
    5 50% 0.000000 0.000000
    6 75% 0.000000 0.250000
    7 max 1.000000 1.000000
    8 unique values 3.000000 3.000000
    9 unique values / count 0.000100 0.008300
    10 NaNs 9951.000000 150.000000
    Most popular values (NaN = -999):
    centre_dot_detect Count in train (desc) Count in validation Share in train Share in validation
    0 0.0 17321 159 0.52845 0.43923
    1 -999.0 9951 150 0.30360 0.41436
    2 1.0 5505 53 0.16795 0.14641
    In [3]:
    COL = "diagnosis"
    h('<h2 id="c_'+COL+'">'+COL+'</h2>'+'<a style="font-size:11px" href="#home">(Jump to Top)</a>')
    import seaborn as sns
    plt.figure(figsize=(15,5))
    sns.countplot("diagnosis",data=tr)
    plt.show()
    

    diagnosis

    (Jump to Top)
    In [ ]:
    
    

    Comments

    You must login before you can post a comment.

    Execute