Loading

ADDI Alzheimers Detection Challenge

What about a constant solution?

Selecting constant predictions based on the class distribution of the test data, which scores 0.806 on the leaderboard

sweetlhare

Define preprocessing code

Import common packages

Please import packages that are common for training and prediction phases here.

In [ ]:
import numpy as np
import os
import pandas as pd
from catboost import CatBoostClassifier, CatBoostRegressor
from lightgbm import LGBMClassifier
import lightgbm as lgb
import joblib
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.metrics import *
import warnings
warnings.filterwarnings('ignore')

Training phase

Load training data

In [ ]:
# Read the training set into a DataFrame and preview its first rows.
# NOTE(review): AICROWD_TRAIN_DATASET_PATH is not defined in the visible cells;
# presumably it is set by the AIcrowd notebook setup - confirm before running.
train_data = pd.read_csv(AICROWD_TRAIN_DATASET_PATH)
train_data.head()
Out[ ]:
row_id number_of_digits missing_digit_1 missing_digit_2 missing_digit_3 missing_digit_4 missing_digit_5 missing_digit_6 missing_digit_7 missing_digit_8 ... bottom_area_perc left_area_perc right_area_perc hor_count vert_count eleven_ten_error other_error time_diff centre_dot_detect diagnosis
0 S0CIXBKIUEOUBNURP 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.526170 0.524975 0.474667 0 0 0 1 -105.0 0.0 normal
1 IW1Z4Z3H720OPW8LL 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.000810 0.516212 0.483330 0 1 0 1 NaN NaN normal
2 PVUGU14JRSU44ZADT 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.488109 0.550606 0.449042 0 0 0 0 0.0 0.0 normal
3 RW5UTGMB9H67LWJHX 7.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 ... NaN NaN NaN 1 0 0 1 NaN NaN normal
4 W0IM2V6F6UP5LYS3E 12.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.512818 0.511865 0.487791 0 1 0 0 0.0 1.0 normal

5 rows × 122 columns

Functions

As we know, the test data has a different class distribution from the training data. The best class split of 3500-1149-420 (normal / post_alzheimer / pre_alzheimer) was found experimentally.

In [ ]:
def get_batch(data, rs, n_normal=3500):
    """Return a resampled batch: a random subset of 'normal' rows plus all other rows.

    Args:
        data: DataFrame with a ``diagnosis`` column.
        rs: Random state passed to ``DataFrame.sample`` for reproducibility.
        n_normal: Number of rows with ``diagnosis == 'normal'`` to draw
            (without replacement). Defaults to 3500, the value used in the
            original experiment.

    Returns:
        A DataFrame with the sampled 'normal' rows first, followed by every
        row whose diagnosis is not 'normal'. The original index labels are
        kept; callers that need a positional index should reset it.

    Raises:
        ValueError: If ``data`` has fewer than ``n_normal`` 'normal' rows.
    """
    is_normal = data.diagnosis == 'normal'
    normal_rows = data[is_normal]
    if len(normal_rows) < n_normal:
        # Fail with a clear message instead of the generic pandas sampling error.
        raise ValueError(
            f"Cannot sample {n_normal} 'normal' rows: only "
            f"{len(normal_rows)} available"
        )
    sampled_normal = normal_rows.sample(n_normal, random_state=rs)
    # `!=` (rather than ~is_normal) mirrors the original selection exactly.
    other_rows = data[data.diagnosis != 'normal']
    return pd.concat([sampled_normal, other_rows])

Find constant

In [ ]:
# Draw the resampled batch and give it a fresh 0..n-1 index.
t_df = get_batch(train_data, 17).reset_index(drop=True)

# One-hot target matrix: column 0 = normal, 1 = post_alzheimer, 2 = pre_alzheimer.
# Rows whose diagnosis matches none of the three labels stay all-zero.
class_labels = ('normal', 'post_alzheimer', 'pre_alzheimer')
t = np.column_stack(
    [(t_df.diagnosis == label).to_numpy(dtype=float) for label in class_labels]
)
t
Out[ ]:
array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.]])
In [ ]:
# Grid-search the constant prediction (p_normal, p_post, p_pre) that minimises
# the multi-class log loss against the one-hot targets `t`.
#
# Only candidates with a log loss below 1 are reported; every printed line is a
# new best-so-far, so the last line printed is the best constant found.
min_err = 1

# One reusable prediction buffer; every row gets the same three probabilities.
pred = np.empty((t_df.shape[0], 3))

# Walk the grid in integer hundredths rather than repeatedly adding 0.01, so
# floating-point drift cannot shift grid points or the `sum < 1` boundary.
for prob_pct in range(50, 100):            # p_normal: 0.50 .. 0.99
    prob = prob_pct / 100

    for prob_2_pct in range(100 - prob_pct):   # keeps p_normal + p_post < 1
        prob_2 = prob_2_pct / 100
        prob_3 = (100 - prob_pct - prob_2_pct) / 100

        pred[:, 0] = prob
        pred[:, 1] = prob_2
        pred[:, 2] = prob_3

        # Evaluate the loss once per candidate (it is deterministic).
        err = log_loss(t, pred)

        if err < min_err:
            min_err = err
            print(round(prob, 2), round(prob_2, 2), round(prob_3, 2), err)
0.5 0.15 0.35 0.995606882399513
0.5 0.16 0.34 0.9833796195665473
0.5 0.17 0.33 0.9721112340971808
0.5 0.18 0.32 0.9617046606384979
0.5 0.19 0.31 0.9520797279205214
0.5 0.2 0.3 0.943169830570819
0.5 0.21 0.29 0.9349194366219354
0.5 0.22 0.28 0.9272822017346289
0.5 0.23 0.27 0.9202195336261024
0.5 0.24 0.26 0.9136994982711207
0.5 0.25 0.25 0.9076959922187645
0.5 0.26 0.24 0.9021881283965878
0.5 0.27 0.23 0.8971597995750459
0.5 0.28 0.22 0.8925993965621074
0.5 0.29 0.21 0.8884996688255727
0.5 0.3 0.2 0.8848577248673567
0.5 0.31 0.19 0.8816751794381114
0.5 0.32 0.18 0.8789584658003253
0.5 0.33 0.17 0.8767193452646532
0.5 0.34 0.16 0.8749756653482829
0.5 0.35 0.15 0.8737524455831246
0.5 0.36 0.14 0.8730834119255565
0.5 0.37 0.13 0.8730131667021235
0.51 0.31 0.18 0.8724818547975287
0.51 0.32 0.17 0.8700212668017356
0.51 0.33 0.16 0.8680693444018149
0.51 0.34 0.15 0.8666499567945545
0.51 0.35 0.14 0.8657958092619352
0.51 0.36 0.13 0.8655505953479145
0.52 0.31 0.17 0.8638101707006501
0.52 0.32 0.16 0.8616367808406086
0.52 0.33 0.15 0.8600091507497977
0.52 0.34 0.14 0.8589588353750761
0.52 0.35 0.13 0.8585285075860045
0.53 0.3 0.17 0.8580904674112806
0.53 0.31 0.16 0.8556810838522524
0.53 0.32 0.15 0.8538319863013207
0.53 0.33 0.14 0.8525734284430486
0.53 0.34 0.13 0.8519469328118748
0.54 0.3 0.16 0.8502072311951371
0.54 0.31 0.15 0.8481221399452187
0.54 0.32 0.14 0.846642114626826
0.54 0.33 0.13 0.8458073765121017
0.54 0.34 0.12 0.8456726127645976
0.55 0.29 0.16 0.8452222040767056
0.55 0.3 0.15 0.8428851151049995
0.55 0.31 0.14 0.8411690960876199
0.55 0.32 0.13 0.8401128905127749
0.55 0.33 0.12 0.8397698842817204
0.56 0.29 0.15 0.8381283807621277
0.56 0.3 0.14 0.8361603640229606
0.56 0.31 0.13 0.8348681647491288
0.56 0.32 0.12 0.8343036910579538
0.57 0.28 0.15 0.8338615444266071
0.57 0.29 0.14 0.8316238406509847
0.57 0.3 0.13 0.8300796436553654
0.57 0.31 0.12 0.8292791762652033
0.58 0.28 0.14 0.8275695551847404
0.58 0.29 0.13 0.8257556711526659
0.58 0.3 0.12 0.8247032060407163
0.58 0.31 0.11 0.8244801387517103
0.59 0.27 0.14 0.8240098602537452
0.59 0.28 0.13 0.8219066693190892
0.59 0.29 0.12 0.8205845171706841
0.59 0.3 0.11 0.8201094521598907
0.6 0.27 0.13 0.8185453572348518
0.6 0.28 0.12 0.8169338981838653
0.6 0.29 0.11 0.8161891461366165
0.61 0.26 0.13 0.8156870183165351
0.61 0.27 0.12 0.8137644103801759
0.61 0.28 0.11 0.8127303514303454
0.61 0.29 0.1 0.8126732097958351
0.62 0.26 0.12 0.8110916571351688
0.62 0.27 0.11 0.8097464492999654
0.62 0.28 0.1 0.8094000007628736
0.63 0.25 0.12 0.8089341121954523
0.63 0.26 0.11 0.8072533426003692
0.63 0.27 0.1 0.8065957451779044
0.64 0.25 0.11 0.80526978569065
0.64 0.26 0.1 0.8042766265083056
0.65 0.24 0.11 0.8038177870376405
0.65 0.25 0.1 0.8024616623215314
0.65 0.26 0.09 0.802301239587889
0.66 0.24 0.1 0.8011731082173718
0.66 0.25 0.09 0.8006497199499648
0.67 0.23 0.1 0.8004369524058319
0.67 0.24 0.09 0.7995196944869308
0.68 0.23 0.09 0.7989373699099342
0.69 0.22 0.09 0.7989332817702329
0.69 0.23 0.08 0.7986164144949899

So the best constant probabilities are 0.69 / 0.23 / 0.08 (normal / post_alzheimer / pre_alzheimer). Let's try submitting them.

Prediction phase 🔎

Please make sure to save the weights from the training section in your assets directory and load them in this section

Load test data

In [ ]:
# Read the test set into a DataFrame.
# NOTE(review): AICROWD_DATASET_PATH is not defined in the visible cells;
# presumably it is set by the AIcrowd notebook setup - confirm before running.
test_data = pd.read_csv(AICROWD_DATASET_PATH)

Generate predictions

In [ ]:
# Placeholder probability matrix: one row per test sample, one column per class.
preds = np.zeros((len(test_data), 3))
In [ ]:
# Assemble the submission table: the row identifiers followed by one
# probability column per class, taken from the columns of `preds` in order.
predictions = dict(
    zip(
        (
            "row_id",
            "normal_diagnosis_probability",
            "post_alzheimer_diagnosis_probability",
            "pre_alzheimer_diagnosis_probability",
        ),
        (
            test_data["row_id"].values,
            preds[:, 0],
            preds[:, 1],
            preds[:, 2],
        ),
    )
)

predictions_df = pd.DataFrame(predictions)
In [ ]:
# Overwrite every row with the constant probabilities chosen by the grid search.
for column, constant in (
    ('normal_diagnosis_probability', 0.69),
    ('post_alzheimer_diagnosis_probability', 0.23),
    ('pre_alzheimer_diagnosis_probability', 0.08),
):
    predictions_df[column] = constant

Save predictions 📨

In [ ]:
# Write the submission table as CSV without the DataFrame index column.
# NOTE(review): AICROWD_PREDICTIONS_PATH is not defined in the visible cells;
# presumably it is set by the AIcrowd notebook setup - confirm before running.
predictions_df.to_csv(AICROWD_PREDICTIONS_PATH, index=False)

Submit to AIcrowd 🚀

NOTE: PLEASE SAVE THE NOTEBOOK BEFORE SUBMITTING IT (Ctrl + S)

In [ ]:
!DATASET_PATH=$AICROWD_DATASET_PATH \
aicrowd notebook submit \
    --assets-dir $AICROWD_ASSETS_DIR \
    --challenge addi-alzheimers-detection-challenge
API Key valid
Saved API Key successfully!
Using notebook: /home/desktop0/python.ipynb for submission...
Removing existing files from submission directory...
Scrubbing API keys from the notebook...
Collecting notebook...
Validating the submission...
Executing install.ipynb...
[NbConvertApp] Converting notebook /home/desktop0/submission/install.ipynb to notebook
[NbConvertApp] Executing notebook with kernel: python
[NbConvertApp] Writing 4292 bytes to /home/desktop0/submission/install.nbconvert.ipynb
Executing predict.ipynb...
[NbConvertApp] Converting notebook /home/desktop0/submission/predict.ipynb to notebook
[NbConvertApp] Executing notebook with kernel: python
[NbConvertApp] Writing 58618 bytes to /home/desktop0/submission/predict.nbconvert.ipynb
submission.zip ━━━━━━━━━━━━━━━━━━━━ 100.0%245.4/245.4 MB2.6 MB/s0:00:00[0m • 0:00:01[36m0:00:05
                                                 ╭─────────────────────────╮                                                 
                                                 │ Successfully submitted! │                                                 
                                                 ╰─────────────────────────╯                                                 
                                                       Important links                                                       
┌──────────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│  This submission │ https://www.aicrowd.com/challenges/addi-alzheimers-detection-challenge/submissions/144207              │
│                  │                                                                                                        │
│  All submissions │ https://www.aicrowd.com/challenges/addi-alzheimers-detection-challenge/submissions?my_submissions=true │
│                  │                                                                                                        │
│      Leaderboard │ https://www.aicrowd.com/challenges/addi-alzheimers-detection-challenge/leaderboards                    │
│                  │                                                                                                        │
│ Discussion forum │ https://discourse.aicrowd.com/c/addi-alzheimers-detection-challenge                                    │
│                  │                                                                                                        │
│   Challenge page │ https://www.aicrowd.com/challenges/addi-alzheimers-detection-challenge                                 │
└──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────┘
In [ ]:


Comments

You must login before you can post a comment.

Execute