
ROVER CLASSIFICATION

How to create a State-of-the-Art Image Classifier

Having fun joining the race to M.A.R.S in the AI Blitz Perseverance challenge

anton.ai

How to create a State-of-the-Art Image Classifier in less than 10 minutes

In [1]:
!pip install aicrowd-cli==0.1
API_KEY = "4c49d255257272f8caf90b1e74b8cccd"
!aicrowd login --api-key $API_KEY
Requirement already satisfied: aicrowd-cli==0.1 in /usr/local/lib/python3.7/dist-packages (0.1.0)
Requirement already satisfied: gitpython in /usr/local/lib/python3.7/dist-packages (from aicrowd-cli==0.1) (3.1.14)
Requirement already satisfied: requests-toolbelt in /usr/local/lib/python3.7/dist-packages (from aicrowd-cli==0.1) (0.9.1)
Requirement already satisfied: toml in /usr/local/lib/python3.7/dist-packages (from aicrowd-cli==0.1) (0.10.2)
Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from aicrowd-cli==0.1) (4.41.1)
Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from aicrowd-cli==0.1) (2.23.0)
Requirement already satisfied: click<8,>=7.1.2 in /usr/local/lib/python3.7/dist-packages (from aicrowd-cli==0.1) (7.1.2)
Requirement already satisfied: rich in /usr/local/lib/python3.7/dist-packages (from aicrowd-cli==0.1) (9.13.0)
Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.7/dist-packages (from gitpython->aicrowd-cli==0.1) (4.0.5)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->aicrowd-cli==0.1) (1.24.3)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->aicrowd-cli==0.1) (2020.12.5)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->aicrowd-cli==0.1) (2.10)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->aicrowd-cli==0.1) (3.0.4)
Requirement already satisfied: typing-extensions<4.0.0,>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from rich->aicrowd-cli==0.1) (3.7.4.3)
Requirement already satisfied: colorama<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from rich->aicrowd-cli==0.1) (0.4.4)
Requirement already satisfied: pygments<3.0.0,>=2.6.0 in /usr/local/lib/python3.7/dist-packages (from rich->aicrowd-cli==0.1) (2.6.1)
Requirement already satisfied: commonmark<0.10.0,>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from rich->aicrowd-cli==0.1) (0.9.1)
Requirement already satisfied: smmap<4,>=3.0.1 in /usr/local/lib/python3.7/dist-packages (from gitdb<5,>=4.0.1->gitpython->aicrowd-cli==0.1) (3.0.5)
API Key valid
Saved API Key successfully!
In [2]:
!aicrowd dataset download --challenge rover-classification -j 3
sample_submission.csv: 100% 164k/164k [00:00<00:00, 384kB/s]
train.csv: 100% 689k/689k [00:00<00:00, 961kB/s]
val.csv: 100% 64.8k/64.8k [00:00<00:00, 261kB/s]
val.zip: 100% 26.6M/26.6M [00:02<00:00, 10.8MB/s]
test.zip: 100% 66.4M/66.4M [00:08<00:00, 7.68MB/s]
train.zip: 100% 266M/266M [00:14<00:00, 18.9MB/s]
In [3]:
!rm -rf data
!mkdir data

# Extract each split into its own folder under data/
!unzip train.zip -d data/train >/dev/null
!unzip val.zip -d data/val >/dev/null
!unzip test.zip -d data/test >/dev/null
In [4]:
import pandas as pd
import os
import re
import tensorflow as tf
In [5]:
df_train = pd.read_csv("train.csv")
In [6]:
df_val = pd.read_csv("val.csv")
In [7]:
# Append ".jpg" so the ImageID values match the extracted image filenames
df_train['ImageID'] = df_train['ImageID'].astype(str)+".jpg"
df_val['ImageID'] = df_val['ImageID'].astype(str)+".jpg"
In [8]:
INPUT_SIZE = 256
In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
In [10]:
datagen = ImageDataGenerator(rescale=1./255.)

train_generator = datagen.flow_from_dataframe(
    dataframe=df_train,
    directory="data/train/",
    x_col="ImageID",
    y_col="label",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(INPUT_SIZE, INPUT_SIZE))
Found 40000 validated image filenames belonging to 2 classes.
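If you want to sanity-check what the generator yields before training, one optional way is to pull a single batch and inspect its shapes; the expected values below assume the batch size of 32 and the 256x256 target size set above:

images, labels = next(train_generator)
print(images.shape)  # expected: (32, 256, 256, 3), pixel values rescaled to [0, 1]
print(labels.shape)  # expected: (32, 2), one-hot labels for the two rover classes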
In [11]:
val_generator = datagen.flow_from_dataframe(
    dataframe=df_val,
    directory="data/val/",
    x_col="ImageID",
    y_col="label",
    batch_size=64,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(INPUT_SIZE, INPUT_SIZE))
Found 4000 validated image filenames belonging to 2 classes.
In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten, Dropout, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers, optimizers
import os
import numpy as np
import pandas as pd
In [13]:
class CustomAugment(object):
    def __call__(self, image):
        # Random horizontal flip and random grayscale conversion
        img = self._random_apply(tf.image.flip_left_right, image, p=0.5)
        img = self._random_apply(self._color_drop, img, p=0.8)
        return img

    def _color_drop(self, x):
        # Convert to grayscale, then tile back to 3 channels so the shape
        # stays compatible with the rest of the RGB pipeline
        x = tf.image.rgb_to_grayscale(x)
        x = tf.tile(x, [1, 1, 1, 3])
        return x

    def _random_apply(self, func, x, p):
        # Apply func to x with probability p, otherwise return x unchanged
        return tf.cond(
            tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32),
                    tf.cast(p, tf.float32)),
            lambda: func(x),
            lambda: x)
In [14]:
data_augmentation = tf.keras.Sequential(
  [
    tf.keras.layers.Lambda(CustomAugment()),
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal", 
                                                 input_shape=(INPUT_SIZE, 
                                                              INPUT_SIZE,
                                                              3)),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
    tf.keras.layers.experimental.preprocessing.RandomZoom(0.1),
  ]
)
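As a quick sanity check that the custom Lambda composes with the built-in preprocessing layers, the pipeline can be run on a dummy batch. This is only a sketch; the dummy tensor is made up, and the random grayscale step means some outputs will have identical channels:

dummy_batch = tf.random.uniform((4, INPUT_SIZE, INPUT_SIZE, 3))  # fake batch of 4 RGB images
augmented = data_augmentation(dummy_batch, training=True)        # training=True enables the random ops
print(augmented.shape)  # expected: (4, 256, 256, 3)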
In [15]:
model = Sequential()
model.add(data_augmentation)
# Pretrained ResNet152V2 backbone (ImageNet weights, no classification head)
model.add(tf.keras.applications.ResNet152V2(
    include_top=False,
    weights="imagenet",
    input_shape=(INPUT_SIZE, INPUT_SIZE, 3),
))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# Two classes: curiosity vs. perseverance
model.add(Dense(2, activation='softmax'))
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.0001/10),
              loss="categorical_crossentropy",
              metrics=["Recall", "Precision"])
In [16]:
# Freeze the backbone: train only the new classification head first
model.layers[1].trainable = False
In [17]:
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VAL = val_generator.n // val_generator.batch_size
In [18]:
model.fit(train_generator, validation_data=val_generator, epochs=5)
Epoch 1/5
1250/1250 [==============================] - 476s 359ms/step - loss: 0.5066 - recall: 0.8270 - precision: 0.8270 - val_loss: 0.0741 - val_recall: 0.9840 - val_precision: 0.9840
Epoch 2/5
1250/1250 [==============================] - 445s 356ms/step - loss: 0.0290 - recall: 0.9902 - precision: 0.9902 - val_loss: 0.0448 - val_recall: 0.9925 - val_precision: 0.9925
Epoch 3/5
1250/1250 [==============================] - 446s 356ms/step - loss: 0.0112 - recall: 0.9967 - precision: 0.9967 - val_loss: 0.0173 - val_recall: 0.9965 - val_precision: 0.9965
Epoch 4/5
1250/1250 [==============================] - 446s 357ms/step - loss: 0.0081 - recall: 0.9980 - precision: 0.9980 - val_loss: 0.0288 - val_recall: 0.9955 - val_precision: 0.9955
Epoch 5/5
1250/1250 [==============================] - 446s 357ms/step - loss: 0.0083 - recall: 0.9982 - precision: 0.9982 - val_loss: 0.0020 - val_recall: 0.9995 - val_precision: 0.9995
Out[18]:
<tensorflow.python.keras.callbacks.History at 0x7f20abdad210>
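The notebook keeps the stage-one weights only in memory. If you want a fallback before unfreezing the backbone below, a minimal optional sketch is the following (the filename is arbitrary and not part of the original run):

# Optional: snapshot the head-only training result so a bad fine-tuning run can be rolled back
model.save_weights("stage1_frozen_backbone.h5")
# model.load_weights("stage1_frozen_backbone.h5")  # restore later if needed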
In [19]:
model.layers[1].trainable = True
In [20]:
len(model.layers[1].layers)
Out[20]:
564
In [21]:
model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
sequential (Sequential)      (None, None, None, None)  0         
_________________________________________________________________
resnet152v2 (Functional)     (None, 8, 8, 2048)        58331648  
_________________________________________________________________
dropout (Dropout)            (None, 8, 8, 2048)        0         
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 512)               67109376  
_________________________________________________________________
activation (Activation)      (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1026      
=================================================================
Total params: 125,442,050
Trainable params: 125,298,306
Non-trainable params: 143,744
_________________________________________________________________
In [22]:
# Unfreeze the backbone, then keep its first 100 layers frozen for fine-tuning
model.layers[1].trainable = True
for layer in model.layers[1].layers[:100]:
    layer.trainable = False
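To confirm how the freeze split landed, a small optional check counts frozen vs. trainable layers inside the backbone; with the 564 layers reported above, 100 should now be frozen:

backbone = model.layers[1]  # the ResNet152V2 sub-model
n_frozen = sum(1 for layer in backbone.layers if not layer.trainable)
print(n_frozen, len(backbone.layers) - n_frozen)  # expected: 100 frozen, 464 trainable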
In [23]:
model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
sequential (Sequential)      (None, None, None, None)  0         
_________________________________________________________________
resnet152v2 (Functional)     (None, 8, 8, 2048)        58331648  
_________________________________________________________________
dropout (Dropout)            (None, 8, 8, 2048)        0         
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 512)               67109376  
_________________________________________________________________
activation (Activation)      (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1026      
=================================================================
Total params: 125,442,050
Trainable params: 123,504,898
Non-trainable params: 1,937,152
_________________________________________________________________
In [24]:
# Lower the learning rate further before fine-tuning the partially unfrozen backbone
tf.keras.backend.set_value(model.optimizer.learning_rate, 0.0001/100)
In [25]:
model.fit(train_generator, validation_data=val_generator, epochs=2)
Epoch 1/2
1250/1250 [==============================] - 445s 356ms/step - loss: 0.0019 - recall: 0.9992 - precision: 0.9992 - val_loss: 0.0016 - val_recall: 0.9995 - val_precision: 0.9995
Epoch 2/2
1250/1250 [==============================] - 444s 355ms/step - loss: 0.0012 - recall: 0.9997 - precision: 0.9997 - val_loss: 4.5734e-04 - val_recall: 0.9998 - val_precision: 0.9998
Out[25]:
<tensorflow.python.keras.callbacks.History at 0x7f20abf9d810>
In [26]:
# Use the sample submission as the test dataframe and append ".jpg" to match the image filenames
df_test = pd.read_csv("sample_submission.csv", dtype=str)
df_test["ImageID"] = df_test["ImageID"].astype(str)+".jpg"
In [27]:
test_generator = datagen.flow_from_dataframe(
    dataframe=df_test,
    directory="data/test/",
    x_col="ImageID",
    y_col="label",
    batch_size=1,
    seed=42,
    shuffle=False,
    class_mode="categorical",
    target_size=(INPUT_SIZE, INPUT_SIZE))
Found 10000 validated image filenames belonging to 2 classes.
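Because shuffle=False and batch_size=1, the generator yields the test images in the same row order as df_test, which is what makes assigning the predictions back as a column valid later on. A quick optional consistency check on the counts:

assert test_generator.n == len(df_test) == 10000  # every row of df_test has a validated image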
In [28]:
STEP_SIZE_TEST = test_generator.n//test_generator.batch_size
In [29]:
STEP_SIZE_TEST
Out[29]:
10000
In [30]:
test_generator.reset()
pred = model.predict(test_generator,
                     steps=STEP_SIZE_TEST, verbose=1)
10000/10000 [==============================] - 186s 18ms/step
In [31]:
predicted_class_indices = np.argmax(pred,axis=1)
In [32]:
# Invert the class_indices mapping (label -> index) so predicted indices map back to label names
labels = train_generator.class_indices
labels = dict((v, k) for k, v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
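Before building the submission, it can be worth a quick look at how the predicted labels are distributed; a one-line sketch using the already-imported pandas:

print(pd.Series(predictions).value_counts())  # rough split between curiosity and perseverance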
In [33]:
df_test["pred"] = predictions
In [34]:
df_test.head()
Out[34]:
ImageID label pred
0 0.jpg curiosity curiosity
1 1.jpg perseverance curiosity
2 2.jpg curiosity curiosity
3 3.jpg perseverance perseverance
4 4.jpg perseverance curiosity
In [35]:
df_test.drop("label", axis=1, inplace=True)
df_test.rename(columns={"pred": "label"}, inplace=True)
In [36]:
df_test.head()
Out[36]:
ImageID label
0 0.jpg curiosity
1 1.jpg curiosity
2 2.jpg curiosity
3 3.jpg perseverance
4 4.jpg curiosity
In [37]:
df_test["ImageID"] = df_test["ImageID"].map(lambda x: re.sub(r"\D", "", str(x)))
In [38]:
df_test.head()
Out[38]:
ImageID label
0 0 curiosity
1 1 curiosity
2 2 curiosity
3 3 perseverance
4 4 curiosity
In [39]:
df_test.to_csv("data/03_sub.csv", index=False)
In [40]:
!aicrowd submission create -c rover-classification -f data/03_sub.csv
03_sub.csv ━━━━━━━━━━━━━━━━━━━━━━━━ 100.0% • 165.5/163.9 KB • 3.3 MB/s • 0:00:00
                                    ╭─────────────────────────╮                                     
                                    │ Successfully submitted! │                                     
                                    ╰─────────────────────────╯                                     
                                          Important links                                           
┌──────────────────┬───────────────────────────────────────────────────────────────────────────────┐
│  This submission │ https://www.aicrowd.com/challenges/ai-blitz-7/submissions/126678              │
│                  │                                                                               │
│  All submissions │ https://www.aicrowd.com/challenges/ai-blitz-7/submissions?my_submissions=true │
│                  │                                                                               │
│      Leaderboard │ https://www.aicrowd.com/challenges/ai-blitz-7/leaderboards                    │
│                  │                                                                               │
│ Discussion forum │ https://discourse.aicrowd.com/c/ai-blitz-7                                    │
│                  │                                                                               │
│   Challenge page │ https://www.aicrowd.com/challenges/ai-blitz-7                                 │
└──────────────────┴───────────────────────────────────────────────────────────────────────────────┘
{'submission_id': 126678, 'created_at': '2021-03-15T15:08:03.622Z'}
