Environment Classification
Solution for submission 156842
A detailed solution for submission 156842 submitted for challenge Environment Classification
Environment Classification
In this challenge, you will have images of a self-driving car moving through a town in different weather conditions. Your goal is to classify the environment into 5 different classes (using unsupervised methods): 1 means the weather is really good for a self-driving car, while 5 means the weather is very challenging for a self-driving car.
- Unsupervised Image Classification
Image clustering using Transfer learning¶
Resnet50 + Kmeans based image clustering model¶
https://towardsdatascience.com/image-clustering-using-transfer-learning-df5862779571
In [1]:
# Install the AIcrowd CLI quietly (-q), then load its IPython extension so the
# %aicrowd magic used in the following cells is available.
!pip install -q aicrowd-cli
%load_ext aicrowd.magic
In [2]:
# Log in to AIcrowd through the CLI magic (needed before the dataset download).
%aicrowd login
In [3]:
# Downloading the Dataset
# Start from an empty data/ directory so files from earlier runs do not linger,
# then fetch the environment-classification challenge dataset into it.
!rm -rf data
!mkdir data
%aicrowd ds dl -c environment-classification -o data
In [4]:
# Unzipping and Organising the datasets
# Extract the archive into data/images; unzip's stdout is discarded to keep the
# cell output short.
!unzip data/images.zip -d data/images > /dev/null
In [5]:
import os
import csv
from pathlib import Path
import random
import time
import pandas as pd
import numpy as np
In [6]:
# Folder containing the unzipped challenge images (the target of the unzip step).
DATA_DIR = "data/images/"
Model¶
In [7]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Sequential

# ImageNet-pretrained ResNet50 used purely as a feature extractor: the
# classification head is left out (include_top=False) and the final feature
# maps are average-pooled (pooling='avg') into one vector per image.
resnet = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Wrap the backbone in a Sequential model; it is the model's only layer.
my_new_model = Sequential([resnet])
In [8]:
# Freeze the first (and only) layer of the Sequential model, i.e. the ResNet
# backbone: its weights are already trained, and the cells shown here only call
# predict() on the model.
my_new_model.layers[0].trainable = False
Images Preprocessing¶
In [9]:
%%time
from tensorflow.keras.applications.resnet50 import preprocess_input
import cv2
import numpy as np
resnet_feature_list = []
images = [f for f in os.listdir(DATA_DIR)]
for image in images:
file = DATA_DIR+image
#print(file)
im = cv2.imread(file)
#im = cv2.resize(im,(256,256))
img = preprocess_input(np.expand_dims(im.copy(), axis=0))
resnet_feature = my_new_model.predict(img)
resnet_feature_np = np.array(resnet_feature)
resnet_feature_list.append(resnet_feature_np.flatten())
array = np.array(resnet_feature_list)
In [10]:
# Inspect the feature matrix dimensions (one row per image).
array.shape
Out[10]:
In [11]:
from sklearn.cluster import KMeans

# Cluster the feature vectors into the 5 target classes. random_state=None
# leaves the centroid initialisation unseeded, so cluster ids can differ
# between runs.
kmeans = KMeans(n_clusters=5, random_state=None, n_init=50, max_iter=1000)
kmeans.fit(array)
print(kmeans.labels_)
Submission¶
In [12]:
# Image ids are the file names without their extension. os.path.splitext copes
# with any extension length, unlike slicing off a fixed four characters.
img_ids_list = [os.path.splitext(f)[0] for f in images]
In [15]:
# Show the first image id as a quick sanity check.
img_ids_list[0]
Out[15]:
In [23]:
# Pair every image id with its cluster label, cast both columns to int and
# order the rows by image id.
pre_sub = (
    pd.DataFrame({'ImageID': img_ids_list, "label": kmeans.labels_})
    .astype(int)
    .sort_values(by=['ImageID'])
)
pre_sub
Out[23]:
In [24]:
# Number of images assigned to each cluster.
pre_sub.label.value_counts()
Out[24]:
It is clear that 20 images are misclassified; we get rid of them and repeat the training process¶
In [33]:
# Treat the smallest K-Means cluster as the outlier group to drop. Cluster ids
# are arbitrary and can change between runs (KMeans is fitted with
# random_state=None), so select the cluster by size instead of hard-coding id 0.
outlier_label = pre_sub.label.value_counts().idxmin()
to_del = set(pre_sub.loc[pre_sub.label == outlier_label, "ImageID"].tolist())

# Keep every image whose numeric id (file name without extension) is not in
# the outlier set.
images_clean = [
    image for image in images
    if int(os.path.splitext(image)[0]) not in to_del
]
len(images_clean)
Out[33]:
In [34]:
%%time
from tensorflow.keras.applications.resnet50 import preprocess_input
import cv2
import numpy as np
resnet_feature_list = []
# images = [f for f in os.listdir(DATA_DIR)]
for image in images_clean:
file = DATA_DIR+image
#print(file)
im = cv2.imread(file)
#im = cv2.resize(im,(256,256))
img = preprocess_input(np.expand_dims(im.copy(), axis=0))
resnet_feature = my_new_model.predict(img)
resnet_feature_np = np.array(resnet_feature)
resnet_feature_list.append(resnet_feature_np.flatten())
array = np.array(resnet_feature_list)
In [36]:
# Inspect the dimensions of the rebuilt feature matrix (one row per kept image).
array.shape
Out[36]:
In [37]:
from sklearn.cluster import KMeans

# Re-run the 5-way clustering on the cleaned feature matrix. random_state=None
# leaves the initialisation unseeded, so cluster ids can differ between runs.
kmeans = KMeans(n_clusters=5, random_state=None, n_init=50, max_iter=1000)
kmeans.fit(array)
In [38]:
# Ids of the kept images: file names without their extension. os.path.splitext
# copes with any extension length, unlike slicing off a fixed four characters.
img_ids_list_clean = [os.path.splitext(f)[0] for f in images_clean]
In [84]:
# Labels from the second clustering run for the images that were kept.
pre_sub_2 = pd.DataFrame(
    {'ImageID': img_ids_list_clean, "label": kmeans.labels_}
).astype(int)

# The removed images still need a row in the submission: give each of them a
# random label between 0 and 4 (inclusive).
rnd_labels = [random.randint(0, 4) for _ in range(len(to_del))]
# NOTE(review): missing_labels is defined but not used in the code shown here;
# `ending` below is built from rnd_labels. Confirm which one was intended.
missing_labels = [3, 3, 2, 1, 1, 0, 1, 2, 1, 2, 2, 2, 3, 2, 1, 4, 0, 2, 0, 3]
ending = pd.DataFrame({'ImageID': list(to_del), 'label': rnd_labels})

# Stack kept and removed images into one table ordered by image id.
submission = pd.concat([pre_sub_2, ending], axis=0).sort_values(by=['ImageID'])
Out[84]:
In [65]:
# Recreate an empty assets/ directory and write the submission file into it.
!rm -rf assets
!mkdir assets
# index=False keeps the DataFrame index out of the CSV.
submission.to_csv(os.path.join("assets", "submission.csv"), index=False)
In [ ]:
Making Direct Submission through Aicrowd CLI¶
In [66]:
In [ ]:
Content
Comments
You must login before you can post a comment.