Environment Classification
Solution for submission 154921
A detailed solution for submission 154921 to the Environment Classification challenge.
In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy
import gc
import os
from glob import glob
import pickle
import random
import shutil
import seaborn as sns
from collections import Counter
from sklearn.model_selection import StratifiedKFold, KFold, GroupKFold, cross_val_score, train_test_split
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.metrics import roc_auc_score, f1_score, log_loss, accuracy_score, matthews_corrcoef
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
#import category_encoders as ce
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from sklearn.metrics import classification_report
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import warnings
# Silence noisy warning categories for the whole notebook session.
for _category in (FutureWarning, DeprecationWarning, UserWarning, RuntimeWarning):
    warnings.simplefilter(action = 'ignore', category = _category)
# Also ignore the numpy binary-compatibility messages, matched by message text.
for _message in ("numpy.dtype size changed", "numpy.ufunc size changed"):
    warnings.filterwarnings("ignore", message = _message)
# Turn off pandas' chained-assignment check.
pd.options.mode.chained_assignment = None
In [ ]:
In [2]:
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
import seaborn as sns
from glob import glob
from natsort import natsorted
import cv2
from PIL import Image
# Collect every entry of the image folder, ordered with natsorted so that
# list positions are stable between runs.
images_folder = "./data/images"
images_list = natsorted(glob(images_folder + "/*"))
if not images_list:
    # Fail here with a clear message rather than with the IndexError that
    # random.choice raises on an empty sequence.
    raise FileNotFoundError(f"No images found under {images_folder!r}")
# Open one randomly chosen image as a sanity check; evaluate `img` as the last
# expression of a cell to preview it.
img = Image.open(random.choice(images_list))
In [3]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Sequential
# ImageNet-pretrained ResNet50 without its classification head; pooling='avg'
# makes the backbone emit one pooled feature vector per input image.
resnet = ResNet50(weights = 'imagenet', include_top = False, pooling = 'avg')
# The backbone is used purely as a fixed feature extractor, so freeze it.
resnet.trainable = False
# Wrap the frozen backbone as the single layer of a Sequential model.
rmodel = Sequential([resnet])
In [ ]:
In [4]:
%%time
from tensorflow.keras.applications.resnet50 import preprocess_input
import cv2
import numpy as np
# Extract one ResNet50 feature vector per image. Rows of `array` follow the
# order of `images_list`.
resnet_feature_list = []
for file in tqdm(images_list):
    im = cv2.imread(file)
    if im is None:
        # cv2.imread reports an unreadable / non-image file by returning None
        # instead of raising. Skipping the file would shift every later row
        # relative to its image, so stop with an explicit error.
        raise ValueError(f"cv2.imread could not read image: {file}")
    # OpenCV decodes to BGR, while the Keras ResNet50 preprocess_input expects
    # RGB input (it performs the RGB->BGR flip itself). Convert first so the
    # channels reach the network in the order the ImageNet weights expect.
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # Add the leading batch axis; images are fed one at a time at native size.
    batch = preprocess_input(np.expand_dims(im, axis = 0))
    # verbose = 0 keeps Keras from printing a progress bar per image inside tqdm.
    resnet_feature = rmodel.predict(batch, verbose = 0)
    resnet_feature_list.append(np.asarray(resnet_feature).flatten())
# Stack the per-image vectors into a 2-D (n_images, n_features) matrix.
array = np.array(resnet_feature_list)
In [5]:
# Preview the first five feature rows as a table.
pd.DataFrame(data = array).head(n = 5)
Out[5]:
In [6]:
# Cluster the image features into 5 groups with a fixed seed.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto', and FutureWarning is silenced at the top of this notebook, so relying
# on the default could change the clustering between library versions unnoticed.
model = KMeans(n_clusters = 5, n_init = 10, random_state = 3)
predictions = model.fit_predict(array)
# One cluster label per row of `array`.
predictions.shape
Out[6]:
In [7]:
# Build the submission table: one row per image with its cluster label.
# The ID range is derived from the number of predictions instead of a
# hard-coded 700, which raised a length-mismatch error for any other count.
# NOTE(review): assumes position i in the natsorted `images_list` is ImageID i
# -- confirm against the file names in the data folder.
df = pd.DataFrame({"ImageID": range(len(predictions)), "label": predictions})
df['ImageID'] = df["ImageID"].astype(int)
df['label'] = df["label"].astype(int)
df = df.sort_values("ImageID").reset_index(drop = True)
df
Out[7]:
In [8]:
# Recreate an empty `assets` directory and write the submission file into it.
# Pure-Python equivalents of `rm -rf assets` / `mkdir assets`, so the cell does
# not depend on a POSIX shell being available.
shutil.rmtree("assets", ignore_errors = True)
os.makedirs("assets", exist_ok = True)
df.to_csv(os.path.join("assets", "submission.csv"), index = False)
In [ ]:
In [9]:
# Count how many images were assigned to each cluster label.
Counter(df["label"])
Out[9]:
In [ ]:
In [10]:
# Load the aicrowd.magic IPython extension, then run its `login` command.
# NOTE(review): `%aicrowd` is presumably registered by that extension and the
# login is presumably interactive -- confirm before running unattended.
%load_ext aicrowd.magic
%aicrowd login
In [ ]:
Content
Comments
You must login before you can post a comment.