Scene Segmentation
Solution for submission 156892
A detailed solution for submission 156892 to the Scene Segmentation challenge.
In [1]:
# Pytorch
import torch
from torch import nn
import segmentation_models_pytorch as smp
from torch.utils.data import Dataset, DataLoader
# Reading Dataset, vis and miscellaneous
from PIL import Image
import matplotlib.pyplot as plt
import os
import numpy as np
from natsort import natsorted
from tqdm.notebook import tqdm
import cv2
from torch.utils.data import random_split
from torch.utils.data.sampler import SubsetRandomSampler
DEBUG = False
#size = (256, 256)
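Note: the smp.utils training, loss, and metric helpers used later in this notebook are only shipped by older releases of segmentation_models_pytorch. The exact version used for this submission is not recorded; if your installed version no longer provides smp.utils, pinning an earlier release such as the one below (an assumption, not part of the original notebook) is one way to reproduce the environment.
# Assumed environment pin (version not recorded in the original notebook);
# 0.2.x releases still ship the smp.utils train/loss/metric helpers used below.
!pip install segmentation-models-pytorch==0.2.1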
In [2]:
class SemanticSegmentationDataset(Dataset):
    def __init__(self, img_directory = None, label_directory = None, train = True):
        self.img_directory = img_directory
        self.label_directory = label_directory
        if img_directory is not None:
            self.img_list = natsorted(os.listdir(img_directory))
        if train:
            self.label_list = natsorted(os.listdir(label_directory))
        self.train = train
        self.labels = list(range(0, 25))

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        # Reading the image as grayscale
        img = Image.open(os.path.join(self.img_directory, self.img_list[idx]))
        #img = img.resize(size)
        img = img.convert("L")
        if self.train:
            # Reading the mask image
            mask = Image.open(os.path.join(self.label_directory, self.label_list[idx]))
            #mask = mask.resize(size)
            img = np.array(img, dtype = np.float32)
            mask = np.array(mask, dtype = np.float32)
            # Add the channel dimension -> (1, H, W)
            img = img[np.newaxis, :, :]
            # Normalizing images to [0, 1]
            img = torch.from_numpy(img)
            img = img.float() / 255
            # One binary plane per class label -> (25, H, W)
            binary_mask = np.array([(mask == v) for v in self.labels])
            binary_mask = np.stack(binary_mask, axis = -1).astype('float')
            mask_preprocessed = binary_mask.transpose(2, 0, 1)
            mask_preprocessed = torch.from_numpy(mask_preprocessed)
            return img, mask_preprocessed
        # If reading the test dataset, only return the image
        else:
            img = np.array(img, dtype = np.float32)
            img = img[np.newaxis, :, :]
            # Normalizing images to [0, 1]
            img = torch.from_numpy(img)
            img = img.float() / 255
            return img
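As a quick sanity check, the dataset can be instantiated and a single item inspected. The sketch below assumes the ./train/image and ./train/segmentation folders created in the next cell already exist.
# Minimal sanity check (assumes ./train/image and ./train/segmentation are populated)
sample_dataset = SemanticSegmentationDataset(
    img_directory = "./train/image",
    label_directory = "./train/segmentation"
)
sample_img, sample_mask = sample_dataset[0]
print(sample_img.shape)   # torch.Size([1, H, W])  -- grayscale image scaled to [0, 1]
print(sample_mask.shape)  # torch.Size([25, H, W]) -- one binary plane per class label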
In [3]:
from sklearn.model_selection import StratifiedKFold, KFold, GroupKFold, cross_val_score, train_test_split
from shutil import copyfile
from models.efficientdet import EfficientDet
img_directory = "./data/train/image"
img_ind = os.listdir(img_directory)
img_ind = np.array(img_ind)
seeds = 1
n_splits = 5
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# using DiceLoss
loss = smp.utils.losses.DiceLoss()
# using multiple metrics to train the model
metrics = [
    smp.utils.metrics.IoU(threshold = 0.5),
    smp.utils.metrics.Fscore(threshold = 0.5),
    smp.utils.metrics.Accuracy(threshold = 0.5),
    smp.utils.metrics.Recall(threshold = 0.5),
    smp.utils.metrics.Precision(threshold = 0.5),
]
# Using Adam optimizer
#optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.006)
# Other encoders considered: resnet50, resnet34
encoder = 'timm-efficientnet-b0'
encoder_weights = 'imagenet'
ACTIVATION = "softmax2d"
lr = 0.0001
I = 0
#for seed in range(seeds):
# Earlier runs: random_state = 10 with 3 epochs -> 0.932; random_state = 10 with 4 epochs -> 0.903; random_state = 0 with 5 epochs
#for model_name in ["FPN", "Unet"]:
for model_name in ["FPN"]:
    kf = KFold(n_splits = n_splits, shuffle = True, random_state = 5)
    # Only the first fold of the split is used
    if DEBUG:
        # Debug mode: train on the small fold and validate on 200 images from the large fold
        train_index, test_index = list(kf.split(img_ind))[0][1], list(kf.split(img_ind))[0][0]
        number_epoches = 20
        test_index = test_index[:200]
        print(len(train_index), len(test_index))
    else:
        train_index, test_index = list(kf.split(img_ind))[0][0], list(kf.split(img_ind))[0][1]
        number_epoches = 25
        print(len(train_index), len(test_index))
    # Recreate the per-fold train/valid directories and copy the fold's files into them
    !rm -rf train valid
    !mkdir -p train/image train/segmentation valid/image valid/segmentation
    X_trg, X_val = img_ind[train_index], img_ind[test_index]
    for i in X_trg:
        copyfile("./data/train/image/" + i, './train/image/' + i)
        i = i.replace('jpg', 'png')
        copyfile("./data/train/segmentation/" + i, './train/segmentation/' + i)
    for i in X_val:
        copyfile("./data/train/image/" + i, './valid/image/' + i)
        i = i.replace('jpg', 'png')
        copyfile("./data/train/segmentation/" + i, './valid/segmentation/' + i)
    # Creating the training dataset
    train_dataset = SemanticSegmentationDataset(
        img_directory = "./train/image",
        label_directory = "./train/segmentation"
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size = 4,
        num_workers = 0,
        shuffle = False,
        drop_last = True
    )
    # Creating the validation dataset
    valid_dataset = SemanticSegmentationDataset(
        img_directory = "./valid/image",
        label_directory = "./valid/segmentation"
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size = 8,
        num_workers = 0,
        shuffle = False,
        drop_last = True
    )
    # Auxiliary classification head parameters (defined but not passed to the models below)
    aux_params = dict(
        pooling = 'avg',                      # one of 'avg', 'max'
        dropout = 0.1,                        # dropout ratio, default is None
        activation = 'softmax',               # activation function: None, 'softmax' or 'sigmoid'
        classes = len(train_dataset.labels),  # number of output labels
    )
    if model_name == 'Unet':
        model = smp.Unet(
            encoder_name = encoder,
            encoder_weights = encoder_weights,
            classes = len(train_dataset.labels),
            in_channels = 1,
            activation = ACTIVATION,
            #aux_params = aux_params,
        )
        # Resume from the previous best Unet checkpoint
        model = torch.load("Unet_best_model.pth")
        """model = EfficientDet(
            num_classes = len(train_dataset.labels),
            network = 'efficientdet-d0',
            #batch_size = 32
        )"""
    elif model_name == 'FPN':
        model = smp.FPN(
            encoder_name = encoder,
            encoder_weights = encoder_weights,
            classes = len(train_dataset.labels),
            in_channels = 1,
            activation = ACTIVATION,
            #aux_params = aux_params
        )
        # Resume from the previous best FPN checkpoint
        model = torch.load("FPN-0.981.pth")
    # Note: the lr = 0.0001 defined above is not used here
    optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.006)
    train_epoch = smp.utils.train.TrainEpoch(
        model,
        loss = loss,
        metrics = metrics,
        optimizer = optimizer,
        device = device,
        verbose = True,
    )
    valid_epoch = smp.utils.train.ValidEpoch(
        model,
        loss = loss,
        metrics = metrics,
        device = device,
        verbose = True,
    )
    full_name = model_name + '-' + str(I)
    print("\nStart training model:", model_name)
    #I += 1
    est = 0        # epochs since the last fscore improvement (tracked but not used)
    max_score = 0
    for i in range(0, number_epoches):
        print('\nEpoch: {}'.format(i))
        train_logs = train_epoch.run(train_loader)
        valid_logs = valid_epoch.run(valid_loader)
        # Save the model whenever the validation fscore improves
        if max_score < valid_logs['fscore']:
            max_score = valid_logs['fscore']
            torch.save(model, full_name + '.pth')
            print('Model saved!')
        else:
            est += 1
        # Note: with number_epoches = 25 the loop runs i = 0..24, so this branch never fires
        if i == 25:
            optimizer.param_groups[0]['lr'] = 1e-5
            print('Decrease decoder learning rate to 1e-5!')
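The est counter above tracks epochs without an fscore improvement but is never acted on, and the i == 25 learning-rate drop cannot trigger within 25 epochs. A patience-based schedule could be wired into the epoch loop as sketched below; the patience value is an assumption and was not used for the submitted model.
# Hypothetical patience-based LR schedule (not part of the original run);
# this would sit at the end of the epoch loop, after est is updated.
patience = 5  # assumed value for illustration
if est >= patience:
    optimizer.param_groups[0]['lr'] *= 0.1
    est = 0
    print('No fscore improvement for {} epochs, decreasing learning rate.'.format(patience))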
In [3]:
"""
FPN0 = torch.load("FPN-0.pth")
FPN1 = torch.load("FPN-1.pth")
FPN2 = torch.load("FPN-2.pth")
FPN3 = torch.load("FPN-3.pth")
FPN4 = torch.load("FPN-4.pth")
"""
FPN0 = torch.load("FPN-0.981.pth")
FPN1 = torch.load("FPN-0.981-2.pth")
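The commented block lists the five per-fold checkpoints; only the two FPN checkpoints above are used in the final ensemble. When loading them on a CPU-only machine, torch.load needs a map_location; a slightly more defensive loading pattern might look like the sketch below (same checkpoint files as above).
# Defensive loading sketch: map the checkpoints to the available device and switch to eval mode
device = 'cuda' if torch.cuda.is_available() else 'cpu'
FPN0 = torch.load("FPN-0.981.pth", map_location = device)
FPN1 = torch.load("FPN-0.981-2.pth", map_location = device)
FPN0.eval()
FPN1.eval()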
In [4]:
# Creating the testing dataset
test_dataset = SemanticSegmentationDataset(
    img_directory = "data/test/image",
    train = False
)
test_loader = DataLoader(
    test_dataset,
    batch_size = 1,
    num_workers = 2,
    shuffle = False,
    drop_last = False
)
In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Generating model predictions
!rm -rf segmentation
!mkdir segmentation
for n, batch in enumerate(tqdm(test_loader)):
    # Getting the predictions from both checkpoints
    predictions0 = FPN0.predict(batch.to(device)).cpu()
    predictions1 = FPN1.predict(batch.to(device)).cpu()
    # Converting the predictions to (H, W, num_classes)
    prediction_mask0 = predictions0.squeeze().cpu().numpy()
    prediction_mask0 = np.transpose(prediction_mask0, (1, 2, 0))
    prediction_mask1 = predictions1.squeeze().cpu().numpy()
    prediction_mask1 = np.transpose(prediction_mask1, (1, 2, 0))
    # Averaging the two probability maps
    prediction_mask = (prediction_mask0 + prediction_mask1) / 2
    # Collapsing the per-class channels into a single grayscale label image
    prediction_mask_gray = np.zeros((prediction_mask.shape[0], prediction_mask.shape[1]))
    for ii in range(prediction_mask.shape[2]):
        prediction_mask_gray = prediction_mask_gray + ii * prediction_mask[:, :, ii].round()
    # Saving the image
    prediction_mask_gray = Image.fromarray(prediction_mask_gray.astype(np.uint8))
    prediction_mask_gray.save(os.path.join("segmentation", f"{n}.png"))
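Since the models use a softmax2d activation, the per-class probabilities at each pixel sum to one, and the rounding loop above only assigns a pixel to a class whose averaged probability exceeds 0.5; low-confidence pixels fall back to label 0. An argmax over the class axis always picks the most likely class and is a simpler way to collapse the averaged probabilities into a label map. The snippet below is a sketch of how the loop body could be replaced; it is not what was used for the submitted predictions.
# Sketch: replace the rounding loop with an argmax over the class axis
prediction_label_map = np.argmax(prediction_mask, axis = 2).astype(np.uint8)
Image.fromarray(prediction_label_map).save(os.path.join("segmentation", f"{n}.png"))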
In [7]:
%load_ext aicrowd.magic
%aicrowd login