I have been trying to do multilabel classification over 19 classes, using a custom image data loader. I use a pretrained DenseNet201 as a frozen backbone with a new classifier head, and BCEWithLogitsLoss as the loss function. All the relevant code is below.
Edit: I almost forgot: I have a dataframe with an ID column (the image path) and a labels column. I one-hot encoded the labels so that every image gets a binary (sparse) label vector.
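That encoding step looked roughly like the sketch below (a simplified reconstruction, not my exact code; the `Labels` column name, the space-separated label format, and `MultiLabelBinarizer` are just illustrative choices):
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

# Toy example of the encoding step: 'Labels' holds space-separated class names per image.
df = pd.DataFrame({'ID': ['img_0.png', 'img_1.png'],
                   'Labels': ['cat dog', 'dog']})

mlb = MultiLabelBinarizer()
onehot = pd.DataFrame(mlb.fit_transform(df['Labels'].str.split()),
                      columns=mlb.classes_)

# Result: 'ID' plus one 0/1 column per class, which is what the Dataset below expects.
train_df = pd.concat([df[['ID']], onehot], axis=1)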
Image Data Loader
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class ImageDataLoader(Dataset):
    def __init__(self, dframe, transform=None):
        self.df = dframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img = Image.open(self.df['ID'][idx])
        img = np.asarray(img) / 255  # scale pixel values to [0, 1]
        if self.transform:
            img = self.transform(img)
        # every non-ID column is one entry of the multi-hot label vector
        label = torch.FloatTensor(self.df.drop(columns=['ID']).loc[idx])
        sample = {'img': img, 'label': label}
        return sample
tfms = transforms.Compose([
    transforms.ToTensor(),  # HWC ndarray -> CHW tensor (pixels already scaled in __getitem__)
    transforms.Normalize([0.07237246, 0.04476176, 0.07661699],
                         [0.17179589, 0.10284516, 0.14199627]),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(p=0.5)  # the argument is a flip probability in [0, 1]
])

train_loader = ImageDataLoader(train_df, transform=tfms)  # 1000 images
train = DataLoader(train_loader, batch_size=16, shuffle=False)
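As a quick sanity check (not part of the training script), I pull one batch and confirm the shapes and dtypes are what BCEWithLogitsLoss expects:
# Sanity check on one batch from the DataLoader.
batch = next(iter(train))
print(batch['img'].shape, batch['img'].dtype)      # expect [16, 3, 224, 224]
print(batch['label'].shape, batch['label'].dtype)  # expect [16, 19] and a float dtype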
Inference Model
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict
from torchvision import models

np.random.seed(1)
torch.manual_seed(1)

model = models.densenet201(pretrained=True)  # downloads ImageNet-pretrained weights
for param in model.parameters():
    param.requires_grad = False  # freeze the whole backbone

# Use unique keys: duplicate 'relu' keys in an OrderedDict collapse into a single
# entry, which would leave fc2 and fc3 with no activation between them.
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(1920, 1024)),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(1024, 512)),
    ('relu2', nn.ReLU()),
    ('fc3', nn.Linear(512, 19))
]))
model.classifier = classifier

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-2)
# lamda = lambda epoch: 10**(epoch/2)
# scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lamda, last_epoch=-1, verbose=True)
model = model.cuda()
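Since the backbone is frozen, only the new classifier head should actually be learning. This is the quick check I used (a sketch, not in the original script):
# Sketch: list the trainable parameters; only classifier.* entries should appear.
trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(len(trainable), trainable[:4])

# An equivalent option is to hand the optimizer only those parameters:
# optimizer = optim.Adam(model.classifier.parameters(), lr=3e-2)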
Training
from tqdm import tqdm

epochs = 16
steps = 0
train_losses, val_losses, lrs = [], [], []

for e in tqdm(range(epochs)):
    # lrs.append(optimizer.param_groups[0]["lr"])
    model.train()
    for sample in train:
        images, labels = sample['img'].to(torch.float32).cuda(), sample['label'].cuda()
        optimizer.zero_grad()
        ps = model(images)
        loss = criterion(ps, labels)
        loss.backward()
        optimizer.step()
        images, labels = images.cpu(), labels.cpu()

    model.eval()  # put BatchNorm layers in eval mode for validation
    with torch.no_grad():
        for sample in val:  # `val` is the validation DataLoader, built like `train` (not shown)
            images, labels = sample['img'].to(torch.float32).cuda(), sample['label'].cuda()
            ps = model(images)
            val_loss = criterion(ps, labels)
            top_p, top_class = ps.topk(1, dim=1)
            # equals = top_class == labels.view(*top_class.shape)
            # accuracy = torch.mean(equals.type(torch.FloatTensor))
            images, labels = images.cpu(), labels.cpu()

    train_losses.append(loss.item())  # .item() keeps a plain float rather than a graph-attached tensor
    val_losses.append(val_loss.item())
    # accuracies.append(accuracy)
    # scheduler.step()
    print('Epoch..{}/{}'.format(e + 1, epochs),
          'Training Loss..{:.3f}'.format(train_losses[e]),
          'Test Loss..{:.3f}'.format(val_losses[e]))
          # 'Accuracy..{:.3f}'.format(accuracies[e]))

model = model.cpu()
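The commented-out accuracy lines use topk, which fits single-label classification; for the multilabel case I would instead threshold the sigmoid outputs, roughly like this (a sketch; the 0.5 threshold is just an assumption):
# Sketch of a per-label accuracy for the multilabel setting (0.5 threshold assumed).
def multilabel_accuracy(logits, labels, threshold=0.5):
    preds = (torch.sigmoid(logits) > threshold).float()
    return (preds == labels).float().mean().item()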
##RESULTS
6%|▋ | 1/16 [00:57<14:25, 57.69s/it]
Epoch..1/16 Training Loss..0.252 Test Loss..0.255
12%|█▎ | 2/16 [01:40<11:27, 49.12s/it]
Epoch..2/16 Training Loss..0.238 Test Loss..0.252
19%|█▉ | 3/16 [02:24<10:05, 46.56s/it]
Epoch..3/16 Training Loss..0.241 Test Loss..0.246
25%|██▌ | 4/16 [03:08<09:06, 45.57s/it]
Epoch..4/16 Training Loss..0.241 Test Loss..0.246
31%|███▏ | 5/16 [03:53<08:18, 45.33s/it]
Epoch..5/16 Training Loss..0.240 Test Loss..0.248
38%|███▊ | 6/16 [04:38<07:32, 45.21s/it]
Epoch..6/16 Training Loss..0.240 Test Loss..0.250
44%|████▍ | 7/16 [05:22<06:44, 44.95s/it]
Epoch..7/16 Training Loss..0.239 Test Loss..0.252
50%|█████ | 8/16 [06:07<05:58, 44.83s/it]
Epoch..8/16 Training Loss..0.239 Test Loss..0.253
56%|█████▋ | 9/16 [06:52<05:15, 45.05s/it]
Epoch..9/16 Training Loss..0.238 Test Loss..0.255
62%|██████▎ | 10/16 [07:36<04:28, 44.69s/it]
Epoch..10/16 Training Loss..0.238 Test Loss..0.256
69%|██████▉ | 11/16 [08:20<03:42, 44.52s/it]
Epoch..11/16 Training Loss..0.238 Test Loss..0.257
75%|███████▌ | 12/16 [09:05<02:57, 44.43s/it]
Epoch..12/16 Training Loss..0.238 Test Loss..0.257
81%|████████▏ | 13/16 [09:48<02:12, 44.18s/it]
Epoch..13/16 Training Loss..0.238 Test Loss..0.258
88%|████████▊ | 14/16 [10:32<01:27, 43.98s/it]
Epoch..14/16 Training Loss..0.238 Test Loss..0.258
94%|█████████▍| 15/16 [11:15<00:43, 43.75s/it]
Epoch..15/16 Training Loss..0.238 Test Loss..0.258
100%|██████████| 16/16 [12:00<00:00, 45.00s/it]
Epoch..16/16 Training Loss..0.238 Test Loss..0.258
Code for prediction
with torch.no_grad():
    model.cpu()
    model.eval()
    img = Image.open(df['ID'][9])
    img = tfms(img)
    img = img.reshape(1, 3, 224, 224)
    print(torch.sigmoid(model(img)))  # torch.sigmoid instead of the deprecated F.sigmoid
tensor([[0.4760, 0.0375, 0.0830, 0.0524, 0.0395, 0.0595, 0.0410, 0.0753, 0.0423,
0.0370, 0.0197, 0.0011, 0.1263, 0.0783, 0.0797, 0.0187, 0.2127, 0.0200,
0.0035]]) #Output for image 9
tensor([[0.4760, 0.0375, 0.0830, 0.0524, 0.0395, 0.0595, 0.0410, 0.0753, 0.0423,
0.0370, 0.0197, 0.0011, 0.1263, 0.0783, 0.0797, 0.0187, 0.2127, 0.0200,
0.0035]])#Output for image 4
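To read these outputs, I map the probabilities back to class names roughly like this (a sketch; the 0.5 cut-off is an assumption, and it relies on the one-hot column names being the class names):
# Sketch: decode one prediction into class names (0.5 cut-off assumed).
with torch.no_grad():
    probs = torch.sigmoid(model(img))[0]
class_names = df.drop(columns=['ID']).columns
predicted = [c for c, p in zip(class_names, probs) if p > 0.5]
print(predicted)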
Please help, thank you!