I am trying to implement pretrained densenet121 on a image classification task with 789 images and 3 labels and received an error. Below is my code.
# Import libraries
import pandas as pd # For importing dataset
import numpy as np # For matrix operation
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cv2
import matplotlib.pyplot as plt
import torchvision
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms, models
import copy
import tqdm
from PIL import Image
%matplotlib inline
mix_dataset_dir = '/kaggle/input/complete-mix-dataset/Mix_images'
mix_dataset_files = os.listdir(mix_dataset_dir)
mix_dataset_files
Below are few images names I have shared for reference
['Teenagers (95).jpg',
'Adults (254).jpg',
'Teenagers (108).jpg',
'Teenagers (175).jpg',
'Teenagers (126).jpg',
Code continued
class MixDataset(Dataset):
def __init__(self, file_list, dir, mode='train', transform = None):
self.file_list = file_list
self.dir = dir
self.mode= mode
self.transform = transform
def __len__(self):
return len(self.file_list)
def __getitem__(self, idx):
img = Image.open(os.path.join(self.dir, self.file_list[idx])).convert('RGB')
if self.mode == 'train':
if 'Adults' in self.file_list[idx]:
self.label = 1
elif 'Teenagers' in self.file_list[idx]:
self.label = 2
else:
self.label = 3
if self.transform:
img = self.transform(img)
if self.mode == 'train':
img = img.numpy()
return img.astype('float32'), self.label
else:
img = img.numpy()
return img.astype('float32'), self.file_list[idx]
data_transform = transforms.Compose([
transforms.Resize((256,256)),
transforms.ColorJitter(),
transforms.RandomCrop(224),
transforms.RandomHorizontalFlip(),
transforms.Resize((128,128)),
transforms.ToTensor()
])
mix = MixDataset(mix_dataset_files, mix_dataset_dir , transform = data_transform)
densenet_model = models.densenet121(pretrained = True)
densenet_model
for param in densenet_model.parameters():
param.requires_grad = True
from collections import OrderedDict
classifier = nn.Sequential(OrderedDict([
('fc1', nn.Linear(1024, 512)),
('relu1', nn.ReLU()),
('fc2', nn.Linear(512, 256)),
('relu2', nn.ReLU()),
('fc3', nn.Linear(256, 3)),
('output', nn.LogSoftmax(dim = 1))
]))
densenet_model.classifier = classifier
criterion = nn.NLLLoss()
optimizer = optim.Adam(densenet_model.classifier.parameters(), lr = 0.003)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 200, 300], gamma=0.5)
train_loader = DataLoader(mix, batch_size=32, shuffle = True)
densenet_model.to('cuda')
epochs = 3
itr = 1
p_itr = 200
densenet_model.train()
total_loss = 0
loss_list = []
acc_list = []
for epoch in range(epochs):
for samples, labels in train_loader:
samples, labels = samples.to('cuda'), labels.to('cuda')
optimizer.zero_grad()
output = densenet_model(samples)
loss = criterion(output, labels)
loss.backward()
optimizer.step()
total_loss += loss.item()
scheduler.step()
if itr%p_itr == 0:
pred = torch.argmax(output, dim=1)
correct = pred.eq(labels)
acc = torch.mean(correct.float())
print('[Epoch {}/{}] Iteration {} -> Train Loss: {:.4f}, Accuracy: {:.3f}'.format(epoch+1, epochs, itr, total_loss/p_itr, acc))
loss_list.append(total_loss/p_itr)
acc_list.append(acc)
total_loss = 0
itr += 1
plt.plot(loss_list, label='loss')
plt.plot(acc_list, label='accuracy')
plt.legend()
plt.title('training loss and accuracy')
plt.show()
Below is the complete error log which I am getting
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-9-0e23ed2d21f0> in <module>
14 output = densenet_model(samples)
15 loss = criterion(output, labels)
---> 16 loss.backward()
17 optimizer.step()
18 total_loss += loss.item()
/opt/conda/lib/python3.7/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
196 products. Defaults to ``False``.
197 """
--> 198 torch.autograd.backward(self, gradient, retain_graph, create_graph)
199
200 def register_hook(self, hook):
/opt/conda/lib/python3.7/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
92 grad_tensors = list(grad_tensors)
93
---> 94 grad_tensors = _make_grads(tensors, grad_tensors)
95 if retain_graph is None:
96 retain_graph = create_graph
/opt/conda/lib/python3.7/site-packages/torch/autograd/__init__.py in _make_grads(outputs, grads)
34 if out.numel() != 1:
35 raise RuntimeError("grad can be implicitly created only for scalar outputs")
---> 36 new_grads.append(torch.ones_like(out, memory_format=torch.preserve_format))
37 else:
38 new_grads.append(None)
RuntimeError: CUDA error: device-side assert triggered
I have also gone through this link https://towardsdatascience.com/cuda-error-device-side-assert-triggered-c6ae1c8fa4c3 which fix the same problem I am facing and incorporated the changes but this thing also does not help.
Please let me know where am I going wrong.
Also I am working on kaggle kernels.