Accuracy not increasing, loss not decreasing

Hello,
I am training a PyTorch model for sign language classification. There are 29 classes. I am using torchvision augmentation. For weeks I have been trying to train the model, but the loss keeps hovering around the value where it starts, and the accuracy too remains where it started (it is no better than choosing a random label). I first tried smaller models, then bigger models, and now PyTorch’s built-in models, all of which give me the same result. I read online and tried weight decay and different hyperparameters, none of which seem to help.

Here is the code:

!unzip /content/drive/MyDrive/asl.zip -d /content/data

from torch.utils.data import Dataset, DataLoader
import torch
import torchvision.transforms as T
import cv2 as cv
import glob
import numpy as np
import random
import os
from PIL import Image
import torchvision.models as models

# The archive unpacks to /content/data/asl_alphabet_<split>/asl_alphabet_<split>.
# Lift each inner directory up to /content/data/<split>, then drop the
# now-empty outer directories.
_DATA_ROOT = '/content/data'
_SPLITS = ('train', 'test')

for _split in _SPLITS:
  _nested = f'{_DATA_ROOT}/asl_alphabet_{_split}/asl_alphabet_{_split}'
  os.rename(_nested, f'{_DATA_ROOT}/{_split}')

for _split in _SPLITS:
  os.removedirs(f'{_DATA_ROOT}/asl_alphabet_{_split}')

# Training pipeline: random augmentations on the PIL image, then conversion
# to a tensor and per-channel normalisation with mean 0.5 / std 0.5.
train_transf = T.Compose(
    [
     T.GaussianBlur(9),
     T.RandomRotation((0, 5)),
     T.RandomPerspective(),
     T.RandomHorizontalFlip(),
     T.RandomVerticalFlip(),
     T.ToTensor(),
     T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)
# Evaluation pipeline.
#  * ToTensor must come BEFORE Normalize: Normalize works on tensors, so the
#    previous order (Normalize first) failed on the PIL image.
#  * The normalisation statistics are the same as in train_transf; the model
#    must see test inputs on the same scale it was trained on.
#  * No random augmentation here, so evaluation results are reproducible.
test_transf = T.Compose(
    [
     T.ToTensor(),
     T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Root directories of the two dataset splits.
train_data_path = '/content/data/train'
test_data_path = '/content/data/test'

# Every entry directly under the training root is globbed in turn, and all
# files found inside it are collected into one flat list.
train_img_paths = [
  img_path
  for class_dir in glob.glob(train_data_path + '/*')
  for img_path in glob.glob(class_dir + '/*')
]

# Class names in label-index order: the 26 letters followed by the three
# special signs.
idx_to_class = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') + ['del', 'nothing', 'space']
# Reverse lookup: class name -> integer label.
class_to_idx = dict(zip(idx_to_class, range(len(idx_to_class))))

class DS(Dataset):
  """Image dataset whose labels come from each file's parent directory name.

  Args:
    image_paths: sequence of '/'-separated image file paths. The
      second-to-last path component must be a key of ``class_to_idx``.
    transforms: callable applied to the loaded PIL image, or ``None`` to
      return the PIL image unchanged.
  """

  def __init__(self, image_paths, transforms):
    self.image_paths = image_paths
    self.transforms = transforms

  def __len__(self):
    return len(self.image_paths)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for the sample at position ``idx``.

    If the file cannot be opened, the next path is used instead (wrapping
    around to the first one after the last). The label is taken from the
    file that was actually loaded.
    """
    filename = self.image_paths[idx]
    try:
      # convert('RGB') forces three channels, which the 3-channel
      # normalisation used with this dataset needs.
      image = Image.open(filename).convert('RGB')
    except OSError:
      # Only I/O and decoding failures are retried; anything else is a
      # programming error and should surface. The modulo keeps the
      # fallback index in range for the last sample.
      filename = self.image_paths[(idx + 1) % len(self.image_paths)]
      image = Image.open(filename).convert('RGB')

    label = class_to_idx[filename.split('/')[-2]]

    if self.transforms is not None:
      image = self.transforms(image)

    return image, label

import matplotlib.pyplot as plt
import copy
%matplotlib inline

def vis_augment(dataset, index = 0, samples = 10, cols = 5, rand_img = False):
  """Plot a grid of augmented samples taken from ``dataset``.

  Works on a deep copy of ``dataset`` whose transforms are replaced by a
  display-only augmentation pipeline, so the caller's dataset is untouched.

  Args:
    dataset: object with a ``transforms`` attribute, ``__len__`` and
      ``__getitem__`` returning ``(image, label)``.
    index: sample to display repeatedly when ``rand_img`` is false.
    samples: number of images to draw.
    cols: number of columns in the grid.
    rand_img: if true, draw a fresh random sample for every cell and
      ignore ``index``.
  """
  dataset = copy.deepcopy(dataset)
  dataset.transforms = T.Compose(
    [
     T.GaussianBlur(9),
     T.RandomRotation((0, 10)),
     T.RandomHorizontalFlip(),
     T.RandomVerticalFlip(),
    ]
  )
  # Ceiling division: a partially filled last row still needs its axes.
  rows = -(-samples // cols)
  # squeeze=False always yields a 2-D array of axes, even for a 1x1 grid.
  fig, ax = plt.subplots(nrows = rows, ncols = cols, figsize = (12, 8), squeeze = False)
  axes = ax.ravel()
  for axis in axes:
    axis.set_axis_off()

  for i in range(samples):
    # Previously the drawing code lived inside `if rand_img:`, so nothing
    # was plotted for rand_img=False and `index` was never used.
    idx = np.random.randint(len(dataset)) if rand_img else index
    image, lab = dataset[idx]
    axes[i].imshow(image)
    axes[i].set_title(idx_to_class[lab])
  plt.tight_layout(pad=1)
  plt.show()

# train_ds was used below without ever being created; build it from the
# collected training paths and the training transform pipeline.
train_ds = DS(train_img_paths, train_transf)

vis_augment(train_ds,np.random.randint(1,len(train_img_paths)), rand_img = True)

# NOTE(review): lr=0.1 and weight_decay=0.1 are far above Adam's defaults
# (lr=1e-3, weight_decay=0), and batch_size=1 gives very noisy updates.
# These are likely contributors to the loss not moving -- try lr=1e-3,
# weight_decay=0 and a larger batch before changing the model.
lr = 0.1
weight_decay = 0.1
batch_size = 1
num_epochs = 5
num_classes = 29

train_dtldr = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# is_available has to be *called*: the bare function object is always truthy,
# so 'cuda' used to be selected even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class CNN(nn.Module):
  """Wrapper around torchvision's SqueezeNet for image classification.

  Args:
    input_size: accepted for backward compatibility; not used.
    num_classes: number of output logits produced per image.
  """

  def __init__(self, input_size = (200, 200), num_classes = 29):
    super().__init__()
    # The attribute keeps its old name (the network is a SqueezeNet, not a
    # VGG) so code that reaches for `model.model_vgg` keeps working.
    #
    # num_classes is now forwarded instead of a hard-coded 29. The
    # constructor already builds the 512 -> num_classes 1x1 classifier conv
    # and applies SqueezeNet's own initialisation to it; replacing that
    # layer afterwards, as was done before, only swapped in a same-shaped
    # layer with PyTorch's default initialisation.
    self.model_vgg = models.SqueezeNet(num_classes=num_classes)

  def forward(self, input):
    """Return the wrapped network's output for the batch ``input``."""
    return self.model_vgg(input)

model = CNN((200, 200), num_classes).to(device)
criterion = nn.CrossEntropyLoss()
adam = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

total_right = 0  # correctly classified samples so far
total_seen = 0   # samples processed so far (denominator of the accuracy)
step = 1
model.train()    # training mode only needs to be set once, not per batch
for i in range(num_epochs):
  for x, y in train_dtldr:
    x = x.to(device)
    y = y.to(device)

    adam.zero_grad()

    # The model already lives on `device`, so its outputs and the loss do
    # too; no extra .to(device) is needed.
    scores = model(x)
    loss = criterion(scores, y)

    # argmax over the class dimension gives one prediction per sample.
    # Without `dim` the whole batch is flattened, which is only correct
    # for batch_size == 1.
    preds = scores.argmax(dim=1)
    total_right += int((preds == y).sum())
    total_seen += y.size(0)

    if step % 100 == 0:
      print(f'epoch:{i}\tstep:{step}')
      print(scores.detach(), y)
      print(f'-------------\npred = {preds}\nreal = {y.data}\nloss = {loss} \ntotal_right = {total_right} accuracy = {(total_right/total_seen)*100.00}%')

    loss.backward()

    adam.step()
    step += 1

@XGBoosted_Learner Although I haven’t gone through the entire code, can you try a smaller learning rate, say 1e-3, and see if that solves your issue?

I’ve tried going down to 0.0005 and it didn’t work, but I’ll try that, thanks.

This is too high.
Try 1e-5 or zero first.

You can’t use a batch size of 1 during training if you are using a batchnorm layer.
Also, many optimization methods need a large batch size for good convergence.

You should try a simpler optimization method like SGD first; try it with lr 0.05 and momentum 0.9.

Tried that too, but it didn’t work.

I tried this and it works, can anyone tell me what was wrong with my model?