Why is my network loss not decreasing? Please help

I'm trying to train a simple model on CIFAR-10, following the PyTorch tutorial but with layers adapted from VGG16. Can someone please tell me why my loss stays constant? Am I doing something wrong?

print('==> Preparing data...')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

print('==> Building model...')
net = myvggnet.MyVGGNet16()

net = net.to(device)
if device == 'cuda':
    cudnn.benchmark = True

criterion = nn.NLLLoss()
softmax = nn.Softmax(1)

lr = 0.05
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)

def lr_schedule(optimizer, epoch):
    if epoch >= 122:
        lr = 0.0005
    elif epoch >= 81:
        lr = 0.005
    else:
        lr = 0.05
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def train(epoch, optimizer):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        out = net(inputs)
        out_sm = softmax(out)

        log = torch.log(out_sm)

        loss = criterion(log, targets)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = out_sm.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    train_loss = train_loss/(batch_idx+1)
    train_acc = 100.*correct/total

    return train_loss, train_acc

def test(epoch, optimizer):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            out = net(inputs)
            out_sm = softmax(out)
            log = torch.log(out_sm)
            loss = criterion(log, targets)

            test_loss += loss.item()
            _, predicted = out_sm.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    test_loss = test_loss/(batch_idx+1)
    test_acc = 100.*correct/total

    return test_loss, test_acc

now = datetime.datetime.now

acc_history = torch.zeros([200,2], dtype=torch.float32, device='cuda')
loss_history = torch.zeros([200,2], dtype=torch.float32, device='cuda')
count = torch.zeros([200,2], dtype=torch.int32, device='cuda')

for epoch in range(start_epoch, start_epoch+200):
    t = now()
    lr_schedule(optimizer, epoch)

    train_loss, train_acc = train(epoch, optimizer)
    print("Train Loss : %.3f | Train Acc : %.3f | Training time: %s" % (train_loss, train_acc, now() - t))
    test_loss, test_acc = test(epoch, optimizer)
    print("Test Loss : %.3f, Test Acc : %.3f" % (test_loss, test_acc))
    acc_history[epoch][0] = train_acc
    acc_history[epoch][1] = test_acc

    loss_history[epoch][0] = train_loss
    loss_history[epoch][1] = test_loss

Above is my training code, and below is my network:

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class MyVGGNet(nn.Module):
    def __init__(self):
        super(MyVGGNet, self).__init__()
        self.features = torch.nn.Sequential(
            # conv1
            torch.nn.Conv2d(3,64,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.3),
            torch.nn.Conv2d(64,64,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.AvgPool2d(2,2),
            # conv2
            torch.nn.Conv2d(64,128,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Conv2d(128,128,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.AvgPool2d(2,2),
            # conv3
            torch.nn.Conv2d(128,256,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Conv2d(256,256,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Conv2d(256,256,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.AvgPool2d(2,2),
            # conv4
            torch.nn.Conv2d(256,512,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Conv2d(512,512,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Conv2d(512,512,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.AvgPool2d(2,2),
            # conv5
            torch.nn.Conv2d(512,512,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Conv2d(512,512,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Conv2d(512,512,3,padding=1,bias=False),
            torch.nn.ReLU(),
            torch.nn.AvgPool2d(2,2)
        )
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(512*1*1, 512),  # 32x32 input halved by 5 pooling layers -> 1x1 spatial
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(512,512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(512,10),
        )

    def forward(self, x):
        output = self.features(x)
        output = output.view(output.size()[0], -1)
        output = self.classifier(output)

        return output

def MyVGGNet16():
    return MyVGGNet()

After I start training, the output keeps coming out like this:

Epoch: 0
Train Loss : 2.304 | Train Acc : 10.070 | Training time: 0:00:16.129584
Test Loss : 2.303, Test Acc : 10.000

Epoch: 1
Train Loss : 2.304 | Train Acc : 9.890 | Training time: 0:00:16.121169
Test Loss : 2.304, Test Acc : 10.000

Epoch: 2
Train Loss : 2.303 | Train Acc : 10.288 | Training time: 0:00:16.217818
Test Loss : 2.304, Test Acc : 10.000

Epoch: 3
Train Loss : 2.304 | Train Acc : 10.028 | Training time: 0:00:16.199693
Test Loss : 2.303, Test Acc : 10.000

Epoch: 4
Train Loss : 2.304 | Train Acc : 9.660 | Training time: 0:00:16.242965
Test Loss : 2.303, Test Acc : 10.000

The loss is 2.303 and not changing.
I tried to find out why, and it seems my network is not learning at all.
Can anyone tell me why this is happening?
Please let me know.

There are too many dropouts, especially after the convolutions. Try removing some of them, as in the sketch below.
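For example, here is what your first block could look like with the conv-side dropout removed, keeping dropout only in the fully connected classifier. This mirrors your layer sizes, but it's just an illustration, not a tested fix:

import torch.nn as nn

# conv1 without the Dropout between the convolutions
block1 = nn.Sequential(
    nn.Conv2d(3, 64, 3, padding=1, bias=False),
    nn.ReLU(),
    nn.Conv2d(64, 64, 3, padding=1, bias=False),
    nn.ReLU(),
    nn.AvgPool2d(2, 2),
)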

I had the same problem when I moved from Keras to PyTorch. I had a Keras model with dropout like yours (p=0.4, p=0.5) that was learning correctly; when I switched to PyTorch with the same data, the same model, and the same dropout values, the model wasn't learning and the loss didn't decrease.
So I removed the dropout, and then it worked well.

I didn't look into why dropout makes such a huge difference.

I guess it's because they implement dropout differently. Perhaps in Keras a dropout of 0.4 means dropping 40% of units, while in PyTorch it means dropping 60%. I'm just guessing; I'm not sure whether they actually differ.
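For what it's worth, you can check this directly. As far as I know the semantics match: in both frameworks the argument is the probability of dropping a unit, and PyTorch additionally rescales the survivors by 1/(1-p) at training time. A quick sanity check:

import torch
import torch.nn as nn

drop = nn.Dropout(0.4)
drop.train()                    # dropout is only active in training mode
x = torch.ones(1_000_000)
y = drop(x)
print((y == 0).float().mean())  # ~0.4: fraction of elements zeroed
print(y[y != 0][0])             # ~1.6667 == 1/(1-0.4): survivors are rescaled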

I removed all the dropout layers, but it didn't work...
I'll try rewriting the network.
Thanks for your help!

I removed all the dropout layers, but it didn't work.
I think the network and the forward-pass part are the reason.
I'll write it again.
Thanks!

By the way, instead of using softmax(x).log(), you should use log_softmax. Computing log(softmax(x)) in two steps can underflow to log(0) = -inf and produce NaN gradients, while log_softmax is numerically stable.
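Note that your constant loss of 2.303 is exactly -log(1/10), which suggests the network's output is stuck at a uniform prediction over the 10 classes. A minimal sketch of the change in your training loop (the same applies in test); equivalently, you can drop the softmax/log entirely and use nn.CrossEntropyLoss, which applies log_softmax + NLLLoss to the raw logits in one step:

import torch.nn.functional as F

out = net(inputs)
loss = criterion(F.log_softmax(out, dim=1), targets)  # numerically stable log-probs for NLLLoss

# or, equivalently:
# criterion = nn.CrossEntropyLoss()
# loss = criterion(out, targets)                      # expects raw logits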