Loss not changing for basic cnn model

I am trying to revise pytorch. I am trying some examples which I could do before. But somehow now the model’s loss is not changing. It is not about the learning rate or anything. I need urgent help. Anyone who could help me?

import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F

transforms = transforms.Compose([transforms.ToTensor(),transforms.Normalize((1307,),(0.3801))])

train = datasets.MNIST(root=‘.’,train=True,transform=transforms,download=True)
test = datasets.MNIST(root=‘.’,train=False,transform=transforms,download=True)

trainloader = DataLoader(dataset = train, batch_size=60000, shuffle=True)
testloader = DataLoader(dataset= test, batch_size=10000, shuffle=False)

class Model(nn.Module):
def init(self):
super(Model,self).init()
self.cnn1 = nn.Conv2d(1,8,3,1,1)
self.batchnorm1 = nn.BatchNorm2d(8)
self.relu = nn.ReLU()
self.maxpool1 = nn.MaxPool2d(2,2)
self.cnn2 = nn.Conv2d(8,32,3,1,1)
self.batchnorm2 = nn.BatchNorm2d(32)
self.fc1 = nn.Linear(1568,600)
self.fc2 = nn.Linear(600,10)
self.dropout = nn.Dropout(0.5)
self.softmax = nn.Softmax(dim=1)

def forward(self,out):
    out = self.cnn1(out)
    out = self.batchnorm1(out)
    out = self.relu(out)
    out = self.maxpool1(out)
    out = self.cnn2(out)
    out = self.batchnorm2(out)
    out = self.relu(out)
    out = self.maxpool1(out)
    out = out.view(-1,1568)
    out = self.fc1(out)
    out = self.relu(out)
    out = self.dropout(out)
    out = self.softmax(self.fc2(out))
    # breakpoint()
    return out

model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)
model = model.cuda()
print(f"GPU is available is {torch.cuda.is_available()} statement")

for epochs in range(10):
correct = 0
model.train()
for (x,y) in trainloader:
x = x.float().cuda()
y = y.cuda()
output = model(x)
optimizer.zero_grad()
loss = criterion(output,y)
breakpoint()
loss.backward()
optimizer.step()
with torch.no_grad():
_,predicted = torch.max(output,1)
correct += (predicted==y).sum()
print(f"epoch:{epochs} train_acc {100*correct/len(train)}")

model.eval()
correct=0
for (x,y) in testloader:
    x,y=x.cuda(),y.cuda()
    output = model(x)
    _,predicted = torch.max(output,1)
    correct += (predicted==y).sum()
print(f"epoch:{epochs+1} test_acc {100*correct/len(test)}")

nn.CrossEntropyLoss expects raw logits so remove the nn.Softmax(dim=1) from your model and rerun your training.

import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F

transforms = transforms.Compose([transforms.ToTensor(),transforms.Normalize((1307,),(0.3081))])

train = datasets.MNIST(root=‘.’,train=True,transform=transforms,download=True)
test = datasets.MNIST(root=‘.’,train=False,transform=transforms,download=True)

trainloader = DataLoader(dataset = train, batch_size=64, shuffle=True)
testloader = DataLoader(dataset= test, batch_size=64, shuffle=False)

class Model(nn.Module):
def init(self):
super(Model,self).init()
self.cnn1 = nn.Conv2d(1,8,3,1,1)
self.batchnorm1 = nn.BatchNorm2d(8)
self.relu = nn.ReLU()
self.maxpool1 = nn.MaxPool2d(2,2)
self.cnn2 = nn.Conv2d(8,32,3,1,1)
self.batchnorm2 = nn.BatchNorm2d(32)
self.fc1 = nn.Linear(1568,600)
self.fc2 = nn.Linear(600,10)
self.dropout = nn.Dropout(0.5)

def forward(self,x):
    out = self.cnn1(x)
    out = self.batchnorm1(out)
    out = self.relu(out)
    out = self.maxpool1(out)
    out = self.cnn2(out)
    out = self.batchnorm2(out)
    out = self.relu(out)
    out = self.maxpool1(out)
    out = out.view(out.size(0), -1)
    out = self.fc1(out)
    out = self.relu(out)
    out = self.dropout(out)
    out = self.fc2(out)
    # breakpoint()
    return out

model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)
model = model.cuda()
print(f"GPU is available is {torch.cuda.is_available()} statement")

for epochs in range(10):
correct = 0
model.train()
for (x,y) in trainloader:
x = x.float().cuda()
y = y.cuda()
optimizer.zero_grad()
output = model(x)
loss = criterion(output,y)
# breakpoint()
loss.backward()
optimizer.step()
with torch.no_grad():
_,predicted = torch.max(output,1)
correct += (predicted==y).sum().item()
print(f"epoch:{epochs} train_acc {100*correct/60000}")

model.eval()
correct_test=0
for (x,y) in testloader:
    x,y=x.cuda(),y.cuda()
    output = model(x)
    _,predicted = torch.max(output,1)
    correct_test += (predicted==y).sum().item()
print(f"epoch:{epochs+1} test_acc {100*correct_test/10000}")

I have changed it. Still no luck