I'm trying to train a simple model on CIFAR-10. I'm following the PyTorch tutorial, but I replaced its layers with ones based on VGG16. Can someone please help me and tell me why my loss stays constant? Am I doing something wrong?
print('==> Preparing data…')

# Per-channel (mean, std) handed to Normalize. Shared by both pipelines so
# training and evaluation inputs are scaled identically.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop (after 4 px padding) and random
# horizontal flip as augmentation, then tensor conversion and normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalization as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)
print('==> Building model…')
net = myvggnet.MyVGGNet16()
net = net.to(device)
if device == 'cuda':
    # Allow cuDNN to benchmark and pick convolution algorithms.
    cudnn.benchmark = True

# NLLLoss expects log-probabilities as its input, so the network's raw
# outputs must be passed through a log-softmax before reaching the criterion.
criterion = nn.NLLLoss()
softmax = nn.Softmax(1)

# Initial learning rate; lr_schedule() overwrites it at the start of every
# epoch, so this value matches the schedule's first step.
lr = 0.05
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)
def lr_schedule(optimizer, epoch):
    """Set the learning rate of every parameter group for ``epoch``.

    Step schedule: 0.05 for epochs below 81, 0.005 for epochs 81-121 and
    0.0005 from epoch 122 onwards.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts
            whose ``'lr'`` entry is overwritten in place.
        epoch: Epoch index the rate is selected for.
    """
    if epoch >= 122:
        lr = 0.0005
    elif epoch >= 81:
        lr = 0.005
    else:
        lr = 0.05
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
def train(epoch, optimizer):
    """Run one training pass over ``trainloader`` and update ``net``.

    Relies on the module-level ``net``, ``trainloader``, ``criterion`` and
    ``device``.

    Args:
        epoch: Epoch index, used only for the progress message.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the update.

    Returns:
        Tuple ``(train_loss, train_acc)``: the loss averaged over batches and
        the accuracy in percent over all samples seen.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        out = net(inputs)
        # Compute log-probabilities in one fused, numerically stable step.
        # Taking log() of a separately computed softmax can yield -inf when a
        # probability underflows to zero, which then poisons the gradients.
        log_probs = torch.log_softmax(out, dim=1)
        loss = criterion(log_probs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Softmax is monotonic, so the arg-max of the raw outputs is the
        # predicted class.
        _, predicted = out.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    train_loss = train_loss / (batch_idx + 1)
    train_acc = 100. * correct / total
    return train_loss, train_acc
def test(epoch, optimizer):
    """Evaluate ``net`` on ``testloader`` without updating any weights.

    Relies on the module-level ``net``, ``testloader``, ``criterion`` and
    ``device``.

    Args:
        epoch: Unused; kept so the call signature mirrors ``train``.
        optimizer: Unused; kept so the call signature mirrors ``train``.

    Returns:
        Tuple ``(test_loss, test_acc)``: the loss averaged over batches and
        the accuracy in percent over all samples seen.
    """
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            out = net(inputs)
            # Fused log-softmax avoids the -inf that log(softmax(x)) produces
            # when a probability underflows to zero.
            log_probs = torch.log_softmax(out, dim=1)
            loss = criterion(log_probs, targets)

            test_loss += loss.item()
            # Softmax is monotonic, so the arg-max of the raw outputs is the
            # predicted class.
            _, predicted = out.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    test_loss = test_loss / (batch_idx + 1)
    test_acc = 100. * correct / total
    return test_loss, test_acc
now = datetime.datetime.now

# Number of epochs to run, and therefore the number of history rows.
NUM_EPOCHS = 200

# Column 0 holds the training value, column 1 the test value. The buffers
# live on the same ``device`` as the model instead of a hard-coded 'cuda', so
# the script also runs when CUDA is unavailable.
acc_history = torch.zeros([NUM_EPOCHS, 2], dtype=torch.float32, device=device)
loss_history = torch.zeros([NUM_EPOCHS, 2], dtype=torch.float32, device=device)
count = torch.zeros([NUM_EPOCHS, 2], dtype=torch.int32, device=device)

for epoch in range(start_epoch, start_epoch + NUM_EPOCHS):
    t = now()
    lr_schedule(optimizer, epoch)
    train_loss, train_acc = train(epoch, optimizer)
    print("Train Loss : %.3f | Train Acc : %.3f | Training time: %s" % (train_loss, train_acc, now() - t))
    test_loss, test_acc = test(epoch, optimizer)
    print("Test Loss : %.3f, Test Acc : %.3f" % (test_loss, test_acc))

    # Index rows relative to start_epoch: the absolute epoch number would run
    # past the NUM_EPOCHS-row buffers whenever start_epoch > 0.
    row = epoch - start_epoch
    acc_history[row, 0] = train_acc
    acc_history[row, 1] = test_acc
    loss_history[row, 0] = train_loss
    loss_history[row, 1] = test_loss
Above is my training code, and below is my network:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class MyVGGNet(nn.Module):
    """VGG16-style convolutional classifier with 10 outputs.

    ``features`` is five convolutional stages (2, 2, 3, 3 and 3 bias-free 3x3
    convolutions), each ending in a 2x2 average pool. ``classifier`` is a
    three-layer fully connected head. The head's input width of 512 * 1 * 1
    matches 3-channel 32x32 inputs, which the five pools reduce to 1x1.

    NOTE(review): the network has no normalization layers and heavy dropout;
    if training stalls at chance level, adding ``nn.BatchNorm2d`` after each
    convolution and/or lowering the learning rate is worth trying. This is
    to be confirmed experimentally.
    """

    def __init__(self):
        super(MyVGGNet, self).__init__()

        # (in_channels, out_channels, number of convs, dropout probability)
        stages = [
            (3, 64, 2, 0.3),     # conv1
            (64, 128, 2, 0.4),   # conv2
            (128, 256, 3, 0.4),  # conv3
            (256, 512, 3, 0.4),  # conv4
            (512, 512, 3, 0.4),  # conv5
        ]
        layers = []
        for in_channels, out_channels, num_convs, dropout in stages:
            layers.extend(
                self._stage(in_channels, out_channels, num_convs, dropout))
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            # 512 channels x 1 x 1 spatial positions after the last pool.
            nn.Linear(512 * 1 * 1, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10),
        )

    @staticmethod
    def _stage(in_channels, out_channels, num_convs, dropout):
        """Return the layer list for one conv stage.

        The stage is ``num_convs`` Conv2d+ReLU pairs with a Dropout between
        consecutive pairs (none after the last), followed by AvgPool2d(2, 2).
        """
        layers = []
        for i in range(num_convs):
            conv_in = in_channels if i == 0 else out_channels
            layers.append(
                nn.Conv2d(conv_in, out_channels, 3, padding=1, bias=False))
            layers.append(nn.ReLU())
            if i < num_convs - 1:
                layers.append(nn.Dropout(dropout))
        layers.append(nn.AvgPool2d(2, 2))
        return layers

    def forward(self, x):
        """Return the raw (un-normalized) class scores for batch ``x``."""
        output = self.features(x)
        # Flatten everything except the batch dimension for the linear head.
        output = output.view(output.size(0), -1)
        output = self.classifier(output)
        return output
def MyVGGNet16():
    """Factory: build and return a new ``MyVGGNet`` instance."""
    model = MyVGGNet()
    return model
After training starts, the output keeps coming out like this:
Epoch: 0
Train Loss : 2.304 | Train Acc : 10.070 | Training time: 0:00:16.129584
Test Loss : 2.303, Test Acc : 10.000
Epoch: 1
Train Loss : 2.304 | Train Acc : 9.890 | Training time: 0:00:16.121169
Test Loss : 2.304, Test Acc : 10.000
Epoch: 2
Train Loss : 2.303 | Train Acc : 10.288 | Training time: 0:00:16.217818
Test Loss : 2.304, Test Acc : 10.000
Epoch: 3
Train Loss : 2.304 | Train Acc : 10.028 | Training time: 0:00:16.199693
Test Loss : 2.303, Test Acc : 10.000
Epoch: 4
Train Loss : 2.304 | Train Acc : 9.660 | Training time: 0:00:16.242965
Test Loss : 2.303, Test Acc : 10.000
…
The loss stays at 2.303 and does not change (2.303 ≈ ln 10, which is the loss of a uniform guess over the 10 classes).
I tried to find out why, and I found that my network is not learning.
Can anyone tell me why this is happening?
Please let me know.