Test loss increases as soon as training starts

I'm new to deep learning (image classification). I'm fine-tuning a ResNet-50 by replacing its top layer with my own sequential classifier for 10 plant-disease classes. Gradients are computed only for the new classifier; requires_grad is set to False for every other parameter. The test loss seems to increase as soon as training starts. Below are the pieces of information that may help:

  1. Model: ResNet-50 (pre-trained ImageNet weights)
  2. Batch Size: 10
  3. Optimizer: Adam
  4. Learning rate: 0.001
  5. Loss: NLLLoss
  6. PyTorch 1.5+cu101
  7. Python 3.6.1
  8. VS Code
  9. 100 training images/class
  10. 40 test images/class

[Figure: train and test loss over 30 epochs]
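
The code below uses trainloader, testloader, and device, which I haven't shown. A minimal sketch of the kind of setup it assumes (the directory paths and transforms here are placeholders, not my exact pipeline):

import torch
from torchvision import datasets, transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Resize to the ResNet input size and normalize with ImageNet statistics,
# matching the pre-trained weights
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

train_data = datasets.ImageFolder('data/train', transform=transform)
test_data = datasets.ImageFolder('data/test', transform=transform)

trainloader = torch.utils.data.DataLoader(train_data, batch_size=10, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=10, shuffle=False)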

import torch
from torch import nn, optim
from torchvision import models
from collections import OrderedDict

# Load the pre-trained backbone and freeze its parameters so we
# don't backprop through them
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

out_features = 10     # number of disease classes
hidden_layers = 1000  # width of the hidden layer

# Replace the top layer with the new classifier head
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(model.fc.in_features, hidden_layers)),
    ('relu', nn.ReLU()),
    ('fc2', nn.Linear(hidden_layers, out_features)),
    ('output', nn.LogSoftmax(dim=1))
]))

model.fc = classifier
model = model.to(device)
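
To double-check that only the new classifier is trainable, the parameters that still require gradients can be listed; with the head assigned to model.fc as above, only the fc1 and fc2 entries should appear:

# Sanity check: list the parameters that will actually be updated
trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(trainable)
# expected: ['fc.fc1.weight', 'fc.fc1.bias', 'fc.fc2.weight', 'fc.fc2.bias']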

# Loss function: NLLLoss pairs with the LogSoftmax layer in the classifier
criterion = nn.NLLLoss()

# The optimizer updates only the new classifier's parameters
#optimizer = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9, nesterov=True)
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

import time
import numpy as np

epochs = 400
train_losses, test_losses = [], []

valid_loss_min = np.Inf

for e in range(epochs):
    running_loss = 0
    start = time.time()
    model.train()
    for images, labels in trainloader:
        # Move the images and labels to the GPU
        images, labels = images.to(device), labels.to(device)
        # Clear the gradients; PyTorch accumulates them by default
        optimizer.zero_grad()
        # Forward pass, then backward pass, then update the weights
        output = model(images)
        # Calculate the loss with the NLLLoss function
        loss = criterion(output, labels)
        loss.backward()
        # Update the weights of the new classifier only
        optimizer.step()
        running_loss += loss.item()

    # Validation pass at the end of each epoch
    test_loss = 0
    accuracy = 0
    model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            # Move the images and labels to GPU memory
            images, labels = images.to(device), labels.to(device)
            # Forward pass
            output = model(images)
            # Accumulate the test loss from nn.NLLLoss()
            test_loss += criterion(output, labels).item()
            # Recover probabilities from the log-softmax output
            ps = torch.exp(output)
            # Take the most likely class for each image
            top_p, top_class = ps.topk(1, dim=1)
            # Compare the predicted classes with the labels
            equals = top_class == labels.view(*top_class.shape)
            # The mean of equals is the accuracy for this batch
            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

    train_losses.append(running_loss / len(trainloader))
    test_losses.append(test_loss / len(testloader))

    print(f"Device = {device}; Epoch time: {time.time() - start:.3f} seconds")
    print("Epoch: {}/{}.. ".format(e + 1, epochs),
          "Training Loss: {:.3f}.. ".format(train_losses[-1]),
          "Test Loss: {:.3f}.. ".format(test_losses[-1]),
          "Test Accuracy: {:.3f}".format(accuracy / len(testloader)))

    # Save a checkpoint whenever the validation loss improves
    if test_losses[-1] <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
            valid_loss_min, test_losses[-1]))
        torch.save(model.state_dict(), 'weights/resnet-50/Health-ID-Beta-v1-720z720-Test.pt')
        valid_loss_min = test_losses[-1]

Is this overfitting? How can I improve it?