VGG16 performs poorly during training

Hello, I’m new to deep learning and I’m trying to use VGG16 to build a classifier for the CIFAR-10 dataset (I resized the images to 224x224 to match the original VGG16 input size). The problem is that the loss in the training loop stays very high and shows no tendency to decrease.

I compared my code with other implementations, and the most notable difference, in my opinion, is the data augmentation step. I copied that augmentation into my code (see the sketch below), but the loss barely changed compared to my original version.
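For reference, the augmentation pipeline I copied was along these lines (shown here for illustration as a typical CIFAR-10 setup, not necessarily the exact code I copied):

import torchvision.transforms as transforms

augmented_train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # random 32x32 crop from a zero-padded 40x40 image
    transforms.RandomHorizontalFlip(),     # flip left/right with probability 0.5
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

Here is my original code: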

import torch
import torchvision.datasets as datasets 
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

n_classes = 10
n_epochs = 100
batch_size = 100
learning_rate = 0.001

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
# note: no Normalize transform here, so pixel values stay in [0, 1]

train_dataset = datasets.CIFAR10(root='./data',
                                 train=True,
                                 transform=train_transform,
                                 download=True)
test_dataset = datasets.CIFAR10(root='./data',
                                train=False,
                                transform=test_transform,
                                download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=batch_size,
                                           shuffle=False)

# visualize a few training images
example = iter(train_loader)
example_data, example_target = next(example)  # example_data: [batch_size, 3, 224, 224]

for i in range(6):
    plt.subplot(2, 3, i+1)
    plt.imshow(example_data[i].permute(1, 2, 0))  # CHW -> HWC for matplotlib
plt.show()

class customVGG16(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 4
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Block 5
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            # 25088 = 512 * 7 * 7: five 2x2 max-pools reduce the 224x224 input to a 7x7 map
            nn.Linear(25088, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

model = customVGG16(num_classes=n_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# StepLR multiplies the learning rate by gamma every step_size epochs,
# so here the LR shrinks by a factor of 100 every 10 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.01)

for epoch in range(n_epochs):
    model.train()
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print the loss every step (the `(i+1) % 100 == 0` guard is commented out)
        print(f'Epoch [{epoch+1}/{n_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    scheduler.step()
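(Side note: test_loader is defined above but never used in this run; a minimal evaluation loop, sketched here rather than taken from my actual run, would look like this:)

model.eval()
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        correct += (outputs.argmax(dim=1) == labels).sum().item()
print(f'Test accuracy: {100 * correct / len(test_dataset):.2f}%')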

Here is the output for the first few batches; the rest look practically the same:

Epoch [1/100], Step [1/500], Loss: 2.3027
Epoch [1/100], Step [2/500], Loss: 2.9318
Epoch [1/100], Step [3/500], Loss: 2.3184
Epoch [1/100], Step [4/500], Loss: 2.2989
Epoch [1/100], Step [5/500], Loss: 2.2904
Epoch [1/100], Step [6/500], Loss: 2.4422
Epoch [1/100], Step [7/500], Loss: 2.3080
Epoch [1/100], Step [8/500], Loss: 2.3024
Epoch [1/100], Step [9/500], Loss: 2.2964
Epoch [1/100], Step [10/500], Loss: 2.3805

Why is the training performance so poor, and how can I fix it?
Any help would be appreciated.

Update: the problem seems to be that the learning rate was too high. I solved it by lowering the learning rate to 1e-4, after which the loss decreased steadily.
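For anyone hitting the same wall, the only change was the optimizer line:

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # was lr=0.001

A plain VGG16 without batch normalization seems to be quite sensitive to the learning rate; with Adam at 1e-3 the network apparently got stuck predicting near-uniform probabilities, which would explain the ~2.30 plateau above.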