Hello, I’m new to Deep Learning and I’m trying to use VGG16 to build a classifier for CIFAR-10 dataset (I increased the size of the images to 224x224 to match with the original architecture of VGG16). The problem is the loss value in the training loop is very big and doesn’t have a tendency to reduce.
I tried to compare my code with other implementations, and the most notable difference IMO, is the data augmentation process. I copied it to my code, but the loss didn’t really change compared to my original code. Here is my original code:
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_classes = 10
n_epochs = 100
batch_size = 100
learning_rate = 0.001
train_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),])
test_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor()])
train_dataset = datasets.CIFAR10(root='./data',
train=True,
transform=train_transform,
download=True)
test_dataset = datasets.CIFAR10(root='./data',
train=False,
transform=test_transform,
download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)
example = iter(train_loader)
example_data, example_target = next(example)
for i in range(6):
plt.subplot(2,3,i+1)
plt.imshow(example_data[i].permute(1,2,0))
plt.show()
class customVGG16(nn.Module):
def __init__(self, num_classes=10):
super().__init__()
self.features = nn.Sequential(
# Block 1
nn.Conv2d(3, 64, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(64, 64, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 2
nn.Conv2d(64, 128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(128, 128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 3
nn.Conv2d(128, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 4
nn.Conv2d(256, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
# Block 5
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
)
self.classifier = nn.Sequential(
# nn.Linear(512, 4096),
nn.Linear(25088, 4096),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(4096, num_classes),
)
def forward(self, x):
x = self.features(x)
x = torch.flatten(x, 1)
x = self.classifier(x)
return x
model = customVGG16(num_classes=n_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.01)
for epoch in range(n_epochs):
model.train()
for i, (inputs, labels) in enumerate(train_loader):
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# if (i+1) % 100 == 0:
print (f'Epoch [{epoch+1}/{n_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
scheduler.step()
Here is the output after the first few batches, the remaining are practically the same:
Epoch [1/100], Step [1/500], Loss: 2.3027
Epoch [1/100], Step [2/500], Loss: 2.9318
Epoch [1/100], Step [3/500], Loss: 2.3184
Epoch [1/100], Step [4/500], Loss: 2.2989
Epoch [1/100], Step [5/500], Loss: 2.2904
Epoch [1/100], Step [6/500], Loss: 2.4422
Epoch [1/100], Step [7/500], Loss: 2.3080
Epoch [1/100], Step [8/500], Loss: 2.3024
Epoch [1/100], Step [9/500], Loss: 2.2964
Epoch [1/100], Step [10/500], Loss: 2.3805
Why is the training performance so poor? And how to fix it?
Any help would be appreciated.