Model is not improving enough

retr00h · May 20, 2023, 12:33pm

I am trying to train a CNN to classify pictures but even after 100 epochs over the training set (around 60k images) the test accuracy stays at around 30%, which is definitely not enough, while the training accuracy oscillates between 95% and 97%, and the CCE loss oscillates between 4 and 6. Is there anything I can do to improve the test accuracy?
Here is the model

class BirdClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        # Input size: [3, 224, 224]
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 10, kernel_size = 3, padding = 1) # Output size: [batch_size, 10, 224, 224]
        self.pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2) # Output size: [batch_size, 10, 112, 112]
        self.relu1 = nn.ReLU(True)
        self.conv2 = nn.Conv2d(in_channels = 10, out_channels = 20, kernel_size = 3, padding = 1) # Output size: [batch_size, 20, 112, 112]
        self.pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2) # Output size: [batch_size, 20, 56, 56]
        self.relu2 = nn.ReLU(True)
        self.conv3 = nn.Conv2d(in_channels = 20, out_channels = 30, kernel_size = 3, padding = 1) # Output size: [batch_size, 30, 112, 112]
        self.pool3 = nn.MaxPool2d(kernel_size = 2, stride = 2) # Output size: [batch_size, 30, 28, 28]
        self.relu3 = nn.ReLU(True)
        self.fc1 = nn.Linear(in_features = 30 * 28 * 28, out_features = 1000)
        self.fc2 = nn.Linear(in_features = 1000, out_features = 525)
        self.relu4 = nn.ReLU(True)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.relu2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.relu3(x)
        x = self.fc1(x.view(x.size(0), -1))
        x = self.fc2(x.view(x.size(0), -1))
        x = self.relu4(x)
        return x

And here are the train function and an evaluate function, which calculates the accuracy over a dataset:

def evaluate(model, dataset, device):
    wrong, correct, total = 0, 0, 0
    for data in tqdm(dataset):
        img, expected_class = data
        img = img.to(device)
        output = torch.argmax(model(img.view(1,3,224,224)))
        total += 1
        if expected_class == output:
            correct += 1
        else: wrong += 1
    return abs(correct - wrong) / total

def train(model, criterion, optimizer, train_dataloader, train_dataset, test_dataset, device, epochs = 1):
    for epoch in range(0, epochs):
        loss = 0
        for data in tqdm(train_dataloader):
            img, expected_output = data
            img = img.to(device)
            expected_output = torch.tensor(expected_output, device = device)
            output = model(img.view(img.size(0), 3, 224, 224))
            l = criterion(output, expected_output)
            loss += l
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        print('Epoch [{}/{}], loss: {:.4f}'.format(epoch + 1, epochs, loss))
        if epoch % 5 == 0:
            print('Training accuracy: {:.4f}'.format(evaluate(model, train_dataset, device)))
            print('Test accuracy: {:.4f}'.format(evaluate(model, test_dataset, device)))

Finally, here the model is loaded, trained and evaluated:

device = 'cpu'
use_saved_model = True
MODEL_PATH = '/kaggle/working/model.pth'
if cuda.is_available(): device = 'cuda'

model = BirdClassifier()
if use_saved_model:
    model.load_state_dict(torch.load(MODEL_PATH))
    print('Model loaded')
    
model = model.to(device)
summary(model, (3,224,224))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-5)
train(model, criterion, optimizer, train_dataloader, train_dataset, test_dataset, device, epochs = 10)
torch.save(model.to('cpu').state_dict(), MODEL_PATH)
# Model 1 trained epochs: 250

ptrblck · May 20, 2023, 7:49pm

Using a ReLU on the output is often not a good idea as it clips the logits, so I would probably remove it.
Also, without an activatiom function between self.fc1 and self.fc2 these layers could collapse to a single one so either add an activation or remove one of them.