I am trying to train a CNN to classify pictures but even after 100 epochs over the training set (around 60k images) the test accuracy stays at around 30%, which is definitely not enough, while the training accuracy oscillates between 95% and 97%, and the CCE loss oscillates between 4 and 6. Is there anything I can do to improve the test accuracy?
Here is the model
class BirdClassifier(nn.Module):
def __init__(self):
super().__init__()
# Input size: [3, 224, 224]
self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 10, kernel_size = 3, padding = 1) # Output size: [batch_size, 10, 224, 224]
self.pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2) # Output size: [batch_size, 10, 112, 112]
self.relu1 = nn.ReLU(True)
self.conv2 = nn.Conv2d(in_channels = 10, out_channels = 20, kernel_size = 3, padding = 1) # Output size: [batch_size, 20, 112, 112]
self.pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2) # Output size: [batch_size, 20, 56, 56]
self.relu2 = nn.ReLU(True)
self.conv3 = nn.Conv2d(in_channels = 20, out_channels = 30, kernel_size = 3, padding = 1) # Output size: [batch_size, 30, 112, 112]
self.pool3 = nn.MaxPool2d(kernel_size = 2, stride = 2) # Output size: [batch_size, 30, 28, 28]
self.relu3 = nn.ReLU(True)
self.fc1 = nn.Linear(in_features = 30 * 28 * 28, out_features = 1000)
self.fc2 = nn.Linear(in_features = 1000, out_features = 525)
self.relu4 = nn.ReLU(True)
def forward(self, x):
x = self.conv1(x)
x = self.pool1(x)
x = self.relu1(x)
x = self.conv2(x)
x = self.pool2(x)
x = self.relu2(x)
x = self.conv3(x)
x = self.pool3(x)
x = self.relu3(x)
x = self.fc1(x.view(x.size(0), -1))
x = self.fc2(x.view(x.size(0), -1))
x = self.relu4(x)
return x
And here are the train function and an evaluate function, which calculates the accuracy over a dataset:
def evaluate(model, dataset, device):
wrong, correct, total = 0, 0, 0
for data in tqdm(dataset):
img, expected_class = data
img = img.to(device)
output = torch.argmax(model(img.view(1,3,224,224)))
total += 1
if expected_class == output:
correct += 1
else: wrong += 1
return abs(correct - wrong) / total
def train(model, criterion, optimizer, train_dataloader, train_dataset, test_dataset, device, epochs = 1):
for epoch in range(0, epochs):
loss = 0
for data in tqdm(train_dataloader):
img, expected_output = data
img = img.to(device)
expected_output = torch.tensor(expected_output, device = device)
output = model(img.view(img.size(0), 3, 224, 224))
l = criterion(output, expected_output)
loss += l
optimizer.zero_grad()
l.backward()
optimizer.step()
print('Epoch [{}/{}], loss: {:.4f}'.format(epoch + 1, epochs, loss))
if epoch % 5 == 0:
print('Training accuracy: {:.4f}'.format(evaluate(model, train_dataset, device)))
print('Test accuracy: {:.4f}'.format(evaluate(model, test_dataset, device)))
Finally, here the model is loaded, trained and evaluated:
device = 'cpu'
use_saved_model = True
MODEL_PATH = '/kaggle/working/model.pth'
if cuda.is_available(): device = 'cuda'
model = BirdClassifier()
if use_saved_model:
model.load_state_dict(torch.load(MODEL_PATH))
print('Model loaded')
model = model.to(device)
summary(model, (3,224,224))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-5)
train(model, criterion, optimizer, train_dataloader, train_dataset, test_dataset, device, epochs = 10)
torch.save(model.to('cpu').state_dict(), MODEL_PATH)
# Model 1 trained epochs: 250