Hello!

I have the following code for classification on 2x128x128 pictures, which cannot even overfit at all, however loss tends to zero when applying huge number of epochs (~500).

In case of classification between 15 classes: accuracy on training set goes up until only 60%, accuracy on test set is around 58%.

While in case of 5 classes: these values are 84 and 75 respectively.

(Still cannot overfit even with many epochs and using no regularization.)

I am pretty sure I’m making a rookie mistake, but could not figure it out yet.

Architecture:

```
nn.Sequential(
nn.Conv2d(2, 17, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
nn.BatchNorm2d(17, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(17, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1)),
nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(32, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
nn.BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(48, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1)),
nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
nn.BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(96, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1)),
nn.BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
nn.BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(192, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1)),
nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(256, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
nn.BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Conv2d(192, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1)),
nn.BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.AdaptiveAvgPool2d(output_size=2),
nn.Flatten(start_dim=1, end_dim=-1),
nn.Dropout(p=0.2, inplace=False),
nn.Linear(in_features=512, out_features=256, bias=True),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.Dropout(p=0.2, inplace=False),
nn.Linear(in_features=256, out_features=numClasses, bias=True),
nn.LogSoftmax(dim=1))
```

Training:

```
def TrainModel(model, numEpochs, lossFunction, optimizer, dataSource, saveToFolder, scheduler=None):
epochLossSum = 0.0
counter = 0.0
for epoch in range(numEpochs):
print("Epoch: ", epoch+1)
for i, batch in enumerate(dataSource, 0):
inputs = batch['input']
labels = batch['label']
outputs = model(inputs)
loss = lossFunction(outputs, labels)
epochLossSum = epochLossSum + loss
counter = counter + 1
loss.backward()
optimizer.step()
optimizer.zero_grad()
if scheduler is not None:
scheduler.step()
print("Average loss in epoch ", epoch + 1)
print('%.10f' % (epochLossSum/counter))
epochLossSum = 0.0
counter = 0.0
print('Finished Training')
torch.save(model.state_dict(), saveToFolder)
print('Model saved to ', saveToFolder)
```

Accuracy check:

```
def ModelAccuracy(model, trainingData, validationData):
goodTrain = 0.0
badTrain = 0.0
goodVal = 0.0
badVal = 0.0
for i, batch in enumerate(validationData, 0):
inputs = batch['input']
labels = batch['label']
inputs = inputs.unsqueeze(0)
labels = labels.unsqueeze(0)
prediction = np.argmax(model(inputs).detach().numpy(), axis=1)
if(labels.item() == prediction[0]):
goodVal = goodVal + 1
else:
badVal = badVal + 1
print("Accuracy on validation set: ", 100*goodVal/(goodVal+badVal), "%")
for i, batch in enumerate(trainingData, 0):
inputs = batch['input']
labels = batch['label']
inputs = inputs.unsqueeze(0)
labels = labels.unsqueeze(0)
prediction = np.argmax(model(inputs).detach().numpy(), axis=1)
if(labels.item() == prediction[0]):
goodTrain = goodTrain + 1
else:
badTrain = badTrain + 1
print("Accuracy on training set: ", 100*goodTrain/(goodTrain+badTrain), "%")
```

And putting these together:

```
if __name__ == "__main__":
classifier = DHMClassifier(n_classes=numClasses)
classifier = classifier.to(torch.device("cuda"))
classifier = classifier.train()
trainingData = DataAssembler(dataSource, train=True, onCuda=True)
trainingLoader = DataLoader(trainingData, batch_size=10, num_workers=0)
optimizer = optim.Adam(classifier.parameters(), lr=0.0001)
lossFunction = nn.NLLLoss(weight=trainingData.GetBalancerCUDA())
TrainModel(classifier, 500, lossFunction, \
optimizer, trainingLoader, savingDirectory, scheduler=None)
classifier = classifier.eval()
validationData = DataAssembler(dataSource, train=False, onCuda=False)
EvaluatedModel = DHMClassifier(n_classes=numClasses)
EvaluatedModel.load_state_dict(torch.load(savingDirectory))
EvaluatedModel = EvaluatedModel.to(torch.device("cpu"))
trainingData.DictToCPU()
ModelAccuracy(EvaluatedModel, trainingData, validationData)
```

Any help or hints are really appreciated. (I know that some code parts are not very efficient, but that shouldn’t cause problem I believe.)