PyTorch classifier model doesn't converge

Hello dear community. I'm trying to write a simple classifier that predicts a letter from its input, but it doesn't seem to converge: the loss is still around 2.5 at the end of training. I tried changing the batch size, the number of layers, and other parameters, but nothing changed. I'm new to neural networks and can't figure out what exactly is wrong. Hope you have some tips for me!

process_data.py

import csv
from torch.utils.data import Dataset, DataLoader
from typing import List
import numpy as np

class MyData(Dataset):
    def __init__(self, path: str = "data_ac_full.csv"):
        labels, samples = MyData.read_label_samples_from_csv(path)
        self.np_samples = np.array(samples, dtype=np.float32)
        self._labels = np.array(labels, dtype=np.uint8)

    def __len__(self):
        return len(self._labels)

    def __getitem__(self, idx):
        return {"points": self.np_samples[idx], "labels": self._labels[idx]}
    
    @staticmethod
    def read_label_samples_from_csv(path: str):
        mapping = {"A": 1, "B": 2, "C": 3}  # all of the letters except for 'J' and 'Z'

        labels, samples = [], []
        with open(path) as f:
            _ = next(f) # skip header
            for line in csv.reader(f):
                label = line[0]
                labels.append(mapping[label])
                splitted = [MyData.split_samples(x) for x in line[2:]]
                splitted = list(np.concatenate(splitted).flat)
                samples.append(splitted)
        return labels, samples
    
    @staticmethod
    def split_samples(sample):
        # each entry is expected to look like "x: <val> y: <val> z: <val>"; z is discarded
        y_pos = sample.find("y:")
        z_pos = sample.find("z:")
        x = float(sample[3:y_pos])
        y = float(sample[y_pos + 3 : z_pos])

        return [x, y]


def read_dataset(batch_size=32):
    trainset = MyData()
    trainloader = DataLoader(
        trainset, batch_size=batch_size, shuffle=True
    )

    testset = MyData()
    testloader = DataLoader(
        testset, batch_size=batch_size, shuffle=False
    )
    return trainloader, testloader


if __name__ == "__main__":
    data = read_dataset()

train.py

import torch.nn as nn
import torch
import torch.optim as optim
from torch.autograd import Variable

from process_data import read_dataset

input_dim = 42 
hidden_layers = 100 
output_dim = 25 

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, hidden_layers)
        self.act1 = nn.ReLU() 
        self.layer2 = nn.Linear(hidden_layers, hidden_layers) 
        self.act2 = nn.ReLU()
     
        self.output = nn.Linear(hidden_layers, output_dim) 
        self.softmax = nn.Softmax(dim=1)
 
    def forward(self, x):
        m = self.layer1(x) 
        x = self.act1(m)
        x = self.act2(self.layer2(x))

        x = self.softmax(self.output(x))
        return x


def train(net, criterion, optimizer, trainloader, epoch):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs = Variable(data["points"].float())
        labels = Variable(data['labels'])
        # remove previous epoch gradients
        optimizer.zero_grad()
        # forward propagation
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # backward propagation
        loss.backward()
        # optimize
        optimizer.step()

        running_loss += loss.item()
        print('[epoch: %d, batch: %5d] loss: %.6f' % (epoch, i, running_loss / (i + 1)))



def main():
    trainloader, testloader = read_dataset(batch_size=300)

    net = Net().float()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    for epoch in range(10):  # loop over the dataset
        train(net, criterion, optimizer, trainloader, epoch)
        scheduler.step()

    torch.save(net.state_dict(), "saved.pth")


if __name__ == '__main__':
    main()

With batch_size = 32, one batch looks like this:
Labels = [1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 3, 1, 1, 3, 1]
Inputs size = torch.Size([32, 63])

Output of train.py:
[epoch: 0, batch: 0] loss: 3.213218
[epoch: 0, batch: 1] loss: 3.213202
[epoch: 0, batch: 2] loss: 3.213065
[epoch: 0, batch: 3] loss: 3.212979
[epoch: 0, batch: 4] loss: 3.212907
[epoch: 0, batch: 5] loss: 3.212822
[epoch: 0, batch: 6] loss: 3.212732
[epoch: 0, batch: 7] loss: 3.212626
[epoch: 0, batch: 8] loss: 3.212534
[epoch: 1, batch: 0] loss: 3.211746
[epoch: 1, batch: 1] loss: 3.211465
[epoch: 1, batch: 2] loss: 3.211172
[epoch: 1, batch: 3] loss: 3.211001
[epoch: 1, batch: 4] loss: 3.210788
[epoch: 1, batch: 5] loss: 3.210574
[epoch: 1, batch: 6] loss: 3.210420
[epoch: 1, batch: 7] loss: 3.210246
[epoch: 1, batch: 8] loss: 3.210095
[epoch: 2, batch: 0] loss: 3.208078
[epoch: 2, batch: 1] loss: 3.207820
[epoch: 2, batch: 2] loss: 3.207630
[epoch: 2, batch: 3] loss: 3.207447
[epoch: 2, batch: 4] loss: 3.207174
[epoch: 2, batch: 5] loss: 3.206867
[epoch: 2, batch: 6] loss: 3.206626
[epoch: 2, batch: 7] loss: 3.206323
[epoch: 2, batch: 8] loss: 3.206030
[epoch: 3, batch: 0] loss: 3.203678
[epoch: 3, batch: 1] loss: 3.202639
[epoch: 3, batch: 2] loss: 3.202355
[epoch: 3, batch: 3] loss: 3.201975
[epoch: 3, batch: 4] loss: 3.201828
[epoch: 3, batch: 5] loss: 3.201533
[epoch: 3, batch: 6] loss: 3.201332
[epoch: 3, batch: 7] loss: 3.200817
[epoch: 3, batch: 8] loss: 3.200394
[epoch: 4, batch: 0] loss: 3.196789
[epoch: 4, batch: 1] loss: 3.196557
[epoch: 4, batch: 2] loss: 3.195881
[epoch: 4, batch: 3] loss: 3.195361
[epoch: 4, batch: 4] loss: 3.194702
[epoch: 4, batch: 5] loss: 3.194068
[epoch: 4, batch: 6] loss: 3.193679
[epoch: 4, batch: 7] loss: 3.192990
[epoch: 4, batch: 8] loss: 3.192376
[epoch: 5, batch: 0] loss: 3.186233
[epoch: 5, batch: 1] loss: 3.186157
[epoch: 5, batch: 2] loss: 3.186067
[epoch: 5, batch: 3] loss: 3.184781
[epoch: 5, batch: 4] loss: 3.183736
[epoch: 5, batch: 5] loss: 3.182839
[epoch: 5, batch: 6] loss: 3.181987
[epoch: 5, batch: 7] loss: 3.181091
[epoch: 5, batch: 8] loss: 3.179891
[epoch: 6, batch: 0] loss: 3.169108
[epoch: 6, batch: 1] loss: 3.168651
[epoch: 6, batch: 2] loss: 3.167292
[epoch: 6, batch: 3] loss: 3.166000
[epoch: 6, batch: 4] loss: 3.164232
[epoch: 6, batch: 5] loss: 3.162400
[epoch: 6, batch: 6] loss: 3.160793
[epoch: 6, batch: 7] loss: 3.158822
[epoch: 6, batch: 8] loss: 3.157000
[epoch: 7, batch: 0] loss: 3.131654
[epoch: 7, batch: 1] loss: 3.130803
[epoch: 7, batch: 2] loss: 3.128822
[epoch: 7, batch: 3] loss: 3.124050
[epoch: 7, batch: 4] loss: 3.120708
[epoch: 7, batch: 5] loss: 3.115770
[epoch: 7, batch: 6] loss: 3.110153
[epoch: 7, batch: 7] loss: 3.104420
[epoch: 7, batch: 8] loss: 3.099207
[epoch: 8, batch: 0] loss: 3.012532
[epoch: 8, batch: 1] loss: 3.004640
[epoch: 8, batch: 2] loss: 2.989662
[epoch: 8, batch: 3] loss: 2.979682
[epoch: 8, batch: 4] loss: 2.961897
[epoch: 8, batch: 5] loss: 2.942658
[epoch: 8, batch: 6] loss: 2.922926
[epoch: 8, batch: 7] loss: 2.898688
[epoch: 8, batch: 8] loss: 2.875924
[epoch: 9, batch: 0] loss: 2.632436
[epoch: 9, batch: 1] loss: 2.636438
[epoch: 9, batch: 2] loss: 2.621789
[epoch: 9, batch: 3] loss: 2.601568
[epoch: 9, batch: 4] loss: 2.600631
[epoch: 9, batch: 5] loss: 2.586250
[epoch: 9, batch: 6] loss: 2.581876
[epoch: 9, batch: 7] loss: 2.572590
[epoch: 9, batch: 8] loss: 2.566086

nn.CrossEntropyLoss expects raw logits (it applies log_softmax internally), so remove the nn.Softmax from your model.
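
A minimal sketch of that change, assuming the rest of your training script stays as posted: drop self.softmax and return the raw output of the last Linear layer.

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, hidden_layers)
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_layers, hidden_layers)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(hidden_layers, output_dim)
        # no nn.Softmax here: CrossEntropyLoss takes the raw logits

    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.act2(self.layer2(x))
        return self.output(x)  # raw logits

If you need probabilities at inference time, apply torch.softmax(net(x), dim=1) outside the model; during training, feed the logits straight into the loss.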

Thank you, that helped!
