Hello dear community! I'm trying to write a simple classifier that predicts a letter from the input points, but it doesn't seem to converge: the loss is still around 2.5 after all the epochs. I tried changing the batch size, the number of layers, and other parameters, but it didn't help. I'm new to neural networks and can't figure out what exactly is wrong. I hope you have some tips for me!
process_data.py
import csv

import numpy as np
from torch.utils.data import Dataset, DataLoader


class MyData(Dataset):
    def __init__(self, path: str = "data_ac_full.csv"):
        labels, samples = MyData.read_label_samples_from_csv(path)
        self.np_samples = np.array(samples, dtype=np.float32)
        self._labels = np.array(labels, dtype=np.uint8)

    def __len__(self):
        return len(self._labels)

    def __getitem__(self, idx):
        return {"points": self.np_samples[idx], "labels": self._labels[idx]}

    @staticmethod
    def read_label_samples_from_csv(path: str):
        # subset of the full mapping (all letters except 'J' and 'Z'); this file only has A-C
        mapping = {"A": 1, "B": 2, "C": 3}
        labels, samples = [], []
        with open(path) as f:
            _ = next(f)  # skip header
            for line in csv.reader(f):
                label = line[0]
                labels.append(mapping[label])
                # every column after the label holds one "x: ... y: ... z: ..." string
                splitted = [MyData.split_samples(x) for x in line[2:]]
                splitted = list(np.concatenate(splitted).flat)
                samples.append(splitted)
        return labels, samples

    @staticmethod
    def split_samples(sample):
        # keep only the x and y components of each point
        y_pos = sample.find("y:")
        z_pos = sample.find("z:")
        x = float(sample[3:y_pos])
        y = float(sample[y_pos + 3 : z_pos])
        return [x, y]


def read_dataset(batch_size=32):
    trainset = MyData()
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
    testset = MyData()
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)
    return trainloader, testloader


if __name__ == "__main__":
    data = read_dataset()
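For reference, each column after the label in the CSV is a string holding the x/y/z values of one point, and split_samples keeps only x and y. A quick check with a made-up column value (my real numbers are different, this is just to show the format I parse) looks like this:

from process_data import MyData

sample = "x: 0.41 y: 0.52 z: 0.03"   # made-up value, same format as my CSV columns
print(MyData.split_samples(sample))  # -> [0.41, 0.52]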
train.py
import torch
import torch.nn as nn
import torch.optim as optim

from process_data import read_dataset

input_dim = 42
hidden_layers = 100
output_dim = 25


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, hidden_layers)
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_layers, hidden_layers)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(hidden_layers, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.act2(self.layer2(x))
        x = self.softmax(self.output(x))
        return x


def train(net, criterion, optimizer, trainloader, epoch):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs = data["points"].float()
        labels = data["labels"]
        # remove previous batch gradients
        optimizer.zero_grad()
        # forward propagation
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # backward propagation
        loss.backward()
        # optimize
        optimizer.step()
        running_loss += loss.item()
        print('[epoch: %d, batch: %5d] loss: %.6f' % (epoch, i, running_loss / (i + 1)))


def main():
    trainloader, testloader = read_dataset(batch_size=300)
    net = Net().float()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    for epoch in range(10):  # loop over the dataset
        train(net, criterion, optimizer, trainloader, epoch)
        scheduler.step()
    torch.save(net.state_dict(), "saved.pth")


if __name__ == '__main__':
    main()
With batch size = 32, one batch looks like this:
Labels = [1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 3, 1, 1, 3, 3, 1, 1, 3, 1]
Inputs size = torch.Size([32, 63])
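(I got these by grabbing a single batch from the DataLoader, roughly like this, not part of train.py:)

from process_data import read_dataset

trainloader, _ = read_dataset(batch_size=32)
batch = next(iter(trainloader))
print("Labels =", batch["labels"].tolist())
print("Inputs size =", batch["points"].shape)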
Output of train.py:
[epoch: 0, batch: 0] loss: 3.213218
[epoch: 0, batch: 1] loss: 3.213202
[epoch: 0, batch: 2] loss: 3.213065
[epoch: 0, batch: 3] loss: 3.212979
[epoch: 0, batch: 4] loss: 3.212907
[epoch: 0, batch: 5] loss: 3.212822
[epoch: 0, batch: 6] loss: 3.212732
[epoch: 0, batch: 7] loss: 3.212626
[epoch: 0, batch: 8] loss: 3.212534
[epoch: 1, batch: 0] loss: 3.211746
[epoch: 1, batch: 1] loss: 3.211465
[epoch: 1, batch: 2] loss: 3.211172
[epoch: 1, batch: 3] loss: 3.211001
[epoch: 1, batch: 4] loss: 3.210788
[epoch: 1, batch: 5] loss: 3.210574
[epoch: 1, batch: 6] loss: 3.210420
[epoch: 1, batch: 7] loss: 3.210246
[epoch: 1, batch: 8] loss: 3.210095
[epoch: 2, batch: 0] loss: 3.208078
[epoch: 2, batch: 1] loss: 3.207820
[epoch: 2, batch: 2] loss: 3.207630
[epoch: 2, batch: 3] loss: 3.207447
[epoch: 2, batch: 4] loss: 3.207174
[epoch: 2, batch: 5] loss: 3.206867
[epoch: 2, batch: 6] loss: 3.206626
[epoch: 2, batch: 7] loss: 3.206323
[epoch: 2, batch: 8] loss: 3.206030
[epoch: 3, batch: 0] loss: 3.203678
[epoch: 3, batch: 1] loss: 3.202639
[epoch: 3, batch: 2] loss: 3.202355
[epoch: 3, batch: 3] loss: 3.201975
[epoch: 3, batch: 4] loss: 3.201828
[epoch: 3, batch: 5] loss: 3.201533
[epoch: 3, batch: 6] loss: 3.201332
[epoch: 3, batch: 7] loss: 3.200817
[epoch: 3, batch: 8] loss: 3.200394
[epoch: 4, batch: 0] loss: 3.196789
[epoch: 4, batch: 1] loss: 3.196557
[epoch: 4, batch: 2] loss: 3.195881
[epoch: 4, batch: 3] loss: 3.195361
[epoch: 4, batch: 4] loss: 3.194702
[epoch: 4, batch: 5] loss: 3.194068
[epoch: 4, batch: 6] loss: 3.193679
[epoch: 4, batch: 7] loss: 3.192990
[epoch: 4, batch: 8] loss: 3.192376
[epoch: 5, batch: 0] loss: 3.186233
[epoch: 5, batch: 1] loss: 3.186157
[epoch: 5, batch: 2] loss: 3.186067
[epoch: 5, batch: 3] loss: 3.184781
[epoch: 5, batch: 4] loss: 3.183736
[epoch: 5, batch: 5] loss: 3.182839
[epoch: 5, batch: 6] loss: 3.181987
[epoch: 5, batch: 7] loss: 3.181091
[epoch: 5, batch: 8] loss: 3.179891
[epoch: 6, batch: 0] loss: 3.169108
[epoch: 6, batch: 1] loss: 3.168651
[epoch: 6, batch: 2] loss: 3.167292
[epoch: 6, batch: 3] loss: 3.166000
[epoch: 6, batch: 4] loss: 3.164232
[epoch: 6, batch: 5] loss: 3.162400
[epoch: 6, batch: 6] loss: 3.160793
[epoch: 6, batch: 7] loss: 3.158822
[epoch: 6, batch: 8] loss: 3.157000
[epoch: 7, batch: 0] loss: 3.131654
[epoch: 7, batch: 1] loss: 3.130803
[epoch: 7, batch: 2] loss: 3.128822
[epoch: 7, batch: 3] loss: 3.124050
[epoch: 7, batch: 4] loss: 3.120708
[epoch: 7, batch: 5] loss: 3.115770
[epoch: 7, batch: 6] loss: 3.110153
[epoch: 7, batch: 7] loss: 3.104420
[epoch: 7, batch: 8] loss: 3.099207
[epoch: 8, batch: 0] loss: 3.012532
[epoch: 8, batch: 1] loss: 3.004640
[epoch: 8, batch: 2] loss: 2.989662
[epoch: 8, batch: 3] loss: 2.979682
[epoch: 8, batch: 4] loss: 2.961897
[epoch: 8, batch: 5] loss: 2.942658
[epoch: 8, batch: 6] loss: 2.922926
[epoch: 8, batch: 7] loss: 2.898688
[epoch: 8, batch: 8] loss: 2.875924
[epoch: 9, batch: 0] loss: 2.632436
[epoch: 9, batch: 1] loss: 2.636438
[epoch: 9, batch: 2] loss: 2.621789
[epoch: 9, batch: 3] loss: 2.601568
[epoch: 9, batch: 4] loss: 2.600631
[epoch: 9, batch: 5] loss: 2.586250
[epoch: 9, batch: 6] loss: 2.581876
[epoch: 9, batch: 7] loss: 2.572590
[epoch: 9, batch: 8] loss: 2.566086