@Oli @tymokvo
Thanks for the replies. I removed the softmax layer, but I'm not sure that is the right thing to do, because as far as I know softmax is what is normally used for multi-class classification.
Basically, I am trying to build a super simple multi-class classifier in PyTorch! I have done this easily in Keras, but I’m not sure what I’m doing wrong here.
Here is the new model. I am sure something is wrong, because the prediction is always the same.
class NeuralNet(nn.Module):
    """Feed-forward classifier with a single hidden layer.

    The network computes ``layer2(relu(layer1(x)))`` and returns the raw
    class scores (logits). No softmax is applied here; that is the form
    ``nn.CrossEntropyLoss`` expects, because the loss applies log-softmax
    itself.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of scores produced for each input row.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the unnormalised class scores for the batch ``x``."""
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        return out
# Build float feature tensors and integer label tensors for both splits.
# Every column except the last one is used as a feature; the class ids come
# from the separate x_train_label / x_test_label sequences.
# (x_train / x_test are presumably pandas DataFrames - confirm.)
# Tensors are used directly: wrapping them in Variable has been unnecessary
# since PyTorch 0.4, and LongTensor is already int64 so no extra .long() call.
# NOTE(review): the features are passed in unscaled - consider standardising
# them if training fails to make progress.
train_x = torch.from_numpy(np.asanyarray(x_train[x_train.columns[:-1]])).float()
train_y = torch.LongTensor(x_train_label)
test_x = torch.from_numpy(np.asanyarray(x_test[x_test.columns[:-1]])).float()
test_y = torch.LongTensor(x_test_label)
The inputs and labels look like this:
train_x, x_train_label
(tensor([[ 4.3734, 14.7227, 14.0051, ..., 10.8181, 10.1554, 10.5231],
[ 1.3698, 16.1158, 16.2395, ..., 10.0263, 10.1859, 11.0192],
[ 5.4781, 13.8524, 13.4969, ..., 10.2149, 10.1836, 11.0694],
...,
[ 0.9918, 15.1710, 12.5128, ..., 9.5816, 9.3345, 10.6840],
[ 0.0000, 11.7931, 11.7571, ..., 9.9917, 10.2152, 11.6365],
[ 0.8513, 13.1537, 11.8680, ..., 9.6107, 10.6265, 10.2632]]),
tensor([28, 22, 19, ..., 26, 29, 5]))
# Pair every feature row with its label so a DataLoader can batch them together.
tr_latent_X = data_utils.TensorDataset(train_x, train_y)
te_latent_X = data_utils.TensorDataset(test_x, test_y)

# NOTE(review): the training batches are served in a fixed order
# (shuffle=False); shuffling is the usual choice for SGD-style training -
# confirm whether the fixed order is intentional.
train_loader_X = torch.utils.data.DataLoader(
    dataset=tr_latent_X,
    batch_size=bs,
    shuffle=False,
)
# The test loader must wrap te_latent_X; building it from the training dataset
# would make every "test" prediction a prediction on the training rows.
test_loader_X = torch.utils.data.DataLoader(
    dataset=te_latent_X,
    batch_size=bs,
    shuffle=False,
)

# 5000 input features, 1000 hidden units, 32 output classes.
# NOTE(review): despite its name, `vae` holds the NeuralNet classifier.
vae = NeuralNet(5000, 1000, 32)
# CrossEntropyLoss takes raw logits plus integer class indices and, with its
# default reduction, returns the mean loss over the batch.
loss_function = nn.CrossEntropyLoss()
# NOTE(review): lr=0.02 is 20x Adam's default of 1e-3. Combined with unscaled
# inputs this may be what drives the network to a constant output - try a
# smaller value.
optimizer = torch.optim.Adam(vae.parameters(), lr=0.02)
def train(epoch):
    """Run one pass over ``train_loader_X`` and print progress.

    Updates the module-level ``vae`` in place using the module-level
    ``optimizer`` and ``loss_function``. Returns nothing.

    Args:
        epoch: Epoch number; used only in the printed messages.
    """
    vae.train()
    train_loss = 0.0
    for batch_idx, (data, label) in enumerate(train_loader_X):
        optimizer.zero_grad()
        out = vae(data)
        loss = loss_function(out, label)
        loss.backward()
        optimizer.step()

        # Assuming loss_function keeps its default mean reduction, `loss` is
        # already the per-sample average for this batch. Dividing it by the
        # batch size again (as before) under-reports it by that factor, so it
        # is printed as-is and weighted by the batch size for the epoch total.
        batch_loss = loss.item()
        train_loss += batch_loss * len(data)
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader_X.dataset),
                100. * batch_idx / len(train_loader_X), batch_loss))
    # Sum of per-sample losses divided by the sample count = true epoch mean.
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader_X.dataset)))
# Train for nine epochs, numbered 1 through 9 (range's upper bound is exclusive).
for epoch in range(1, 10):
    train(epoch)
Train Epoch: 1 [0/6483 (0%)] Loss: 0.093940
Train Epoch: 1 [6400/6483 (98%)] Loss: 0.051790
====> Epoch: 1 Average loss: 3.8580
Train Epoch: 2 [0/6483 (0%)] Loss: 0.052003
Train Epoch: 2 [6400/6483 (98%)] Loss: 0.051288
====> Epoch: 2 Average loss: 0.0511
Train Epoch: 3 [0/6483 (0%)] Loss: 0.051706
Train Epoch: 3 [6400/6483 (98%)] Loss: 0.051227
====> Epoch: 3 Average loss: 0.0508
Train Epoch: 4 [0/6483 (0%)] Loss: 0.051529
Train Epoch: 4 [6400/6483 (98%)] Loss: 0.051222
====> Epoch: 4 Average loss: 0.0507
Train Epoch: 5 [0/6483 (0%)] Loss: 0.051466
Train Epoch: 5 [6400/6483 (98%)] Loss: 0.051220
====> Epoch: 5 Average loss: 0.0507
Train Epoch: 6 [0/6483 (0%)] Loss: 0.051450
Train Epoch: 6 [6400/6483 (98%)] Loss: 0.051218
====> Epoch: 6 Average loss: 0.0507
Train Epoch: 7 [0/6483 (0%)] Loss: 0.051448
Train Epoch: 7 [6400/6483 (98%)] Loss: 0.051215
====> Epoch: 7 Average loss: 0.0507
Train Epoch: 8 [0/6483 (0%)] Loss: 0.051449
Train Epoch: 8 [6400/6483 (98%)] Loss: 0.051212
====> Epoch: 8 Average loss: 0.0507
Train Epoch: 9 [0/6483 (0%)] Loss: 0.051451
Train Epoch: 9 [6400/6483 (98%)] Loss: 0.051210
====> Epoch: 9 Average loss: 0.0507
# Collect the model's raw class scores and the matching labels for every
# batch served by test_loader_X.
out = []  # one tensor of class scores per batch
k = []  # label tensors, in the same batch order as `out`
vae.eval()  # switch any train/eval-dependent layers to inference behaviour
# Prediction needs no gradients; without no_grad() every stored output keeps
# its autograd graph alive (visible as grad_fn=... when printed).
with torch.no_grad():
    for data, label in test_loader_X:
        out.append(vae(data))
        k.append(label)
And here is what the predictions on the test set look like:
[tensor([[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
...,
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857]],
grad_fn=<AddmmBackward>),
tensor([[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
...,
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857],
[ 0.3506, 0.4214, 0.6405, ..., -0.5726, 0.3580, -1.2857]],
grad_fn=<AddmmBackward>),