I would like to pass in a tensor that contains 4 values like this
[.45, .78, .23, .56]
And get an output from the network with only the last neuron light up like this
[0,0,0,1]
I would then like to pass in a second tensor [.67, .89, .34, .12]
and get an output from that looks like this [1,0,0,0]
with only the first neuron light up.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
train = [
[torch.FloatTensor([[.45, .78, .23, .56]]), torch.FloatTensor([[0,0,0,1]]) ],
[torch.FloatTensor([[.67, .89, .34, .12]]), torch.FloatTensor([[1,0,0,0]]) ]
]
class Net(nn.Module):
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(4, 2)
self.fc2 = nn.Linear(2, 4)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
return F.softmax(x, dim=0)
net = Net()
print(net)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9, nesterov=True)
criterion = nn.CrossEntropyLoss()
for epoch in range(10):
for data in train:
X, y = data
net.zero_grad()
output = net(X)
loss = criterion(output, y)
loss.backward()
optimizer.step()
print(loss)
# test
X, y = torch.tensor([.45, .78, .23, .56]), torch.tensor([0,0,0,1])
print(net(X), y)
X, y = torch.tensor([.67, .89, .34, .12]), torch.tensor([1,0,0,0])
print(net(X), y)
unfortunately the network is output rubbish that doesn’t correspond to any particular neuron light up
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor(1.3863, grad_fn=<DivBackward1>)
tensor([0.3175, 0.1841, 0.2063, 0.2922], grad_fn=<SoftmaxBackward0>) tensor([0, 0, 0, 1])
tensor([0.3129, 0.1935, 0.2102, 0.2834], grad_fn=<SoftmaxBackward0>) tensor([1, 0, 0, 0])
I’m getting outputs like this [0.3175, 0.1841, 0.2063, 0.2922]
which is not what the network should be outputing.
I think this might be due to the way in which the CrossEntropyLoss function is comparing the training data (X) and the label y. I think this line is causing the problem loss = criterion(output, y)
.
What loss function should I be using to get the desired results?