Calculate loss for one-hot vectors

Hi everyone,

My ground-truth data is one-hot encoded, e.g. ground_truth[56] = [0, 1, 0, 0, 0].
Now I want to train my classifier to output a five-element vector like the ground-truth vectors, and I would also like to check my accuracy. With the code below I get outputs like [[ 2.2090e-02, 1.6277e-02, 3.2342e-01, 3.7611e-02, 4.8665e-01], …], but my accuracy stays at zero even though the loss is decreasing. Could you please tell me what I should do?

My NN:

import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(7056, 120)    # 16 * 21 * 21 = 7056 for 96x96 inputs
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)    # flatten to [batch_size, 7056]
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

My training loop:

from torch.autograd import Variable

model = Net()
learning_rate = 0.001
criterion = torch.nn.MSELoss()    # Softmax is internally computed.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

train_cost = []
train_accu = []
batch_size = 50
training_epochs = 5
total_batch = len(train_dataset) // batch_size

def compute_accuracy(Y_target, hypothesis):
    # element-wise comparison of the raw outputs with the one-hot targets
    Y_prediction = hypothesis
    accuracy = ((Y_prediction.data == Y_target.data).float().mean())
    return accuracy.item()


for epoch in range(training_epochs):
    avg_cost = 0
    all_hyp = []

    for i, (batch_X, batch_Y) in enumerate(train_loader):
        
        # Select a minibatch
        X = Variable(batch_X.float())    # images are already 96x96, no reshape needed
        Y = Variable(batch_Y.float())    # labels are the one-hot vectors
        
        # initialization of the gradients
        optimizer.zero_grad()
        
        # Forward propagation: compute the output
        hypothesis = model(X)

        # Computation of the cost J
        cost = criterion(hypothesis, Y) # <= compute the loss function
        
        # Backward propagation
        cost.backward() # <= compute the gradients
        
        # Update parameters (weights and biases)
        optimizer.step()
        
        # Print some performance to monitor the training
        train_accu.append(compute_accuracy(Y, hypothesis))
        train_cost.append(cost.item())
        all_hyp.append(hypothesis)
        #if i % 200 == 0:
        print("Epoch= {},\t batch = {},\t cost = {:2.4f},\t accuracy = {}".format(epoch+1, i, train_cost[-1], train_accu[-1]))
        
        avg_cost += cost.data / total_batch

    print("[Epoch: {:>4}], averaged cost = {:>.9}".format(epoch + 1, avg_cost.item()))


Start of training:
Epoch= 1,	 batch = 0,	 cost = 1.0654,	 accuracy = 0.0
Epoch= 1,	 batch = 1,	 cost = 484.8475,	 accuracy = 0.0
Epoch= 1,	 batch = 2,	 cost = 76.3100,	 accuracy = 0.0
Epoch= 1,	 batch = 3,	 cost = 40.9975,	 accuracy = 0.0
Epoch= 1,	 batch = 4,	 cost = 4.0524,	 accuracy = 0.0
Epoch= 1,	 batch = 5,	 cost = 3.7894,	 accuracy = 0.0
Epoch= 1,	 batch = 6,	 cost = 2.8075,	 accuracy = 0.0
Epoch= 1,	 batch = 7,	 cost = 1.5608,	 accuracy = 0.0
Epoch= 1,	 batch = 8,	 cost = 1.0739,	 accuracy = 0.0
Epoch= 1,	 batch = 9,	 cost = 0.4769,	 accuracy = 0.0
Epoch= 1,	 batch = 10,	 cost = 0.1573,	 accuracy = 0.0
Epoch= 1,	 batch = 11,	 cost = 0.2139,	 accuracy = 0.0
Epoch= 1,	 batch = 12,	 cost = 0.3936,	 accuracy = 0.0
Epoch= 1,	 batch = 13,	 cost = 0.1600,	 accuracy = 0.0

all_hyp[0]:
tensor([[ 2.2090e-02,  1.6277e-02,  3.2342e-01,  3.7611e-02,  4.8665e-01],
        [ 2.5872e-02,  2.0304e-02,  3.1909e-01,  2.6319e-02,  4.7692e-01],
        [ 3.8362e-02,  3.4545e-02,  3.0983e-01,  2.6629e-02,  4.4924e-01],
        [ 2.2708e-02,  4.8960e-02,  3.1353e-01,  1.9018e-02,  5.0377e-01],
        [ 2.2399e-02,  4.9786e-02,  3.2938e-01,  2.7199e-02,  5.2012e-01],
        [ 2.7666e-02,  6.1788e-02,  3.1604e-01,  1.9234e-02,  4.9765e-01],
        [ 2.0873e-02,  3.2703e-02,  3.3750e-01,  3.4890e-02,  5.4533e-01],
....

This comment is wrong and might come from an older code snippet:

criterion = torch.nn.MSELoss()    # Softmax is internally computed.

For a multi-class classification I would recommend using nn.CrossEntropyLoss and passing the targets as class indices to the criterion.
You could create the class indices using target = torch.argmax(Y_target, 1).
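
Putting that together, a minimal sketch of the changed loss computation (assuming Y still holds the one-hot float vectors, as in your loop) could look like:

criterion = nn.CrossEntropyLoss()      # expects raw logits and class indices, applies log_softmax internally

# inside the training loop:
hypothesis = model(X)                  # raw logits, shape [batch_size, 5]
target = torch.argmax(Y, 1)            # one-hot rows -> class indices, shape [batch_size]
cost = criterion(hypothesis, target)
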
Also, to calculate the accuracy, use the prediction with the highest logit:

prediction = torch.argmax(hypothesis, 1)
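
Your compute_accuracy could then be rewritten along these lines (a sketch that keeps your original signature):

def compute_accuracy(Y_target, hypothesis):
    prediction = torch.argmax(hypothesis, 1)    # index of the highest logit per sample
    target = torch.argmax(Y_target, 1)          # index of the 1 in each one-hot row
    accuracy = (prediction == target).float().mean()
    return accuracy.item()

Note that the model should keep returning the raw logits (no softmax in forward), since nn.CrossEntropyLoss applies log_softmax internally.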