Simple networks does not reach zero loss

lfolle · November 6, 2019, 6:35pm

Why is this simple network not converging despite same inputs and labels each epoch?

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt


class SimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(30, 20)
        self.lrelu1 = nn.LeakyReLU()
        self.lin2 = nn.Linear(20, 9)
        self.lrelu2 = nn.LeakyReLU()
        self.act = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.lin1(x)
        x = self.lrelu1(x)
        x = self.lin2(x)
        x = self.lrelu2(x)
        x = x.reshape(3, 3)
        return self.act(x)
            

model = SimpleModel()            
x = torch.randn(1, 30)
y = torch.tensor([[1., 0., 0.],
                  [0., 1., 0.],
                  [0., 0., 1.]])

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
epochs = 40000

loss_history = []
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(x)
    loss = criterion(output, y)
    loss_history.append(loss)
    loss.backward()
    optimizer.step()
    print('Loss: {:.3f}'.format(loss.item()))
    
print("target: {}".format(y))
print("output: {}".format(output))

plt.plot(np.linspace(1, epochs, epochs), loss_history)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

SimonW · November 6, 2019, 6:50pm

Because BCEWithLogitsLoss expects logits, not probabilities. Remove your softmax.

lfolle · November 6, 2019, 6:57pm

Thank you.
This did the trick for me:

criterion = nn.BCELoss()