Why is this simple network not converging despite same inputs and labels each epoch?
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
class SimpleModel(nn.Module):
def __init__(self):
super().__init__()
self.lin1 = nn.Linear(30, 20)
self.lrelu1 = nn.LeakyReLU()
self.lin2 = nn.Linear(20, 9)
self.lrelu2 = nn.LeakyReLU()
self.act = nn.Softmax(dim=1)
def forward(self, x):
x = self.lin1(x)
x = self.lrelu1(x)
x = self.lin2(x)
x = self.lrelu2(x)
x = x.reshape(3, 3)
return self.act(x)
model = SimpleModel()
x = torch.randn(1, 30)
y = torch.tensor([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)
epochs = 40000
loss_history = []
for epoch in range(epochs):
optimizer.zero_grad()
output = model(x)
loss = criterion(output, y)
loss_history.append(loss)
loss.backward()
optimizer.step()
print('Loss: {:.3f}'.format(loss.item()))
print("target: {}".format(y))
print("output: {}".format(output))
plt.plot(np.linspace(1, epochs, epochs), loss_history)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()