I read a couple of threads here, but I could not resolve the issue in my code. I am a newbie to deep learning and PyTorch. I am wondering why the loss does not change and why, at the end of training, the gradients printed for every parameter are all zeros.
I would appreciate any help with this.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.utils import shuffle  # assuming sklearn's shuffle; the import was not shown in my snippet

# args is an argparse namespace defined elsewhere (args.batch_size = 128, args.fixed_dimension = 128)

def generate_minibatch(X, y):  # X and y are numpy matrices
    X, y = shuffle(X, y)
    for i in range(0, X.shape[0], args.batch_size):
        X_mini = X[i:i + args.batch_size]
        y_mini = y[i:i + args.batch_size]
        y_mini = y_mini.reshape(-1, 1)
        X_mini = torch.FloatTensor(X_mini)
        y_mini = torch.FloatTensor(y_mini)
        y_mini = y_mini.view(-1, 1)  # already (batch, 1) after the reshape above
        yield X_mini, y_mini
class reviewClassifier(nn.Module):
    def __init__(self):
        super(reviewClassifier, self).__init__()
        self.fc1 = nn.Linear(in_features=args.fixed_dimension, out_features=64)  # fixed_dimension is 128
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        x = x.view(-1, args.fixed_dimension)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))  # ReLU applied to the output layer as well
        y_pred = torch.sigmoid(x)
        return y_pred
classifier = reviewClassifier()
loss_func = nn.BCELoss()
optimizer = optim.Adam(classifier.parameters(), lr=0.01)

for epoch in range(100):
    total_loss = 0
    for X_mini, y_mini in generate_minibatch(X_train, y_train):
        classifier.zero_grad()
        y_pred = classifier.forward(x=X_mini.float())
        loss = loss_func(y_pred, y_mini)
        total_loss = torch.add(total_loss, loss.data)
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(total_loss)

for idx, param in enumerate(classifier.parameters()):
    print(' >> ', idx, param.grad)

print('Finished!')
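Before showing the output: a quick shape sanity check I can run on one minibatch and one forward pass. This is only a minimal sketch, assuming X_train is a numpy array whose rows have args.fixed_dimension = 128 features and y_train holds 0/1 labels:

# Sanity-check sketch, separate from the training loop above.
# Assumption: X_train is a numpy array with 128 columns, y_train holds 0/1 labels.
X_mini, y_mini = next(generate_minibatch(X_train, y_train))
print(X_mini.shape, y_mini.shape)                # e.g. torch.Size([128, 128]) torch.Size([128, 1])
with torch.no_grad():
    y_pred = classifier(X_mini)
print(y_pred.shape, y_pred.min(), y_pred.max())  # sigmoid output lies in (0, 1)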
The loss printed every 10 epochs and the per-parameter gradients (param.grad; I had guessed these were the trainable weights) are shown here:
tensor(51.2170)
tensor(43.6682)
tensor(43.6682)
tensor(43.6682)
tensor(43.6682)
tensor(43.6682)
tensor(43.6682)
tensor(43.6682)
tensor(43.6682)
tensor(43.6682)
>> 0 tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]])
>> 1 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
>> 2 tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]])
>> 3 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0.])
>> 4 tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0.]])
>> 5 tensor([0.])
Finished!
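To check whether the zeros above are the gradients rather than the weights themselves, I can print both right after backward() and before optimizer.step(). A minimal sketch, assuming X_mini and y_mini come from one minibatch of the generator above:

# Minimal sketch: compare weight norms and gradient norms right after backward(),
# before optimizer.step(). Assumes X_mini, y_mini come from generate_minibatch above.
classifier.zero_grad()
y_pred = classifier(X_mini)
loss = loss_func(y_pred, y_mini)
loss.backward()
for name, param in classifier.named_parameters():
    grad_norm = param.grad.norm().item() if param.grad is not None else float('nan')
    print(f'{name}: weight norm = {param.data.norm().item():.4f}, grad norm = {grad_norm:.4f}')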