I have defined a small neural net and tried to train it. However, the loss did not have requires_grad set - that was curious. I set a breakpoint in the forward method of my neural net, and none of the intermediate variables created (embs, means, sim, out) had requires_grad set.
Here is my code:
class Cbow(nn.Module):
def __init__(self, vocab_size, hctx_len, emb_size):
super().__init__()
self.proj = nn.Linear(in_features=vocab_size, out_features=emb_size, bias=False)
self.hidden = nn.Linear(in_features=emb_size, out_features=vocab_size, bias=False)
def forward(self, x):
# x: (num_batches, 2*hctx_len, vocab_size)
embs = self.proj(x) # (num_batches, 2*hctx_len, emb_size)
means = embs.mean(dim=1) # (num_batches, emb_size)
sim = self.hidden(means) # (num_batches, vocab_size)
out = torch.softmax(sim, dim=1) # (num_batches, vocab_size)
# breakpoint()
return out
def fit_model_layers(model, train_loader, epochs, lr=0.01):
model.train()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
optimizer.zero_grad()
for e in range(epochs):
running_loss, num_batches = 0, 0
for x, y in train_loader:
out = model(x)
# calculate loss
loss = loss_fn(out, y) # loss.requires_grad = False !!!!!!
running_loss += loss.item()
num_batches += 1
# backprop + optimization step
loss.backward()
optimizer.step()
optimizer.zero_grad()
print(f'Epoch {e+1} loss: {running_loss / num_batches}')
I don’t quite understand what I do wrong. In the optimization example from PyTorch (Optimizing Model Parameters — PyTorch Tutorials 2.2.1+cu121 documentation) intermediate results in forward() all have requires_grad set.
What could be wrong?