Why are the parameter gradients not yet defined when the full backward hook fires for the first module (the last one to run during the backward pass)?
Thank you. Minimal repro:
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(3, 2), nn.Linear(2, 1))

def hook(module, grad_input, grad_output):
    # Report each parameter's .grad (shape or None) alongside its own shape.
    for k, p in module.named_parameters():
        grad = tuple(p.grad.shape) if p.grad is not None else None
        print(f"{k:20s} {grad} {tuple(p.shape)}")

model[0].register_full_backward_hook(hook)
model[1].register_full_backward_hook(hook)

x = torch.randn(1, 3)
out = model(x)
out.mean().backward()
Output (the hook on model[1] fires first during backward, then the hook on model[0]):

weight               (1, 2) (1, 2)
bias                 (1,) (1,)
weight               None (2, 3)
bias                 None (2,)
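
For comparison, registering a hook on each parameter tensor instead reports the gradients as expected, since such a hook only fires after the gradient has been accumulated into .grad. A minimal sketch, assuming a recent PyTorch (2.1+, where Tensor.register_post_accumulate_grad_hook is available):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(3, 2), nn.Linear(2, 1))

for name, p in model.named_parameters():
    # Fires only after autograd has accumulated the gradient into p.grad,
    # so p.grad is guaranteed to be populated at this point.
    p.register_post_accumulate_grad_hook(
        lambda param, name=name: print(f"{name:20s} {tuple(param.grad.shape)}")
    )

out = model(torch.randn(1, 3))
out.mean().backward()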