This won’t work, as the forward pass in net2 isn’t tracked. You could instead set the requires_grad attribute of all parameters of net2 to False, which would work: the frozen parameters won’t accumulate gradients, but the operations inside net2 are still recorded in the autograd graph (their input requires gradients), so the gradients can flow back to net1:
import torch
import torch.nn as nn

net1 = nn.Linear(10, 10)
net2 = nn.Linear(10, 10)

# freeze all parameters of net2
for param in net2.parameters():
    param.requires_grad_(False)

input = torch.randn(1, 10)
intermediate = net1(input)
output = net2(intermediate)
loss = ((output - torch.ones_like(output))**2).sum()
loss.backward()

# net1's parameters still receive gradients
print([p.grad for p in net1.parameters()])
> [tensor([[ 1.5799, 0.2672, 0.5110, 0.3669, 1.2625, -1.0545, 0.9783, 0.0497,
0.3349, -0.8837],
[ 0.5785, 0.0978, 0.1871, 0.1343, 0.4623, -0.3861, 0.3582, 0.0182,
0.1226, -0.3236],
[ 0.7554, 0.1277, 0.2443, 0.1754, 0.6037, -0.5042, 0.4677, 0.0237,
0.1601, -0.4225],
[ 2.8158, 0.4762, 0.9107, 0.6540, 2.2502, -1.8794, 1.7435, 0.0885,
0.5969, -1.5751],
[ 1.6854, 0.2850, 0.5451, 0.3914, 1.3468, -1.1249, 1.0436, 0.0530,
0.3572, -0.9427],
[-6.6639, -1.1269, -2.1553, -1.5477, -5.3253, 4.4478, -4.1262, -0.2094,
-1.4125, 3.7275],
[-2.5643, -0.4336, -0.8294, -0.5955, -2.0492, 1.7115, -1.5878, -0.0806,
-0.5435, 1.4344],
[ 3.9104, 0.6613, 1.2647, 0.9082, 3.1249, -2.6100, 2.4213, 0.1229,
0.8289, -2.1873],
[-0.9989, -0.1689, -0.3231, -0.2320, -0.7982, 0.6667, -0.6185, -0.0314,
-0.2117, 0.5587],
[ 2.2345, 0.3779, 0.7227, 0.5190, 1.7857, -1.4914, 1.3836, 0.0702,
0.4737, -1.2499]]), tensor([-0.9480, -0.3471, -0.4533, -1.6896, -1.0113, 3.9986, 1.5386, -2.3464,
0.5994, -1.3408])]
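The gradients reach net1 because net2's output requires gradients as long as its input does; only net2's own parameters are excluded from the graph. A quick check, reusing the tensors from above:

print(intermediate.requires_grad)
> True
print(output.requires_grad)
> True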
# net2's parameters were frozen, so they don't get any gradients
print([p.grad for p in net2.parameters()])
> [None, None]
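As a side note, you don't need the explicit loop: nn.Module also provides a requires_grad_ method, which sets the flag on all (recursively collected) parameters of the module, so freezing net2 in a single call should behave the same way. A minimal sketch of this variant:

import torch
import torch.nn as nn

net1 = nn.Linear(10, 10)
net2 = nn.Linear(10, 10)

# freeze all parameters of net2 in one call instead of looping
net2.requires_grad_(False)

x = torch.randn(1, 10)
loss = ((net2(net1(x)) - torch.ones(1, 10))**2).sum()
loss.backward()

print(all(p.grad is not None for p in net1.parameters()))
> True
print(all(p.grad is None for p in net2.parameters()))
> True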