import torch
import torch.nn as nn
import torch.optim as optim
class Linear(nn.Module):
    """Thin wrapper around a single ``nn.Linear`` layer.

    Args:
        in_features: Size of the last dimension of the input. Defaults to 4.
        out_features: Size of the last dimension of the output. Defaults to 3.
    """

    def __init__(self, in_features: int = 4, out_features: int = 3) -> None:
        super().__init__()
        # Assigning the layer as an attribute registers it as a submodule, so
        # its parameters are exposed as ``linear.weight`` and ``linear.bias``.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the wrapped linear layer to ``x`` and return the result."""
        return self.linear(x)
def p1():
    """Backpropagate two losses produced by two separate forward passes.

    Builds a fresh ``Linear`` model, runs it on two independent random
    ``(3, 4)`` inputs, calls ``backward()`` on the mean of each output, takes
    one SGD step, and prints every parameter's name and accumulated gradient.
    """
    model = Linear()
    sgd = optim.SGD(model.parameters(), lr=0.001)

    first_input = torch.randn((3, 4))
    second_input = torch.randn((3, 4))

    # Every call to the model records its own autograd graph, so the two
    # losses below do not share any intermediate nodes (only the leaf
    # parameters).
    first_loss = model(first_input).mean()
    second_loss = model(second_input).mean()

    sgd.zero_grad()
    # No retain_graph is needed here: the first backward() only frees the
    # buffers of the first graph, leaving the second graph intact. Gradients
    # from both passes accumulate into each parameter's .grad.
    for loss in (first_loss, second_loss):
        loss.backward()
    sgd.step()

    for param_name, param in model.named_parameters():
        print('name: ', param_name)
        print('grad: ', param.grad)
def p2():
    """Backpropagate two losses derived from one shared forward pass.

    Builds a fresh ``Linear`` model, runs it once on a random ``(3, 4)``
    input, calls ``backward()`` on both the mean and the sum of that single
    output, takes one SGD step, and prints every parameter's name and
    accumulated gradient.
    """
    model = Linear()
    sgd = optim.SGD(model.parameters(), lr=0.001)

    sample = torch.randn((3, 4))
    shared_output = model(sample)

    # Both losses hang off the same forward result, so they share the part of
    # the autograd graph that leads back to the parameters.
    mean_loss = shared_output.mean()
    sum_loss = shared_output.sum()

    sgd.zero_grad()
    # retain_graph=True keeps the shared graph's saved buffers alive after the
    # first backward pass; without it the second backward() below would fail
    # because those buffers would already have been freed.
    mean_loss.backward(retain_graph=True)
    sum_loss.backward()
    sgd.step()

    for param_name, param in model.named_parameters():
        print('name: ', param_name)
        print('grad: ', param.grad)
Hello, I know the computational graph will be freed after calling backward().
Can anyone explain why p1 does not need to set retain_graph=True? Is it because two separate computational graphs are constructed?