# Is backpropagating via these three methods equivalent in result,
# even on less trivial examples? And how does their compute usage compare?
import torch
def main():
    """Compare three equivalent ways of backpropagating through two terms.

    For f(x, y) = x*y + (x+y)/x at x=69, y=420, run backward via:
      1. summing the terms into one scalar and calling ``.backward()`` once;
      2. stacking the terms and calling ``.backward(gradient=ones)`` once;
      3. stacking the terms and calling ``.backward()`` on each element,
         letting the gradients accumulate in ``x.grad`` / ``y.grad``.

    All three accumulate identical gradients, because backward of a sum
    equals the sum of backwards of each term. Method 3 does the most work:
    it traverses the graph once per element (hence ``retain_graph=True``),
    while methods 1 and 2 traverse it once in total.

    Returns:
        list[tuple[float, float]]: the three (dx, dy) gradient pairs,
        one per method, so callers/tests can check they agree.
    """
    results = []

    # Method 1: fold both terms into a single scalar, one backward pass.
    x = torch.tensor(69., requires_grad=True)
    y = torch.tensor(420., requires_grad=True)
    (x * y + (x + y) / x).backward()
    print(x.grad, y.grad)
    results.append((x.grad.item(), y.grad.item()))

    # Method 2: stack the terms into a vector and supply an explicit
    # upstream gradient of ones — equivalent to backward of the sum.
    x = torch.tensor(69., requires_grad=True)
    y = torch.tensor(420., requires_grad=True)
    torch.stack((x * y, (x + y) / x)).backward(torch.tensor([1., 1.]))
    print(x.grad, y.grad)
    results.append((x.grad.item(), y.grad.item()))

    # Method 3: backward each stacked element separately; per-term
    # gradients accumulate into .grad. retain_graph=True keeps the graph
    # alive so the second iteration's backward does not fail.
    x = torch.tensor(69., requires_grad=True)
    y = torch.tensor(420., requires_grad=True)
    terms = torch.stack((x * y, (x + y) / x))
    for term in terms:
        term.backward(retain_graph=True)
    print(x.grad, y.grad)
    results.append((x.grad.item(), y.grad.item()))

    return results
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()