Hi!
I wrote some code that can be reduced to this:
import numpy as np
import torch
def exp_sqrt(x):
    return torch.exp(torch.sqrt(x))

def loss_func1(x):
    return torch.norm(x)

def loss_func2(x, y):
    return torch.norm(x - y)

def loss_func3(p, q):
    return torch.sum(p * q)

def loss_func4(p, q):
    return torch.sum(p + q)

def loss_func5(p, q):
    return torch.sum(p * torch.log(p / q) - p + q)
N = 16
M = 2 # No bug if M=1
torch.autograd.set_detect_anomaly(True)
np.random.seed(0)
A = torch.from_numpy(np.random.rand(M,N)).requires_grad_(True)
B = torch.from_numpy(np.zeros((M,N)))
C = torch.from_numpy(np.random.rand(M,N))
loss = torch.tensor([0.0], dtype=torch.float64)
for i in range(M):
    B[i] = exp_sqrt(A[i])

    # Choose one of these:
    loss = loss + loss_func1(B[i])                # BUG
    # loss = loss + loss_func3(B[i], C[i])        # BUG
    # loss = loss + loss_func5(B[i], C[i])        # BUG
    # loss = loss + loss_func1(B[i] + 1)          # NO BUG
    # loss = loss + loss_func1(exp_sqrt(A[i]))    # NO BUG
    # loss = loss + loss_func2(B[i], C[i])        # NO BUG
    # loss = loss + loss_func4(B[i], C[i])        # NO BUG

    # Solution: avoid indexing B with new tensors that require grad.
    # b = exp_sqrt(A[i])
    # B[i] = b.clone()
    # Choose one of these:
    # loss = loss + loss_func1(b)                 # NO BUG
    # loss = loss + loss_func3(b, C[i])           # NO BUG
    # loss = loss + loss_func5(b, C[i])           # NO BUG
print("Loss =",loss)
print("Computing backward")
loss.backward()
print(A.grad)
The lines marked "BUG" are the ones that trigger the "in-place modification error".
I have found a workaround, described in the comments, but I would like to understand why the error appears in some cases and not in others.
I understand that the problematic instruction is B[i] = exp_sqrt(A[i]) (as soon as it is executed more than once), but why does the error appear or not depending on which loss_funcX I use?
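To try to narrow it down, here is an even smaller sketch, independent of the script above (the variable names are just for illustration, and the comments only reflect my guess about what gets saved for backward). The error seems to show up only when the chosen loss_funcX needs B[i] itself for its backward pass, so that the next in-place write into B invalidates it:

import torch

a = torch.ones(3, dtype=torch.float64, requires_grad=True)
b = torch.zeros(3, dtype=torch.float64)
b[:] = torch.exp(torch.sqrt(a))  # in-place write: b gets a grad_fn (CopySlices)
loss = torch.norm(b)             # norm (I believe) saves its input b for backward
b[0] = 0.0                       # a later in-place write bumps b's version counter
loss.backward()                  # raises the in-place modification error
# Replacing torch.norm(b) with torch.norm(b + 1) does not seem to raise,
# presumably because what gets saved then is the fresh tensor b + 1, not b itself.

Is that the right intuition, or is something else going on?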
Thank you.