RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [200, 300, 3]] is at version 20; expected version 19 instead

Hello, I’m using a Pytorch to optimize the layer images from the perspective view images. When I use the loss.backpropagation, it fails to compute the gradient. So I tried to use the clone() function. But the problem still remains. Here are the codes.

The function translate_img implements the role of cv2.affine, and the function perspective returns the perspective view for the given angle.

 layer_ortho = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device) 
 Mask = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device)

 def translate_img(src_img,shift_distance,shape_of_out_img):
     h,w = src_img.shape[:2]
     x_distance,y_distance = shift_distance[0],shift_distance[1]
     ts_mat = np.array([[1,0,x_distance],[0,1,y_distance]])
     out_img = torch.zeros(shape_of_out_img).to(device)
    for i in range(h):
        for j in range(w):
            origin_x, origin_y = j, i
            origin_xy = np.array([origin_x, origin_y,1])
            new_xy =,origin_xy)
            new_x,new_y = int(new_xy[0]),int(new_xy[1])
            if 0<new_x<w and 0<new_y<h:
                out_img[new_y,new_x,:] = src_img[i,j,:]
    return out_img

# Off-axis perspective view
def perspective(layer_image,layer_ortho,Mask,z_back,z_front,dx,dy,theta_x,theta_y,device):
    ny, nx, n_depth = layer_image.shape[0], layer_image.shape[1], layer_image.shape[3]
    z = torch.linspace(z_front,z_back,n_depth)
    x0, y0 = z * np.tan(theta_x*math.pi/180), z * np.tan(theta_y*math.pi/180)   
    # Problem  occurs here(initialization)
    layer_ortho[:,:,:,n_depth-1] = layer_image[:,:,:,n_depth-1].clone()
    for n_d in reversed(range(n_depth-1)):         
        shift_distance = (x0[n_d]-x0[n_depth-1]).numpy()/dx, (y0[n_d]-y0[n_depth-1]).numpy()/dy
        ortho_tmp = translate_img(layer_image[:,:,:,n_d].clone(),shift_distance,(ny,nx,3))
        Mask[:,:,:,n_d] = torch.from_numpy(np.where(ortho_tmp.detach().cpu().numpy() > 0, 0, 1))  
        layer_ortho[:,:,:,n_d] = layer_ortho[:,:,:,n_d+1]*Mask[:,:,:,n_d]+ortho_tmp
    return layer_ortho[:,:,:,0] 
theta = 0, 0
parallel_view = perspective(layer_image,layer_ortho,Mask,z_range[0],z_range[1],dx,dy,theta[0],theta[1],device)


mse_loss = nn.MSELoss().to(device)  # + SSIM
var = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).cuda().requires_grad_(True) #layer_image.requires_grad_(True)

LR = 0.3
optimizer = optim.Adam([var], lr=LR)
theta_x = torch.linspace(-maxdiff,maxdiff,mx) 
theta_y = torch.linspace(-maxdiff,maxdiff,my)

layer_ortho = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device) 
Mask = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device)

for iter in range(100):
    time_start = time.time()
    l = 0     
    for i in range(my):
        for j in range(mx):            
            recon_target = perspective(var,layer_ortho,Mask,z_range[0],z_range[1],dx,dy,theta_x[i],theta_y[j],device)
            l = l + 1/(my*mx)*mse_loss(recon_target, LF[i,j,:,:,:].to(device))     

    time_end = time.time()
if (i) % 10 == 0:
    print(l, time_end - time_start)

The code implements the optimization using Adam optimizer.
Any help will be appreciated!