Leaf Variable moved into graph interior

I get an error when I try to replace some values of a tensor via indexing.

I simply use a torch.Tensor (dtype=float64) and try to set some values in it. I even tried the scatter_ function, but that did not work either.

A simplified version of my code is:

import torch

# initialize tensor
tensor = torch.zeros((1, 400, 400)).double()
tensor.requires_grad_(True)

# create index ranges
x_range = torch.arange(150, 250).double()
x_range.requires_grad_(True)
y_range = torch.arange(150, 250).double()
y_range.requires_grad_(True)

# get indices of flattened tensor
x_range = x_range.long().repeat(100, 1)
y_range = y_range.long().repeat(100, 1)
y_range = y_range.t()
tensor_size = tensor.size()
indices = y_range.sub(1).mul(tensor_size[2]).add(x_range).view((1, -1))

# create patch
patch = torch.ones((1, 100, 100)).double()


# flatten tensor
tensor_flattened = tensor.contiguous().view((1, -1))

# set patch into cells of tensor_flattened at indices and reshape tensor
tensor_flattened.scatter_(1, indices, patch.view(1, -1))
tensor = tensor_flattened.view(tensor_size)

# sum up for scalar output for calling backward()
tensor_sum = tensor.sum()

# calling backward()
tensor_sum.backward()

# alternative to avoid summing tensor:
tensor.backward(torch.ones_like(tensor))

The error traceback is:

  File "/home/students/schock/deep_aam_pytorch/testing_tensor.py", line 32, in <module>
    tensor.backward(torch.ones_like(tensor))
  File "/home/students/schock/miniconda3/envs/deep_aam/lib/python3.6/site-packages/torch/tensor.py", line 93, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/students/schock/miniconda3/envs/deep_aam/lib/python3.6/site-packages/torch/autograd/__init__.py", line 89, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: leaf variable has been moved into the graph interior

Process finished with exit code 1

Any idea where this comes from?

Thanks in advance!


I am running into the same problem. Did you find a solution?

  File "/home/liuchenfeng/code/py-SMDNet/pretrain/train_smdnet.py", line 180, in <module>
    train_smdnet()
  File "/home/liuchenfeng/code/py-SMDNet/pretrain/train_smdnet.py", line 149, in train_smdnet
    loss.backward()
  File "/usr/local/lib/python2.7/dist-packages/torch/tensor.py", line 93, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/usr/local/lib/python2.7/dist-packages/torch/autograd/__init__.py", line 89, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: leaf variable has been moved into the graph interior

What’s your code like?

I used PyTorch's register_forward_hook to get the feature maps of the middle-layer outputs, and the loss calculation uses these feature maps. Here is my loss function:

class StudentLoss(nn.Module):
    def __init__(self):
        super(StudentLoss, self).__init__()    
    def forward(self, tfmaps_list, sfmaps_list, open_conv):
        tfmaps_list_new = []
        ## get specific feature maps
        for idl in range(0, len(sfmaps_list)):
            sfmaps = sfmaps_list[idl]
            tfmaps = tfmaps_list[idl]
            tfmaps_new = torch.FloatTensor(sfmaps_list[idl].shape[0], sfmaps_list[idl].shape[1],
                                           sfmaps_list[idl].shape[2], sfmaps_list[idl].shape[3]).zero_()
            tfmaps_new.requires_grad_()
            for idx in range(0, sfmaps.shape[0]):
                for idy in range(0, sfmaps.shape[1]):
                    tfmaps_new[idx][idy] = tfmaps[idx][open_conv[idl % 3][idy]]
            tfmaps_list_new.append(tfmaps_new)

        fmaps_losses = torch.FloatTensor(1).zero_().cuda()
        for idl in range(0, len(sfmaps_list)):
            sfmaps = sfmaps_list[idl]
            tfmaps = tfmaps_list_new[idl]
            fmaps_loss = (torch.pow(F.pairwise_distance(sfmaps.view(sfmaps.shape[0], -1).cuda(), \
                                                        tfmaps.view(tfmaps.shape[0], -1).cuda()), 2))

            fmaps_losses += fmaps_loss.sum()

        return fmaps_losses

And here is the relevant part of my main function:

result_tfeature = []
result_sfeature = []


def get_tfeature_hook(self, input, output):
    result_tfeature.append(output)

def get_sfeature_hook(self, input, output):
    result_sfeature.append(output)

def set_optimizer(model, lr_base, lr_mult=opts['lr_mult'], momentum=opts['momentum'], w_decay=opts['w_decay']):
    params = model.get_learnable_params()
    param_list = []
    for k, p in params.iteritems():
        lr = lr_base
        for l, m in lr_mult.iteritems():
            if k.startswith(l):
                lr = lr_base * m
        param_list.append({'params': [p], 'lr': lr})
    optimizer = optim.SGD(param_list, lr=lr, momentum=momentum, weight_decay=w_decay)
    return optimizer


def train_smdnet():

    ## Init dataset ##
    with open(data_path, 'rb') as fp:
        data = pickle.load(fp)

    ## Init model ##
    tmodel = MDNet(opts['model_path'], K)
    smodel = SMDNet(opts['init_model_path'], K)
    if opts['use_gpu']:
        tmodel = tmodel.cuda()
        smodel = smodel.cuda()
    smodel.set_learnable_params(['conv'])

    ## Set teacher and student network's hook to get feature maps ##
    tConv = torch.nn.ModuleList()
    for lname, list in tmodel.named_children():
        if lname in ['layers']:
            for name, module in list.named_children():
                if name in ['conv1','conv2','conv3']:
                    tConv.append(module)
    thandle_feat_conv1 = tConv[0].register_forward_hook(get_tfeature_hook)  # conv1
    thandle_feat_conv2 = tConv[1].register_forward_hook(get_tfeature_hook)  # conv2
    thandle_feat_conv3 = tConv[2].register_forward_hook(get_tfeature_hook)  # conv3

    sConv = torch.nn.ModuleList()
    for lname, list in smodel.named_children():
        if lname in ['layers']:
            for name, module in list.named_children():
                if name in ['conv1', 'conv2', 'conv3']:
                    sConv.append(module)
    shandle_feat_conv1 = sConv[0].register_forward_hook(get_sfeature_hook)  # conv1
    shandle_feat_conv2 = sConv[1].register_forward_hook(get_sfeature_hook)  # conv2
    shandle_feat_conv3 = sConv[2].register_forward_hook(get_sfeature_hook)  # conv3

    ## Init criterion and optimizer ##
    criterion = StudentLoss()
    evaluator = Precision()
    optimizer = set_optimizer(smodel, opts['lr'])

    best_prec = 0.
    for i in range(opts['n_cycles']):
        print "==== Start Cycle %d ====" % (i)
        k_list = np.random.permutation(K)
        prec = np.zeros(K)
        for j, k in enumerate(k_list):
            tic = time.time()
            pos_regions, neg_regions = dataset[k].next()

            pos_regions = Variable(pos_regions)
            neg_regions = Variable(neg_regions)

            if opts['use_gpu']:
                pos_regions = pos_regions.cuda()
                neg_regions = neg_regions.cuda()

            pos_score = smodel(pos_regions, k)
            neg_score = smodel(neg_regions, k)
            tmodel(pos_regions, k, out_layer='conv3')
            tmodel(neg_regions, k, out_layer='conv3')

            loss = criterion(result_tfeature, result_sfeature, smodel.open_conv)

            smodel.zero_grad()
            loss.backward()

            torch.nn.utils.clip_grad_norm(smodel.parameters(), opts['grad_clip'])
            optimizer.step()

            # remove hook
            thandle_feat_conv1.remove()
            thandle_feat_conv2.remove()
            thandle_feat_conv3.remove()
            shandle_feat_conv1.remove()
            shandle_feat_conv2.remove()
            shandle_feat_conv3.remove()

            prec[k] = evaluator(pos_score, neg_score)

Any updates or fixes for this, guys? I'm facing a very similar issue here.

For me, using built-in PyTorch methods such as gather, index_put_, masked_scatter_, or grid_sample instead of direct indexing did the trick.

Note: since some of these functions are equivalent to direct indexing, it can also help to flatten the tensor via .view(-1) and index the flattened tensor.
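
Roughly what I mean for the patch example above (just a sketch; it assumes the target canvas itself does not need to be a leaf with requires_grad=True, only the patch does):

import torch

# plain buffer we write into; it becomes a non-leaf once the patch is scattered in
canvas = torch.zeros(400 * 400, dtype=torch.float64)
patch = torch.ones(100, 100, dtype=torch.float64, requires_grad=True)

# flat indices of the 100x100 region [150:250, 150:250] in a 400x400 image
rows = torch.arange(150, 250).unsqueeze(1)   # (100, 1)
cols = torch.arange(150, 250).unsqueeze(0)   # (1, 100)
flat_idx = (rows * 400 + cols).reshape(-1)

# built-in scatter-style write instead of direct item assignment
canvas.index_put_((flat_idx,), patch.reshape(-1))

canvas.view(400, 400).sum().backward()       # gradients flow back into `patch`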

Oh, I have a very similar problem too. I came across it when I added a binarized weight to a conv layer (XNOR network).

I just faced the same problem, with the following MWE:

A = torch.randn([5, 5], requires_grad=True)
B = torch.zeros([2, 2], requires_grad=True)
B[0, 0] = A[0, 0]   # in-place write into the leaf B moves it into the graph interior
C = B.norm()
C.backward()

raising the error

RuntimeError: leaf variable has been moved into the graph interior

The issue can be resolved by setting requires_grad=False when initializing B. The same solution has been proposed in another thread: Leaf variable has been moved into the graph interior - #2 by ptrblck
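
For reference, this is roughly what the fixed MWE could look like; B is created as a plain buffer (requires_grad left at its default of False), and the gradient still flows back to A through the indexed assignment:

import torch

A = torch.randn([5, 5], requires_grad=True)
B = torch.zeros([2, 2])    # no requires_grad: B is just a buffer we write into
B[0, 0] = A[0, 0]          # B becomes a non-leaf that depends on A
C = B.norm()
C.backward()
print(A.grad[0, 0])        # ±1, i.e. sign(A[0, 0]), since ||B|| reduces to |A[0, 0]| here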

Is it possible to trace this error back, i.e. to find out in which operation it is happening? I have fairly complicated code and I am not sure where I am making this mistake. I am relatively new to PyTorch, so I don't know which operations are differentiable and which ones are not!

Anyway, here is my code. requires_grad=True is set for camera_params and points_3d:

def rotate(points, rot_vecs):
    # Rotate points by the given rotation vectors (Rodrigues' rotation formula).
    theta = torch.norm(rot_vecs, dim=1, keepdim=True)   # rotation angles
    v = rot_vecs / theta                                 # unit rotation axes

    dot = torch.sum(points * v, dim=1, keepdim=True)

    cos_theta = torch.cos(theta)
    sin_theta = torch.sin(theta)

    ans = cos_theta * points + sin_theta * torch.cross(v, points) + dot * (1 - cos_theta) * v

    return ans

def project(points, camera_params):
    # Project 3-D points onto the image plane of each camera.
    R = torch.index_select(camera_params, 1, torch.tensor([0, 1, 2]))  # rotation vectors
    T = torch.index_select(camera_params, 1, torch.tensor([3, 4, 5]))  # translations

    points_proj = rotate(points, R)
    points_proj = points_proj + T
    denom = torch.index_select(points_proj,1,torch.tensor([2])).view(-1,1)
    points_proj = -torch.index_select(points_proj,1,torch.tensor([0,1]))/denom
    f = torch.index_select(camera_params, 1, torch.tensor([6]))
    k1 = torch.index_select(camera_params, 1, torch.tensor([7]))
    k2 = torch.index_select(camera_params, 1, torch.tensor([8]))
    
    n = torch.sum(torch.mul(points_proj,points_proj), dim = 1)
    r = 1 + torch.mul(n,k1.view(k1.numel())) + torch.mul(k2.view(k2.numel()),torch.mul(n,n))
    points_proj = points_proj*torch.mul(r,f.view(f.numel())).unsqueeze(1)
    return points_proj

def fun(camera_params, points_3d, n_cameras, n_points, camera_indices, point_indices, points_2d):
    # Residuals: projected 2-D points minus observed 2-D points.
    points_3d_2 = torch.index_select(points_3d, 0, point_indices)
    camera_params_2 = torch.index_select(camera_params, 0, camera_indices)
    points_proj = project(points_3d_2, camera_params_2)
    ans = points_proj - points_2d
    return ans.view(ans.numel())

for i in range(10):
    f = fun(camera_params, points_3d, n_cameras, n_points, camera_indices, point_indices, points_2d)
    
    loss = f.pow(2).sum()
    
    print(i, " --> ", loss.item())
    loss.backward()

    
    with autograd.detect_anomaly():
        camera_params -= lr*camera_params.grad
        points_3d -= lr*points_3d.grad
        
        camera_params.grad.zero_()
        points_3d.grad.zero_()

Can someone help me figure out where I am making this mistake?

RuntimeError                              Traceback (most recent call last)

<ipython-input-76-54598957eb0e> in <module>()
      5 
      6     print(i, " --> ", loss.item())
----> 7     loss.backward()
      8 
      9 

1 frames

/usr/local/lib/python3.6/dist-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
    193                 products. Defaults to ``False``.
    194         """
--> 195         torch.autograd.backward(self, gradient, retain_graph, create_graph)
    196 
    197     def register_hook(self, hook):

/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
     97     Variable._execution_engine.run_backward(
     98         tensors, grad_tensors, retain_graph, create_graph,
---> 99         allow_unreachable=True)  # allow_unreachable flag
    100 
    101 

RuntimeError: leaf variable has been moved into the graph interior