Puzzled by "RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation"

sergiosonline · April 23, 2019, 3:51pm

Hi there!

I’m working on a NN convolutional model and I am encountering the following error: “RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation,” but I’m not quite sure what I’m doing wrong.

This is the model output with the exception, where criterion_2 = nn.CrossEntropyLoss():

Exception reporting mode: Plain tensor([[0.1523, 0.1850, 0.1665, 0.2883, 0.2079]], grad_fn=<TransposeBackward0>) tensor([1, 1])

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/container.py:92: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument. input = module(input)

Traceback (most recent call last):

File “<ipython-input-57-6995ff2eb00e>”, line 3, in <module> the_model = train(train_d, arg, training_data_predefined = b)

File “<ipython-input-55-1b8bf27ff47f>”, line 75, in train lossing_me.backward()

File “/usr/local/lib/python3.6/dist-packages/torch/tensor.py”, line 102, in backward torch.autograd.backward(self, gradient, retain_graph, create_graph)

File “/usr/local/lib/python3.6/dist-packages/torch/autograd/init.py”, line 90, in backward allow_unreachable=True) # allow_unreachable flag

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

This is my training algo:

def train(train_d, args, training_data_predefined = , mod_use = None):
#setting up the packages
torch.set_num_threads(5)
npr.seed(args.seed)
dir_save_to = “results/” + args.dir_name
if not os.path.exists(dir_save_to):
os.makedirs(dir_save_to)

#type of metric to use for loss:
l_t = 1
if arg.loss_type == "L2":
    l_t = 0


#which model to implament
if mod_use is None:
    if args.model_use == "Mod1":
          mod_use = TS_based_simple(num_outs = args.k_size) #args.num_filters, num_colours, arg.num_in_chans)
    elif args.model_use == "Mod1wFactors":
          mod_use = TS_based__wfactors(num_outs = args.k_size) #, args.num_filters, num_colours, arg.num_in_chans)
    
#setting up the model's optimizaer
optimizer = torch.optim.Adam(mod_use.parameters(), lr=args.lrn_rate)

#loss function
if args.loss_type == "L2":
    ind = np.triu_indices(2)
    criterion = nn.MSELoss() 
elif args.loss_type == "Eigen":
    criterion = nn.L
criterion_2 = nn.CrossEntropyLoss()
#where we are going to gather data
hist_tr_loss = []
hist_tt_loss = []

#creat trainig data sets if not premade
if training_data_predefined == []:
    training_d, validation_d = train_set(train_dd, args)
else:
    TS_d = training_data_predefined[0]
    validation_d = training_data_predefined[1]
    
#let's start training
for epoch in range(arg.num_epochs):
    epoch_loss = []
    
    
    #training data
    for ddd in TS_d:
        input_d = ddd[0]
        
        output_d = ddd[1]
        output_d = [output_d[i][:,0,:][0] for i in range(len(output_d))] 
        out_mat = pd.DataFrame({"S%d" %i: d for i,d in  enumerate(output_d)}).cov()
        dims_I_want_out = tuple(list(output_d[0].size()) + [1])
        #print(output_d)
        
        #setup optimizer
        optimizer.zero_grad()
        
        #set up optimization and calculating loss
        output_choices = mod_use(input_d)
        output_choice = [output_choices.max(1)[1][0].numpy()]
        losses = [criterion(torch.from_numpy(np.asanyarray(out_mat)[ind]),
                               torch.from_numpy(np.asanyarray(which_mat(input_d, [i]))[ind]))
                 for i in range(args.k_size)]
        target = torch.empty(2, dtype=torch.long)
        target[0:2] = losses.index(min(losses))
        print(output_choices[0].unsqueeze_(-1).transpose(0,1))
        print(target)
        lossing_me = criterion_2(output_choices, target)
        
        #optimizing
        lossing_me.backward()
        optimizer.step()
        
        #adding performance
        epoch_loss.append(lossing.data.item())
    
    #printing performance on epoch
    mean_loss = np.mean(epoch_loss)
    hist_tr_loss.append(mean_loss)
    print('Epoch [%d/%d], T Loss: %.4f' % (epoch+1, args.num_epochs, mean_loss))
    
    
    """
    #let's validate
    temp_validation = []
    for ddd in validation_d:
        inputs = ddd[0]
        meta_d = ddd[1]
        
        input_d = inputs[0]
        outpt_d = inputs[1]
        
        #setup optimizer
        optimizer.zero_grad()
        
        #set up optimization
        output_choice = mod_use(input_d)
        CM_use = which_mat(input_d, output_choice)
        lossing_me = cov_matrix_loss(output_d, CM_use, l_t)
        
        
        #adding performance
        temp_validation.append(lossing.data.item())"""
    
    hist_tt_loss = []
    #printing performance on epoch
    mean_loss = np.mean(temp_validation)
    hist_tt_loss.append(mean_loss)
    print('Epoch [%d/%d], V Loss: %.4f' % (epoch+1, args.num_epochs, mean_loss))


"""
plt.figure()
plt.plot(hist_tr_loss, "ro-", label="Train")
plt.plot(hist_tt_loss, "go-", label="Validation")
plt.legend()
plt.title("Loss")
plt.xlabel("Epochs")
plt.savefig(save_dir+"/training_curve.png")"""

if args.checkpoint:
    print('Saving model...')
    torch.save(mod_use.state_dict(), args.save_model_as)

return mod_use

sergiosonline · April 23, 2019, 6:53pm

I declared helper function and not getting the error anymore:

def compute_loss(criterion, outputs, targets):
“”"
Helper function to compute the loss. Since this is a pixelwise
prediction task we need to reshape the output and ground truth
tensors into a 2D tensor before passing it in to the loss criteron.
Args:
  criterion: pytorch loss criterion
  outputs (pytorch tensor): predicted labels from the model
  labels (pytorch tensor): ground truth labels
Returns:
  pytorch tensor for loss
"""

loss_out = outputs
loss_targs = targets
return criterion(loss_out, loss_targs)

However, the model seems to be stuck

Exception reporting mode: Verbose
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/container.py:92: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
input = module(input)
tensor([[0.2053, 0.1828, 0.1825, 0.2460, 0.1834]],
grad_fn=)
tensor([0, 0])

Oli · April 23, 2019, 8:11pm

Hard to say which code was responsible for the error but assigning values to a tensor is in place and can produce these errors.

tensor_name[0:2, 3] = 1 is forbidden to do on any tensor that is used in the model before the backpropagation has happened. Hard to debug though, maybe someone knows of a tool?