RuntimeError one of the variables needed for gradient computation has been modified by an inplace operation

xgrxgr25 · July 9, 2018, 3:36pm

hello, i am trying to train my net but i am getting this error: “one of the variables needed for gradient computation has been modified by an inplace operation”.
here is the code:

class PolyNet(nn.Module): # nn.Module is parent class
def init(self, rank=4):
super(PolyNet, self).init() #calls init of parent class
self.layer1 = nn.Sequential(
nn.Conv2d(3, 1, kernel_size=7, stride=1, padding=2),
nn.BatchNorm2d(1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2))
self.fc = nn.Linear(63631, rank)

 def forward(self, x):
     """
     Feed forward through network
     Args:
         x - input to the network
         
     Returns "out", which is the network's output
     """
     
     out = self.layer1(x)
     out = out.reshape(out.size(0), -1)
     out = self.fc(out)
    
     return out

def func(x,a,b,c,d):
    return (x**3)*a+(x**2)*b+c*x+d
def my_loss(outputs, labels):
    
    """
    Args:
        outputs - output of network ([batch size, rank+1]) 
        labels  - desired coefficients  ([batch size, rank+1])
    """
    
    loss = torch.zeros(1, dtype=torch.float, requires_grad=True)
    loss = loss.to(device)
    arr2 = torch.zeros(4, dtype=torch.float, requires_grad=True)
    arr = torch.zeros(10, dtype=torch.float, requires_grad=True)
    for i in range(0,4):
        x=0
        for j in range(0,10):
            x = x + 0.1*j
            y1=func(x,outputs[i,0],outputs[i,1],outputs[i,2],outputs[i,3]) 
            y2=func(x,labels[i,0],labels[i,1],labels[i,2],labels[i,3])
            n=torch.abs(y1-y2)
            arr[j]=n
        a=torch.max(arr)
        arr2[i] = a
        
        
    loss=(torch.sum(arr2))/4
    print(loss)

        
    # Observe: If you need to iterate and add certain values to loss defined above
    # you cannot write: loss +=... because this will raise the error: 
    # "Leaf variable was used in an inplace operation"
    # Instead, to avoid this error write: loss = loss + ...  
    
                                      
    return loss

model = PolyNet().to(device)
print ("Number of model trainable parameters:", get_train_params_num(model))

#----------------------------------------------
#  Choose your optimizer:
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 
#----------------------------------------------

def train_model(model,
               optimizer,
               train_loader,
               validation_loader,
               train_losses,
               validation_losses,
               epochs=2):
   
   """
   Trains a neural network. 
   Args;
       model               - model to be trained
       optimizer           - optimizer used for training
       train_loader        - loader from which data for training comes 
       validation_loader   - loader from which data for validation comes (maybe at the end, you use test_loader)
       train_losses        - adding train loss value to this list for future analysis
       validation_losses   - adding validation loss value to this list for future analysis
       epochs              - number of runs over the entire data set 
   """
   
   
   for epoch in range(epochs):
       for i, data in enumerate(loader, 0):
           inputs = (data['image']).to(device)
           labels = (data['labels']).to(device)
           # Forward pass
           outputs = model(inputs.float())
           loss = my_loss(outputs, labels)
       
            # Backward and optimize
           optimizer.zero_grad()
           loss.backward()
           optimizer.step()
   
   return 
   
if not 'train_losses' in vars():
   train_losses = []
if not 'validation_losses' in vars():
   validation_losses = []


train_model(model, 
           optimizer,
           train_loader, 
           validation_loader, 
           train_losses, 
           validation_losses,
           epochs=2)

and the error:

RuntimeError Traceback (most recent call last)
in ()
17 train_losses,
18 validation_losses,
—> 19 epochs=2)

in train_model(model, optimizer, train_loader, validation_loader, train_losses, validation_losses, epochs)
30 # Backward and optimize
31 optimizer.zero_grad()
—> 32 loss.backward()
33 optimizer.step()
34

~\Miniconda3\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph)
91 products. Defaults to False.
92 “”"
—> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph)
94
95 def register_hook(self, hook):

~\Miniconda3\lib\site-packages\torch\autograd_init_.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
87 Variable._execution_engine.run_backward(
88 tensors, grad_tensors, retain_graph, create_graph,
—> 89 allow_unreachable=True) # allow_unreachable flag
90
91

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

ptrblck · July 9, 2018, 8:08pm

I believe the following lines are causing the error:

    ...
    arr[j]=n
...
arr2[i] = a
...

You could append n and a to a list and creating a tensor from it.
Have a look at this modified code:

def my_loss(outputs, labels):
    
    """
    Args:
        outputs - output of network ([batch size, rank+1]) 
        labels  - desired coefficients  ([batch size, rank+1])
    """
    
    loss = torch.zeros(1, dtype=torch.float, requires_grad=True)
    loss = loss.to(device)
    arr2 = []

    for i in range(0,4):
        x=0
        arr = []
        for j in range(0,10):
            x = x + 0.1*j
            y1=outputs[i,0]
            y2=labels[i,0]
            n=torch.abs(y1-y2)
            arr.append(n)
        arr = torch.stack(arr)
       
        a=torch.max(arr)
        arr2.append(a)
        
    arr2 = torch.stack(arr2)
    loss=(torch.sum(arr2))/4
    print(loss)
                                  
    return loss

Note that I had to move arr into the inner loop.
Would that work for you? I’m not familiar with your loss function, so could you check, if the values make sense?

Thuy_D · March 14, 2019, 6:39pm

Thank you, I got the same problem and your tip worked for me. However, I don’t know what the root cause is.
In my case,

def forward(self, inputs, hidden):
...
     emb_to_hidden = self.first_hidden(embedded_inputs[timestep])
     previous_to_hidden = self.last_time_hiddens[0](hidden[0])
     hidden[0] = emb_to_hidden + previous_to_hidden //this line causes error
...
     return outputs, hidden

Why is the operator to re-assign hidden[0] considered as an inplace operation?