Error when running autograd

I am trying to run the following code, where train_loader is simply a DataLoader over the MNIST dataset:

import torch
from torch.autograd import Variable
import pdb

W_1 = Variable(torch.randn(input_dim, num_of_hidden_nodes).type(torch.FloatTensor), requires_grad=True)
W_2 = Variable(torch.randn(num_of_hidden_nodes, output_dim).type(torch.FloatTensor), requires_grad=True)

for epoch in range(0, num_epochs):
    y_batch_onehot = Variable(torch.FloatTensor(batch_size, output_dim))
    correct = 0
    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
        
        x_batch = Variable(x_batch, requires_grad=False)
        y_batch = Variable(y_batch, requires_grad=False)       
        
        # Forward Pass
        output = sigmoid(sigmoid(x_batch.view(-1, 784).mm(W_1)).mm(W_2))
#         hidden_state_output = sigmoid(torch.mm(x_batch, W_1))
#         output = sigmoid(torch.mm(hidden_state_output, W_2))

        # Convert the labels to one hot encoded format
        y_batch_onehot.data.zero_()
        y_batch_onehot.data.scatter_(1, y_batch[:, None].data, 1)

        # Loss (Mean-Squared error)
        loss += (output - y_batch_onehot).pow(2).mul(0.5).sum()
        loss.backward()
#         pdb.set_trace()

        # Manually zero the gradients before running the backward pass
        W_1.grad.data.zero_()
        W_2.grad.data.zero_()
        # Calculate the number of correct classifications
        _, predicted_class = output.max(1)
        correct += predicted_class.eq(y_batch.data).sum()

        W_1.data -= learning_rate * W_1.grad.data
        W_2.data -= learning_rate * W_2.grad.data

    print("Epoch: {0} | loss: {1} | accuracy: {2}".format(epoch, loss.data[0], correct/len(train_loader.dataset)))              

But I am getting the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-18-181cdf870c66> in <module>()
     28         # Loss (Mean-Squared error)
     29         loss += (output - y_batch_onehot).pow(2).mul(0.5).sum()
---> 30         loss.backward()
     31 #         pdb.set_trace()
     32 

/users/avijit.d/.local/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_variables)
    144                     'or with gradient w.r.t. the variable')
    145             gradient = self.data.new().resize_as_(self.data).fill_(1)
--> 146         self._execution_engine.run_backward((self,), (gradient,), retain_variables)
    147 
    148     def register_hook(self, hook):

/users/avijit.d/.local/lib/python3.6/site-packages/torch/autograd/_functions/basic_ops.py in backward(self, grad_output)
    185             return grad_output.mul(self.fw_result).mul_(math.log(self.constant))
    186         else:
--> 187             a = self.saved_tensors[0]
    188             return grad_output.mul(self.constant).mul_(a.pow(self.constant - 1))
    189 

RuntimeError: Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.

Can anyone please help me?

You can't do loss += there. Do loss = instead.

If you want to keep a running loss for reporting purposes, do it like this:

loss = ...
total_loss += loss.data[0]
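
For example, a stripped-down version of the inner loop (toy tensors rather than your MNIST setup, just to show the pattern: a fresh loss Variable every iteration, and a plain Python number as the running total):

import torch
from torch.autograd import Variable

W = Variable(torch.randn(4, 3), requires_grad=True)
learning_rate = 0.1

total_loss = 0.0
for step in range(5):
    x = Variable(torch.randn(8, 4))
    target = Variable(torch.randn(8, 3))

    output = x.mm(W).sigmoid()
    loss = (output - target).pow(2).mul(0.5).sum()   # build a new loss (and a new graph) each step
    total_loss += loss.data[0]                       # accumulate a number, not a Variable

    loss.backward()
    W.data -= learning_rate * W.grad.data
    W.grad.data.zero_()                              # clear the gradient before the next backward

print(total_loss)

With loss +=, every iteration's loss still references the graphs of all previous iterations, whose buffers were already freed by their backward() calls, which is exactly the "backward through the graph second time" error you saw.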

I have edited my code as per your suggestion, but now I am getting the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-7-60a90c59aa74> in <module>()
     30         loss = (output - y_batch_onehot).pow(2).mul(0.5).sum()
     31         total_loss += loss.data[0]
---> 32         loss.backward()
     33 #         pdb.set_trace()
     34 

/users/avijit.d/.local/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_variables)
    144                     'or with gradient w.r.t. the variable')
    145             gradient = self.data.new().resize_as_(self.data).fill_(1)
--> 146         self._execution_engine.run_backward((self,), (gradient,), retain_variables)
    147 
    148     def register_hook(self, hook):

/users/avijit.d/.local/lib/python3.6/site-packages/torch/autograd/_functions/pointwise.py in backward(self, grad_output)
     16 
     17     def backward(self, grad_output):
---> 18         return self.saved_tensors[0] * grad_output
     19 
     20 

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation