One of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [1431, 20]],

I am getting this error; can anyone help me resolve it?

/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/torch/autograd/__init__.py:130: UserWarning: Error detected in MmBackward. Traceback of forward call that caused the error:
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
    self._run_once()
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
    handle._run()
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/ioloop.py", line 688, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/ioloop.py", line 741, in _run_callback
    ret = callback()
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/gen.py", line 814, in inner
    self.ctx_run(self.run)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/gen.py", line 775, in run
    yielded = self.gen.send(value)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 358, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 536, in execute_request
    self.do_execute(
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/tornado/gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 302, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 539, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2898, in run_cell
    result = self._run_cell(
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2944, in _run_cell
    return runner(coro)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3169, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3361, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-140-052c71d28f57>", line 2, in <module>
    net, train_loss, train_accuracy, val_loss, val_accuracy = train(train_loader,  valid_loader, lr=0.0001, epochs=3,  hidden_units=20, net='RNN')
  File "<ipython-input-139-ead948683b23>", line 36, in train
    net_out, h = model(data,h)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "<ipython-input-138-9127ab8f7b70>", line 33, in forward
    return self.rnn_cell(inp, prev_h)
  File "<ipython-input-138-9127ab8f7b70>", line 22, in rnn_cell
    second_x = self.xh(x.detach())
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 93, in forward
    return F.linear(input, self.weight, self.bias)
  File "/Users/arslan/anaconda3/envs/data_work/lib/python3.8/site-packages/torch/nn/functional.py", line 1692, in linear
    output = input.matmul(weight.t())
 (Triggered internally at  /Users/distiller/project/conda/conda-bld/pytorch_1603740477510/work/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
  Variable._execution_engine.run_backward(
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-140-052c71d28f57> in <module>
      1 torch.autograd.set_detect_anomaly(True)
----> 2 net, train_loss, train_accuracy, val_loss, val_accuracy = train(train_loader,  valid_loader, lr=0.0001, epochs=3,  hidden_units=20, net='RNN')

<ipython-input-139-ead948683b23> in train(train_x, valid_x, lr, epochs, hidden_units, net)
     45             label = label.float()
     46             loss = criterion(net_out, label)
---> 47             loss.backward(retain_graph=True)
     48             optimizer.step()
     49 

~/anaconda3/envs/data_work/lib/python3.8/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
    219                 retain_graph=retain_graph,
    220                 create_graph=create_graph)
--> 221         torch.autograd.backward(self, gradient, retain_graph, create_graph)
    222 
    223     def register_hook(self, hook):

~/anaconda3/envs/data_work/lib/python3.8/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
    128         retain_graph = create_graph
    129 
--> 130     Variable._execution_engine.run_backward(
    131         tensors, grad_tensors_, retain_graph, create_graph,
    132         allow_unreachable=True)  # allow_unreachable flag

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [1431, 20]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
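
This error means autograd saved a tensor during the forward pass and found it modified in place by the time backward ran: every tensor carries a version counter, and backward refuses to use a saved value whose counter has changed. The [1431, 20] tensor is "output 0 of TBackward", i.e. apparently the transposed weight of one of the Linear layers below. A minimal sketch, unrelated to the code in this thread, that triggers the same class of failure:

import torch

w = torch.randn(3, 3, requires_grad=True)   # stands in for a layer weight
x = torch.randn(3, 3, requires_grad=True)
y = x.matmul(w)        # matmul saves w, which backward needs for x's gradient
with torch.no_grad():
    w.add_(1.0)        # in-place update, as optimizer.step() does; version 0 -> 1
y.sum().backward()     # RuntimeError: ... is at version 1; expected version 0 instead

The model definition: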
import torch
import torch.nn as nn


class RNN(nn.Module):
    
    def __init__(self,input_size, output_size, hidden_size=64):

        super().__init__()

        self.input_size  = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.xh = nn.Linear(self.input_size, self.hidden_size, bias=False)
        self.hh = nn.Linear(self.hidden_size, self.hidden_size)
        self.hy = nn.Linear(self.hidden_size, self.output_size)
        
        
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)
        self.sigmoid = nn.Sigmoid()

    def rnn_cell(self, x, prev_h):  
        first_h = self.hh(prev_h)
        second_x = self.xh(x)

        act = second_x + first_h
        h = self.tanh(act)

        updated_c = self.sigmoid(self.hy(h))

        return updated_c, h


    def forward(self, inp, prev_h):
        return self.rnn_cell(inp, prev_h)
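
In other words, forward computes a single vanilla RNN step, h_t = tanh(W_xh x_t + W_hh h_{t-1} + b_h), plus an output y_t = sigmoid(W_hy h_t + b_y); the returned h therefore carries the autograd graph of everything it was computed from.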

I cannot reproduce the issue using:

model = RNN(1, 1)
x = torch.randn(1, 1)
target = torch.randint(0, 2, (1, 1)).float()
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

for epoch in range(10):
    optimizer.zero_grad()
    prev = torch.randn(1, 64)
    output, h = model(x, prev)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    prev = h.detach()

Could you post an executable code snippet, which would reproduce the error you are seeing?

This is the training code:

import numpy as np
from torch.autograd import Variable  # deprecated, but kept because the code below uses it

# 'device' is not defined in the posted code; this definition is assumed
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train(train_x, valid_x, lr, epochs, hidden_units, net='RNN'):
    
    for step, (data, label) in enumerate(train_x):
        inputs = np.array(data)
        break

    if net=='RNN':
        h = torch.zeros(hidden_units).requires_grad_()
        model = RNN(inputs.shape[1], 1, hidden_units)
    elif net == 'LSTM':
        h = torch.zeros(hidden_units).requires_grad_()
        c = torch.zeros(hidden_units).requires_grad_()
        model = LSTM(inputs.shape[1], 1, hidden_units)
    elif net == 'GRU':
        St_1 = torch.zeros(hidden_units).requires_grad_()
        model = GRUModel(inputs.shape[1], 1, hidden_units)
    model.to(device)
    
    
    train_loss, val_loss = [],[]
    train_accuracy, val_accuracy = [], []
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.BCELoss()

    
    
    for ep in range(epochs):
        running_loss, correct = 0, 0
        for i, (data, label) in enumerate(train_x):
            data, label = Variable(data), Variable(label)
            data, label = data.to(device), label.to(device)
            
            optimizer.zero_grad()
            
            if net == 'RNN':
                net_out, h = model(data, h)
            elif net == 'LSTM':
                net_out, h, c = model(data, h, c)
            elif net == 'GRU':
                net_out, St_1 = model(data, St_1)
                
            
            label = torch.reshape(label, (label.shape[0], 1))
            net_out = torch.reshape(net_out, (label.shape[0], 1))
            label = label.float()
            loss = criterion(net_out, label)
            loss.backward(retain_graph=True)
            optimizer.step()

            running_loss += loss.item()
#             pred = torch.argmax(net_out, axis=1)  # get the index of the max log-probability
#             actual = torch.argmax(label, axis=1)
            out = (net_out>0.5).float()
            correct += out.eq(label).sum()



        print(running_loss)
        print("Epoch:", ep)
        print(correct.item())
        print("Training Accuracy:", 100. * correct.item() / len(train_x.dataset))
        print("Train Loss:", running_loss / len(train_x.dataset))
        train_loss.append(running_loss / len(train_x.dataset))
        train_accuracy.append(correct / len(train_x.dataset))


#         test_loss = 0
#         correct = 0
#         with torch.no_grad():
#             for batch_idx, (data, target) in enumerate(valid_x):
#                 data, target = Variable(data), Variable(target)
#                 data, target = data.to(device), target.to(device)
#     #                 data = data.view(-1, 784)
#                 if net == 'RNN':
#                     net_out, _ = model(data, h)
#                 elif net == 'LSTM':
#                     net_out, _, _ = model(data, h, c)
#                 elif net == 'GRU':
#                     net_out, _ = model(data, St_1)
#                 net_out = torch.reshape(net_out, (net_out.shape[0],))
#                 # sum up batch loss
#                 target = target.float()
#                 test_loss += criterion(net_out, target).item()
#     #                 pred = torch.argmax(net_out, axis=1)  # get the index of the max log-probability
#     #                 actual = torch.argmax(label, axis=1)
#                 out = (net_out>0.5).float()
#                 correct += out.eq(target).sum()
#             val_loss.append(test_loss / len(valid_x.dataset))
#             val_accuracy.append(correct / len(valid_x.dataset))

#         print("Validation Accuracy:" , 100. * correct.item() / len(valid_x.dataset))
#         print("Validation Loss:", test_loss / len(valid_x.dataset)) 
#         print("----------------------------------------------------------")
    
    return model, train_loss, train_accuracy, val_loss, val_accuracy
  

The code snippet is unfortunately not executable, so please add the missing code to create random tensors in the expected shapes, which would then reproduce this issue.

Here is the whole code to generate the data:

import numpy as np
import torch
from torch.utils.data import Dataset, SubsetRandomSampler

class MyDataset(Dataset):
    def __init__(self, data, targets):
        self.data = torch.from_numpy(data).type(torch.float)
        self.targets = targets

    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]
        return x, y

    def __len__(self):
        return len(self.data)

def numpy_to_dataloader(data, targets, batch_size = 50, validation_split = .2, shuffle_dataset = True):
    
    random_seed = 42
    dataset = MyDataset(data, targets)

    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    if shuffle_dataset :
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]


    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                    sampler=valid_sampler)
    
    return train_loader, validation_loader
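
Note that MyDataset calls torch.from_numpy, which raises a TypeError when given a tensor, while the dummy data below is built with torch.randn. An assumed NumPy version with the same shapes would be:

X = np.random.randn(30, 20).astype(np.float32)
y = np.random.randint(2, size=(30,))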

Dummy data to run the model:


X = torch.randn(30, 20)
y = torch.randint(2, (30, ))
train_loader, valid_loader = numpy_to_dataloader(X, y, batch_size = 20, validation_split = .2, shuffle_dataset = True)
net, train_loss, train_accuracy, val_loss, val_accuracy = train(train_loader,  valid_loader, lr=0.0001, epochs=10,  hidden_units=64, net='RNN')

@ptrblck can you please check the code? Thanks.

The code is unfortunately still not executable. After fixing some np.array/tensor issues as well as device mismatches, I get:

RuntimeError: The size of tensor a (4) must match the size of tensor b (20) at non-singleton dimension 0
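
For reference: the usual cause of the original in-place error in this pattern is that h is carried from one batch to the next without being detached, so loss.backward(retain_graph=True) walks back through the graphs of all earlier iterations, and those graphs still reference the Linear weights that optimizer.step() has since updated in place; that is exactly the version mismatch autograd reports. (The non-reproducing snippet earlier re-creates prev inside the loop, so it never carries a graph across iterations.) A minimal sketch of the standard fix, assuming the hidden state should not backpropagate across batches and reusing the names from the train function above:

for i, (data, label) in enumerate(train_x):
    optimizer.zero_grad()
    net_out, h = model(data, h)
    h = h.detach()        # cut the graph so the next backward stops at this batch
    label = label.float().reshape(label.shape[0], 1)
    net_out = net_out.reshape(label.shape[0], 1)
    loss = criterion(net_out, label)
    loss.backward()       # retain_graph=True is no longer needed
    optimizer.step()

The remaining size mismatch (4 vs. 20) is presumably a last-batch effect: 30 samples with validation_split=0.2 leave 24 training samples, so batch_size=20 yields a final batch of 4, while h keeps the (20, hidden_units) shape it acquired from the previous batch.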