untimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4096]] is at version 3; expected version 1 instead

Hi, I have been trying to implement LSTM with DQN and encounter this error: Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4096]] is at version 3; expected version 1 instead.

This error only occurs while doing backward propagation for the second time and I’m not sure how to troubleshoot this error. It would be much appreciated if you help me tackle this issue. Thank you in advance.

class dqn(nn.Module):
    def __init__(self, batch_size, input_dim, output_dim, device):
        super(dqn, self).__init__()
        self.cnn_output = 6*9
        self.batch_size = batch_size
        self.lstm_input_size = 6*9*16 + 7
        hidden_size = 1024

        self.hidden_state = None
        self.cell_state = None

        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv1 = nn.Conv2d(in_channels=input_dim, out_channels=2, kernel_size=(8,8),  padding=5)
        self.conv2 = nn.Conv2d(in_channels=2, out_channels=4, kernel_size=(5,5), stride=1, padding=2)
        self.conv3 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=(3,3), stride=2, padding=1)
        self.conv4 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3,3), padding=0)

        self.lstm = nn.LSTMCell(self.lstm_input_size, hidden_size)

        self.fc1 = nn.Linear(hidden_size, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.out = nn.Linear(32, output_dim)

    def forward(self, frame, pos, hidden_state, cell_state):
        x = frame.type(torch.float)
        x = self.max_pool(F.relu(self.conv1(x)))
        x = self.max_pool(F.relu(self.conv2(x)))
        x = self.max_pool(F.relu(self.conv3(x)))
        x = self.max_pool(F.relu(self.conv4(x)))

        flattened_output = x.view(-1, self.cnn_output*16)

        self.hidden_state, self.cell_state = self.lstm(torch.cat([flattened_output, pos.view(-1, 7)], dim=1), (hidden_state, cell_state))

        fc1_out = F.relu(self.fc1(self.hidden_state))
        fc2_out = F.relu(self.fc2(fc1_out))
        fc3_out = F.relu(self.fc3(fc2_out))

        out = self.out(fc3_out)

        return out
    
    def lstm_states(self):
        return self.hidden_state, self.cell_state
  def learn(self):
      frame_now, pos_now, hidden_state_now, cell_state_now, action_taken, reward_received, next_frame, next_pos, next_hidden_state, next_cell_state, eps_end = self.memory.sample(BATCH_SIZE)
      current_q_vals = self.policy_net(frame_now, pos_now, hidden_state_now, cell_state_now).gather(1, action_taken)
      next_q_vals = self.policy_net(next_frame, next_pos, next_hidden_state, next_cell_state).max(1, keepdim=True)[0].detach()
      target = (reward_received + GAMMA * next_q_vals * (1 - eps_end)).to(device)
      loss = F.smooth_l1_loss(current_q_vals, target)
      self.optimizer.zero_grad()
      loss.backward()
      self.optimizer.step()

This is difficult to debug as it looks like some code is missing (e.g., how is policy_net implemented?). However, I would suspect that you might want to reconsider how your lstm_states method is being used. If self.hidden_state and self.cell_state are being reused during multiple backward passes then you may need to detach them from your graph and/or clone them rather than passing them directly.