Hi, I have been trying to implement an LSTM with a DQN and I keep running into this error:

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4096]] is at version 3; expected version 1 instead.

The error is only raised the second time backward propagation runs, and I'm not sure how to troubleshoot it. Any help would be much appreciated. Thank you in advance.
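To make sure I understand what the error means, here is a small standalone sketch (nothing to do with my real model; the layer sizes and the plain SGD optimizer are just placeholders) that, as far as I can tell, raises the same kind of error: an LSTM hidden state that still carries its autograd graph gets reused after the weights have been updated in place by optimizer.step().

import torch
import torch.nn as nn

# Standalone sketch with placeholder sizes, not my real model.
lstm = nn.LSTMCell(10, 20)
opt = torch.optim.SGD(lstm.parameters(), lr=0.1)

h = torch.zeros(1, 20)
c = torch.zeros(1, 20)

# First step: the graph built here saves the LSTM weights for backward.
h, c = lstm(torch.randn(1, 10), (h, c))
loss = h.sum()
loss.backward(retain_graph=True)
opt.step()  # in-place parameter update bumps the weights' version counters

# Second step: (h, c) still point back into the first graph, so this backward
# walks into it and finds the saved weights at a newer version than expected.
h2, c2 = lstm(torch.randn(1, 10), (h, c))
h2.sum().backward()  # RuntimeError: ... modified by an inplace operation

Here is my actual model and training code: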
import torch
import torch.nn as nn
import torch.nn.functional as F

class dqn(nn.Module):
    def __init__(self, batch_size, input_dim, output_dim, device):
        super(dqn, self).__init__()
        self.cnn_output = 6 * 9                    # spatial size of the final conv feature map
        self.batch_size = batch_size
        self.lstm_input_size = 6 * 9 * 16 + 7      # flattened conv features + 7 position values
        hidden_size = 1024
        self.hidden_state = None
        self.cell_state = None

        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv1 = nn.Conv2d(in_channels=input_dim, out_channels=2, kernel_size=(8, 8), padding=5)
        self.conv2 = nn.Conv2d(in_channels=2, out_channels=4, kernel_size=(5, 5), stride=1, padding=2)
        self.conv3 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=(3, 3), stride=2, padding=1)
        self.conv4 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=0)
        self.lstm = nn.LSTMCell(self.lstm_input_size, hidden_size)
        self.fc1 = nn.Linear(hidden_size, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.out = nn.Linear(32, output_dim)

    def forward(self, frame, pos, hidden_state, cell_state):
        x = frame.type(torch.float)
        x = self.max_pool(F.relu(self.conv1(x)))
        x = self.max_pool(F.relu(self.conv2(x)))
        x = self.max_pool(F.relu(self.conv3(x)))
        x = self.max_pool(F.relu(self.conv4(x)))
        flattened_output = x.view(-1, self.cnn_output * 16)   # (batch, 6*9*16)
        # Concatenate the conv features with the 7 position values and run one LSTM step.
        # The resulting states are kept on the module so they can be read back via lstm_states().
        self.hidden_state, self.cell_state = self.lstm(
            torch.cat([flattened_output, pos.view(-1, 7)], dim=1),
            (hidden_state, cell_state),
        )
        fc1_out = F.relu(self.fc1(self.hidden_state))
        fc2_out = F.relu(self.fc2(fc1_out))
        fc3_out = F.relu(self.fc3(fc2_out))
        out = self.out(fc3_out)
        return out

    def lstm_states(self):
        return self.hidden_state, self.cell_state
    def learn(self):
        # self.memory, self.policy_net and self.optimizer, as well as BATCH_SIZE,
        # GAMMA and device, are set up elsewhere (not shown here).
        (frame_now, pos_now, hidden_state_now, cell_state_now, action_taken,
         reward_received, next_frame, next_pos, next_hidden_state, next_cell_state,
         eps_end) = self.memory.sample(BATCH_SIZE)
        # Q(s, a) for the actions that were actually taken
        current_q_vals = self.policy_net(frame_now, pos_now, hidden_state_now, cell_state_now).gather(1, action_taken)
        # max_a' Q(s', a'); detached so no gradient flows through the target
        next_q_vals = self.policy_net(next_frame, next_pos, next_hidden_state, next_cell_state).max(1, keepdim=True)[0].detach()
        target = (reward_received + GAMMA * next_q_vals * (1 - eps_end)).to(device)
        loss = F.smooth_l1_loss(current_q_vals, target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
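Also, would wrapping the optimization step in autograd's anomaly detection be the right way to find out which forward operation produced the tensor the error complains about? Something like the sketch below (the module and optimizer are placeholders, not my real setup):

import torch
import torch.nn as nn

# Placeholder model/optimizer, only to show where the context manager goes.
net = nn.Linear(4, 2)
opt = torch.optim.SGD(net.parameters(), lr=0.1)

with torch.autograd.detect_anomaly():
    loss = net(torch.randn(3, 4)).pow(2).mean()
    opt.zero_grad()
    loss.backward()  # with anomaly detection enabled, the error also reports the forward op
    opt.step()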