Hello,
I am new to PyTorch, and I followed a tutorial to implement a deep Q reinforcement learning algorithm. The code worked fine, and I wanted to improve it by supporting a variable number of layers in the network, but now I get an error that I don’t understand.
Here is the network class; the commented-out functions are the original ones, which work. Basically, I used a parameter list instead of hard-coding a variable for each layer.
import numpy as np
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import constantes as cst
class DeepQNetwork(nn.Module):
    # def __init__(self, lr, inputDims, fc1Dims, fc2Dims, nbrActions):
    #     super(DeepQNetwork, self).__init__()
    #     self.inputDims = inputDims
    #     self.fc1Dims = fc1Dims
    #     self.fc2Dims = fc2Dims
    #     self.nbrActions = nbrActions
    #     self.fc1 = nn.Linear(*self.inputDims, self.fc1Dims)
    #     self.fc2 = nn.Linear(self.fc1Dims, self.fc2Dims)
    #     self.fc3 = nn.Linear(self.fc2Dims, self.nbrActions)
    #     self.optimizer = optim.Adam(self.parameters(), lr=lr)
    #     self.loss = nn.MSELoss()
    #     self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
    #     self.to(self.device)

    def __init__(self, lr, inputDims, layersDim, nbrActions):
        super(DeepQNetwork, self).__init__()
        T.autograd.set_detect_anomaly(True)
        self.inputDims = inputDims
        if len(layersDim) < 2:
            print("SIZE SHOULD BE >= 2")
            return
        self.layerDim = layersDim
        self.nbrActions = nbrActions
        self.fct = nn.ParameterList()
        # build one Linear layer per entry in layersDim, plus the output layer
        tempInput = self.inputDims
        for dim in layersDim:
            self.fct.append(nn.Linear(tempInput, dim))
            tempInput = dim
        self.fct.append(nn.Linear(tempInput, self.nbrActions))
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.loss = nn.MSELoss()
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    # def forward(self, state):
    #     x = F.relu(self.fc1(state))
    #     x = F.relu(self.fc2(x))
    #     actions = self.fc3(x)
    #     return actions

    def forward(self, state):
        x = F.relu(self.fct[0](state))
        for layer in self.fct[1:]:
            x = F.relu(layer(x))
        return x  # actions; note that F.relu is now also applied to the last layer, unlike the original fc3
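(Side note: I stored the layers in an nn.ParameterList because it was the first container I found, but the docs seem to recommend nn.ModuleList for holding sub-modules. For reference, here is a rough, untested sketch of what I think the equivalent nn.ModuleList version would look like, with the output layer kept linear like the original fc3; the class name is just for illustration:)

class DeepQNetworkML(nn.Module):
    # untested sketch: same variable-depth idea, written with nn.ModuleList
    def __init__(self, lr, inputDims, layersDim, nbrActions):
        super().__init__()
        self.layers = nn.ModuleList()  # documented container for sub-modules
        tempInput = inputDims
        for dim in layersDim:
            self.layers.append(nn.Linear(tempInput, dim))
            tempInput = dim
        self.layers.append(nn.Linear(tempInput, nbrActions))
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.loss = nn.MSELoss()
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        x = state
        for layer in self.layers[:-1]:
            x = F.relu(layer(x))
        return self.layers[-1](x)  # output layer stays linear, like fc3 in the original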
And the agent:
class Agent():
    def __init__(self, gamma, epsilon, lr, inputDims, batchSize, nbrActions, maxMemSize=100000, epsEnd=0.01, epsDec=5e-4):
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsMin = epsEnd
        self.epsDec = epsDec
        self.lr = lr
        self.actionSpace = [i for i in range(nbrActions)]
        self.memSize = maxMemSize
        self.batchSize = batchSize
        self.memCounter = 0
        T.autograd.set_detect_anomaly(True)
        self.Q_eval = DeepQNetwork(self.lr, nbrActions=nbrActions, inputDims=inputDims, layersDim=[cst.FC1_DIM, cst.FC2_DIM])
        self.stateMemory = np.zeros((self.memSize, inputDims), dtype=np.float32)
        self.newStateMemory = np.zeros((self.memSize, inputDims), dtype=np.float32)
        self.actionMemory = np.zeros(self.memSize, dtype=np.int32)
        self.rewardMemory = np.zeros(self.memSize, dtype=np.float32)
        self.terminalMemory = np.zeros(self.memSize, dtype=np.bool8)

    def storeTransition(self, state, action, reward, state_, done):
        index = self.memCounter % self.memSize
        self.stateMemory[index] = state
        self.newStateMemory[index] = state_
        self.rewardMemory[index] = reward
        self.actionMemory[index] = action
        self.terminalMemory[index] = done
        self.memCounter += 1

    def chooseAction(self, observation):
        if np.random.random() > self.epsilon:
            state = T.tensor([observation]).to(self.Q_eval.device)
            actions = self.Q_eval.forward(state)
            action = T.argmax(actions).item()
        else:
            action = np.random.choice(self.actionSpace)
        return action

    def learn(self):
        if self.memCounter < self.batchSize:
            return
        self.Q_eval.optimizer.zero_grad()
        maxMem = min(self.memCounter, self.memSize)
        batch = np.random.choice(maxMem, self.batchSize, replace=False)
        batchIndex = np.arange(self.batchSize, dtype=np.int32)
        stateBatch = T.tensor(self.stateMemory[batch]).to(self.Q_eval.device)
        newStateBatch = T.tensor(self.newStateMemory[batch]).to(self.Q_eval.device)
        rewardBatch = T.tensor(self.rewardMemory[batch]).to(self.Q_eval.device)
        terminalBatch = T.tensor(self.terminalMemory[batch]).to(self.Q_eval.device)
        actionBatch = self.actionMemory[batch]
        qEval = self.Q_eval.forward(stateBatch)[batchIndex, actionBatch]  # Q-values of the actions taken
        qNext = self.Q_eval.forward(newStateBatch)
        qNext[terminalBatch] = 0.0  # zero out terminal states (in-place assignment on the tensor returned by forward)
        qTarget = rewardBatch + self.gamma * T.max(qNext, dim=1)[0]
        loss = self.Q_eval.loss(qTarget, qEval).to(self.Q_eval.device)
        loss.backward()
        self.Q_eval.optimizer.step()
        self.epsilon = self.epsilon - self.epsDec if self.epsilon > self.epsMin \
            else self.epsMin
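For completeness, main.py is essentially the standard Gym training loop. A rough sketch (the environment name and the numbers below are placeholders, not my exact settings):

import gym

env = gym.make('CartPole-v1')  # placeholder environment
agent = Agent(gamma=0.99, epsilon=1.0, lr=0.001, inputDims=4, batchSize=64, nbrActions=2)

for episode in range(500):
    observation = env.reset()
    done = False
    while not done:
        action = agent.chooseAction(observation)
        observation_, reward, done, info = env.step(action)  # old (pre-0.26) gym step API
        agent.storeTransition(observation, action, reward, observation_, done)
        agent.learn()  # this is the call at main.py line 34 in the traceback
        observation = observation_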
Here is the error message:
C:\Users\PATH_PYTHON\torch\autograd\__init__.py:173: UserWarning: Error detected in ReluBackward0. Traceback of forward call that caused the error:
  File "c:\Users\PATH\main.py", line 34, in <module>
    agent.learn()
  File "c:\Users\PATH\DeepQNetwork.py", line 121, in learn
    qNext = self.Q_eval.forward(newStateBatch)
  File "c:\Users\PATH\DeepQNetwork.py", line 58, in forward
    x = F.relu(layer(x))
  File "C:\Users\PATH_PYTHON\torch\nn\functional.py", line 1457, in relu
    result = torch.relu(input)
 (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
  File "c:\Users\PATH\main.py", line 34, in <module>
    agent.learn()
  File "c:\Users\PATH\DeepQNetwork.py", line 127, in learn
    loss.backward()
  File "C:\Users\PATH_PYTHON\torch\_tensor.py", line 396, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "C:\Users\PATH_PYTHON\torch\autograd\__init__.py", line 173, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 4]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
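While trying to narrow it down, I found that this tiny standalone snippet (nothing to do with my network) raises what looks like the same error, down to the ReluBackward0 mention:

import torch
import torch.nn.functional as F

x = torch.randn(64, 4, requires_grad=True)
y = F.relu(x)        # relu saves its output for the backward pass
y[0] = 0.0           # in-place modification of that saved output
y.sum().backward()   # RuntimeError: ...modified by an inplace operation... ReluBackward0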
So the error seems linked to an in-place modification somewhere, but I really don’t understand why my change to the network causes this problem.
Thanks in advance for your help!