Error with the backward function

I am new to PyTorch, and I followed a tutorial to implement a Deep Q RL algorithm. The code worked fine, and I wanted to improve it by having a non-fixed number of layer in the NN algorithm, but I have an error that I don’t understand.

Here is the class of the network, the commented functions are the original ones, that work. I basically used a parameter list instead of hard-coding a variable for each layer.

import numpy as np
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import constantes as cst

class DeepQNetwork(nn.Module):
    # def __init__(self, lr, inputDims, fc1Dims, fc2Dims, nbrActions):
    #     super(DeepQNetwork, self).__init__()
    #     self.inputDims = inputDims
    #     self.fc1Dims = fc1Dims
    #     self.fc2Dims = fc2Dims
    #     self.nbrActions = nbrActions

    #     self.fc1 = nn.Linear(*self.inputDims, self.fc1Dims)
    #     self.fc2 = nn.Linear(self.fc1Dims, self.fc2Dims) 
    #     self.fc3 = nn.Linear(self.fc2Dims, self.nbrActions)
    #     self.optimizer = optim.Adam(self.parameters(), lr=lr) 
    #     self.loss = nn.MSELoss()
    #     self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')

    def __init__(self, lr, inputDims, layersDim, nbrActions):
        super(DeepQNetwork, self).__init__()
        self.inputDims = inputDims
        if len(layersDim)<2:
            print("SIZE SHOULD BE >= 2")
        self.layerDim = layersDim
        self.nbrActions = nbrActions

        self.fct = nn.ParameterList()
        tempInput = self.inputDims
        for dim in layersDim:
            self.fct.append(nn.Linear(tempInput, dim))
            tempInput = dim
        self.fct.append(nn.Linear(tempInput, self.nbrActions))

        self.optimizer = optim.Adam(self.parameters(), lr=lr) 
        self.loss = nn.MSELoss()
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')

    # def forward(self, state):
    #     x = F.relu(self.fc1(state))
    #     x = F.relu(self.fc2(x))
    #     actions = self.fc3(x)

    #     return actions

    def forward(self, state):
        x = F.relu(self.fct[0](state))
        for layer in self.fct[1:]:
            x = F.relu(layer(x))       

        return x # actions

And the agent:

class Agent():
    def __init__(self, gamma, epsilon, lr, inputDims, batchSize, nbrActions, maxMemSize=100000, epsEnd=0.01, epsDec=5e-4):
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsMin = epsEnd
        self.epsDec = epsDec = lr
        self.actionSpace = [i for i in range(nbrActions)]
        self.memSize = maxMemSize
        self.batchSize = batchSize
        self.memCounter = 0

        self.Q_eval = DeepQNetwork(, nbrActions=nbrActions, inputDims=inputDims, layersDim=[cst.FC1_DIM, cst.FC2_DIM])

        self.stateMemory = np.zeros((self.memSize, inputDims), dtype=np.float32)
        self.newStateMemory = np.zeros((self.memSize, inputDims), dtype=np.float32)
        self.actionMemory = np.zeros(self.memSize, dtype=np.int32)
        self.rewardMemory = np.zeros(self.memSize, dtype=np.float32)
        self.terminalMemory = np.zeros(self.memSize, dtype=np.bool8)

    def storeTransition(self, state, action, reward, state_, done):
        index = self.memCounter % self.memSize
        self.stateMemory[index] = state
        self.newStateMemory[index] = state_
        self.rewardMemory[index] = reward
        self.actionMemory[index] = action
        self.terminalMemory[index] = done

        self.memCounter +=1

    def chooseAction(self, observation):
        if np.random.random() > self.epsilon:
            state = T.tensor([observation]).to(self.Q_eval.device)
            actions = self.Q_eval.forward(state)
            action = T.argmax(actions).item()
            action = np.random.choice(self.actionSpace)

        return action

    def learn(self):
        if self.memCounter < self.batchSize:


        maxMem = min(self.memCounter, self.memSize)
        batch = np.random.choice(maxMem, self.batchSize, replace=False)

        batchIndex = np.arange(self.batchSize, dtype=np.int32)

        stateBatch = T.tensor(self.stateMemory[batch]).to(self.Q_eval.device)
        newStateBatch = T.tensor(self.newStateMemory[batch]).to(self.Q_eval.device)
        rewardBatch = T.tensor(self.rewardMemory[batch]).to(self.Q_eval.device)
        terminalBatch = T.tensor(self.terminalMemory[batch]).to(self.Q_eval.device)

        actionBatch = self.actionMemory[batch]

        qEval = self.Q_eval.forward(stateBatch)[batchIndex, actionBatch]
        qNext = self.Q_eval.forward(newStateBatch)
        qNext[terminalBatch] = 0.0
        qTarget = rewardBatch + self.gamma * T.max(qNext, dim=1)[0]

        loss = self.Q_eval.loss(qTarget, qEval).to(self.Q_eval.device)

        self.epsilon = self.epsilon - self.epsDec if self.epsilon > self.epsMin \
                        else self.epsMin

Here is the error message:

C:\Users\PATH_PYTHON\torch\autograd\ UserWarning: Error detected in ReluBackward0. Traceback of forward call that caused the error:
  File "c:\Users\PATH\", line 34, in <module>
  File "c:\Users\PATH\", line 121, in learn
    qNext = self.Q_eval.forward(newStateBatch)
  File "c:\Users\PATH\", line 58, in forward
    x = F.relu(layer(x))
  File "C:\Users\PATH_PYTHON\torch\nn\", line 1457, in relu
    result = torch.relu(input)
 (Triggered internally at  C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
  File "c:\Users\PATH\", line 34, in <module>
  File "c:\Users\PATH\", line 127, in learn
  File "C:\Users\PATH_PYTHON\torch\", line 396, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "C:\Users\PATH_PYTHON\torch\autograd\", line 173, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 4]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

I really don’t understand why my modification creates that problem.
Thanks in advance for your help!

This inplace operation:

qNext[terminalBatch] = 0.0

might be causing the issue. Could you try to comment it out and see if this would fix the issue?

It was the problem, it is working now!
Why does it cause a problem with my new version and not with the previous one?

I don’t know which version you were using before but additional checks might have been added or the backward definition of a method might have been changed and the operation could use the output now instead of the input (or vice versa) to compute the gradients etc.

After checking my code, I realised I did an error when I rewrote the forward function. I used the relu function for the last layer, instead of using the function straight. That was causing the problem.