Error with the backward function

Hello,
I am new to PyTorch, and I followed a tutorial to implement a Deep Q-learning algorithm. The code worked fine, and I wanted to improve it by allowing a non-fixed number of layers in the network, but now I get an error that I don’t understand.

Here is the network class; the commented-out functions are the original ones, which work. I basically used a parameter list instead of hard-coding a variable for each layer.

import numpy as np
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import constantes as cst

class DeepQNetwork(nn.Module):
    # def __init__(self, lr, inputDims, fc1Dims, fc2Dims, nbrActions):
    #     super(DeepQNetwork, self).__init__()
    #     self.inputDims = inputDims
    #     self.fc1Dims = fc1Dims
    #     self.fc2Dims = fc2Dims
    #     self.nbrActions = nbrActions

    #     self.fc1 = nn.Linear(*self.inputDims, self.fc1Dims)
    #     self.fc2 = nn.Linear(self.fc1Dims, self.fc2Dims) 
    #     self.fc3 = nn.Linear(self.fc2Dims, self.nbrActions)
    #     self.optimizer = optim.Adam(self.parameters(), lr=lr) 
    #     self.loss = nn.MSELoss()
    #     self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
    #     self.to(self.device)

    def __init__(self, lr, inputDims, layersDim, nbrActions):
        super(DeepQNetwork, self).__init__()
        T.autograd.set_detect_anomaly(True)
        self.inputDims = inputDims
        if len(layersDim)<2:
            print("SIZE SHOULD BE >= 2")
            return
        self.layerDim = layersDim
        self.nbrActions = nbrActions

        self.fct = nn.ParameterList()
        tempInput = self.inputDims
        for dim in layersDim:
            self.fct.append(nn.Linear(tempInput, dim))
            tempInput = dim
        self.fct.append(nn.Linear(tempInput, self.nbrActions))

        self.optimizer = optim.Adam(self.parameters(), lr=lr) 
        self.loss = nn.MSELoss()
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    # def forward(self, state):
    #     x = F.relu(self.fc1(state))
    #     x = F.relu(self.fc2(x))
    #     actions = self.fc3(x)

    #     return actions

    def forward(self, state):
        x = F.relu(self.fct[0](state))
        for layer in self.fct[1:]:
            x = F.relu(layer(x))       

        return x # actions

And the agent:

class Agent():
    def __init__(self, gamma, epsilon, lr, inputDims, batchSize, nbrActions, maxMemSize=100000, epsEnd=0.01, epsDec=5e-4):
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsMin = epsEnd
        self.epsDec = epsDec
        self.lr = lr
        self.actionSpace = [i for i in range(nbrActions)]
        self.memSize = maxMemSize
        self.batchSize = batchSize
        self.memCounter = 0
        T.autograd.set_detect_anomaly(True)

        self.Q_eval = DeepQNetwork(self.lr, nbrActions=nbrActions, inputDims=inputDims, layersDim=[cst.FC1_DIM, cst.FC2_DIM])

        self.stateMemory = np.zeros((self.memSize, inputDims), dtype=np.float32)
        self.newStateMemory = np.zeros((self.memSize, inputDims), dtype=np.float32)
        self.actionMemory = np.zeros(self.memSize, dtype=np.int32)
        self.rewardMemory = np.zeros(self.memSize, dtype=np.float32)
        self.terminalMemory = np.zeros(self.memSize, dtype=np.bool8)

    def storeTransition(self, state, action, reward, state_, done):
        index = self.memCounter % self.memSize
        self.stateMemory[index] = state
        self.newStateMemory[index] = state_
        self.rewardMemory[index] = reward
        self.actionMemory[index] = action
        self.terminalMemory[index] = done

        self.memCounter +=1

    def chooseAction(self, observation):
        if np.random.random() > self.epsilon:
            state = T.tensor([observation]).to(self.Q_eval.device)
            actions = self.Q_eval.forward(state)
            action = T.argmax(actions).item()
        else:
            action = np.random.choice(self.actionSpace)

        return action

    def learn(self):
        if self.memCounter < self.batchSize:
            return

        self.Q_eval.optimizer.zero_grad()

        maxMem = min(self.memCounter, self.memSize)
        batch = np.random.choice(maxMem, self.batchSize, replace=False)

        batchIndex = np.arange(self.batchSize, dtype=np.int32)

        stateBatch = T.tensor(self.stateMemory[batch]).to(self.Q_eval.device)
        newStateBatch = T.tensor(self.newStateMemory[batch]).to(self.Q_eval.device)
        rewardBatch = T.tensor(self.rewardMemory[batch]).to(self.Q_eval.device)
        terminalBatch = T.tensor(self.terminalMemory[batch]).to(self.Q_eval.device)

        actionBatch = self.actionMemory[batch]

        qEval = self.Q_eval.forward(stateBatch)[batchIndex, actionBatch]
        qNext = self.Q_eval.forward(newStateBatch)
        qNext[terminalBatch] = 0.0
 
        qTarget = rewardBatch + self.gamma * T.max(qNext, dim=1)[0]

        loss = self.Q_eval.loss(qTarget, qEval).to(self.Q_eval.device)
        loss.backward()
        self.Q_eval.optimizer.step()

        self.epsilon = self.epsilon - self.epsDec if self.epsilon > self.epsMin \
                        else self.epsMin

Here is the error message:

C:\Users\PATH_PYTHON\torch\autograd\__init__.py:173: UserWarning: Error detected in ReluBackward0. Traceback of forward call that caused the error:
  File "c:\Users\PATH\main.py", line 34, in <module>
    agent.learn()
  File "c:\Users\PATH\DeepQNetwork.py", line 121, in learn
    qNext = self.Q_eval.forward(newStateBatch)
  File "c:\Users\PATH\DeepQNetwork.py", line 58, in forward
    x = F.relu(layer(x))
  File "C:\Users\PATH_PYTHON\torch\nn\functional.py", line 1457, in relu
    result = torch.relu(input)
 (Triggered internally at  C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
  File "c:\Users\PATH\main.py", line 34, in <module>
    agent.learn()
  File "c:\Users\PATH\DeepQNetwork.py", line 127, in learn
    loss.backward()
  File "C:\Users\PATH_PYTHON\torch\_tensor.py", line 396, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "C:\Users\PATH_PYTHON\torch\autograd\__init__.py", line 173, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [64, 4]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

I really don’t understand why my modification creates that problem.
Thanks in advance for your help!

This inplace operation:

qNext[terminalBatch] = 0.0

might be causing the issue. Could you try commenting it out and see if that fixes it?
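
If you do still need terminal next-states to contribute zero, an out-of-place alternative could look roughly like this (a sketch using your variable names, assuming terminalBatch is a bool tensor), so nothing that was saved for backward gets overwritten:

qNext = self.Q_eval.forward(newStateBatch)
# mask terminal transitions with a multiply instead of writing into qNext in place
maxNext = T.max(qNext, dim=1)[0] * (~terminalBatch).float()
qTarget = rewardBatch + self.gamma * maxNext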

That was indeed the problem; it is working now!
Why does it cause a problem with my new version and not with the previous one?

I don’t know which version you were using before, but additional checks might have been added, or the backward definition of a method might have changed so that the operation now uses its output instead of its input (or vice versa) to compute the gradients.
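
In your case, relu is one of those ops: its backward reuses its output (that is what the ReluBackward0 in your anomaly trace points at), so modifying that output in place before calling backward should reproduce the same error. A minimal sketch:

import torch
import torch.nn.functional as F

lin = torch.nn.Linear(3, 2)
x = torch.randn(4, 3)

out = F.relu(lin(x))   # relu saves its output for the backward pass
out[0] = 0.0           # in-place write bumps the tensor's version counter
out.sum().backward()   # RuntimeError: ... modified by an inplace operation

With a plain Linear output (as in your old forward), the same in-place write does not break backward, because Linear’s backward does not need its own output.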

After checking my code, I realised I made a mistake when I rewrote the forward function: I applied relu to the last layer as well, instead of returning its output directly. That was what caused the problem.
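
For reference, the fixed forward looks roughly like this (same loop, but no activation on the output layer):

def forward(self, state):
    x = state
    for layer in self.fct[:-1]:
        x = F.relu(layer(x))
    actions = self.fct[-1](x)   # raw Q-values, no relu on the output layer
    return actions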