RuntimeError: The autograd engine was called while holding the GIL

Hello PyTorch community,

I am currently experiencing an error related to the autograd engine and the Global Interpreter Lock (GIL). The error message reads as follows:

"
agent.learn()
loss.backward()
File “c:\python38\lib\site-packages\torch_tensor.py”, line 487, in backward
torch.autograd.backward(
File “c:\python38\lib\site-packages\torch\autograd_init_.py”, line 200, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: The autograd engine was called while holding the GIL. If you are using the C++ API, the autograd engine is an expensive operation that does not require the GIL to be held so you should release it with ‘pybind11::gil_scoped_release no_gil;’. If you are not using the C++ API, please report a bug to the PyTorch team."

I am currently using PyTorch version 2.0.0+cu117.

This is the code that I am using:

import torch
from torch import nn, optim
import numpy as np
class ActorCritic(nn.Module):
    def __init__(self, state_size, action_size, hidden_size=128):
        super(ActorCritic, self).__init__()

        self.actor = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, action_size),
            nn.Softmax(dim=-1)  # dim=-1 so the batched call in learn() normalizes over actions, not over the batch
        )

        self.critic = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, x):
        return self.actor(x), self.critic(x)

class Memory:
    def __init__(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.dones = []

    def add(self, state, action, reward, next_state, done):
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.next_states.append(next_state)
        self.dones.append(done)

    def clear(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.dones = []

class Agent:
    def __init__(self, state_size, action_size, lr=0.001, gamma=0.99):
        self.actor_critic = ActorCritic(state_size, action_size)
        self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=lr)
        self.memory = Memory()
        self.gamma = gamma

    def act(self, state):
        state = torch.FloatTensor(state)
        action_probs, _ = self.actor_critic(state)
        # Renormalize in float64 to guard against float32 rounding, which can
        # make np.random.choice reject the probabilities as not summing to 1.
        probs = action_probs.detach().numpy().astype(np.float64)
        action = np.random.choice(len(probs), p=probs / probs.sum())
        return action

    def learn(self):
        # np.array first: building a tensor from a list of numpy arrays is slow
        # and warns on recent PyTorch versions.
        states = torch.FloatTensor(np.array(self.memory.states))
        actions = torch.LongTensor(self.memory.actions)
        rewards = torch.FloatTensor(self.memory.rewards)
        next_states = torch.FloatTensor(np.array(self.memory.next_states))
        dones = torch.FloatTensor(self.memory.dones)

        _, critic_values = self.actor_critic(states)
        _, next_critic_values = self.actor_critic(next_states)
        # Squeeze the (N, 1) critic outputs to (N,) so they broadcast correctly
        # against rewards; detach the bootstrap term and mask terminal states.
        td_errors = rewards + self.gamma * next_critic_values.squeeze(-1).detach() * (1 - dones) - critic_values.squeeze(-1)
        # The actor head already ends in Softmax, so take the log of the
        # probabilities directly instead of applying log_softmax on top of them.
        log_probs = torch.log(self.actor_critic.actor(states)[torch.arange(len(actions)), actions] + 1e-8)
        actor_loss = -(log_probs * td_errors.detach()).mean()
        critic_loss = td_errors.pow(2).mean()

        loss = actor_loss + critic_loss
        print("Before the problem")
        self.optimizer.zero_grad()
        loss.backward()
        print("After the problem")
        self.optimizer.step()

    def remember(self, state, action, reward, next_state, done):
        self.memory.add(state, action, reward, next_state, done)

    def clear_memory(self):
        self.memory.clear()
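
For context, here is the kind of driver loop I call this from. The ToyEnv below is only a stand-in I wrote for this post (random states and rewards), not my real environment, but it exercises the same agent.learn() -> loss.backward() path:

import numpy as np
import torch

# Hypothetical stand-in environment (illustration only): random 4-dimensional
# states, 2 actions, random rewards, fixed-length episodes.
class ToyEnv:
    def __init__(self, state_size=4, episode_len=50):
        self.state_size = state_size
        self.episode_len = episode_len
        self.t = 0

    def reset(self):
        self.t = 0
        return np.random.randn(self.state_size).astype(np.float32)

    def step(self, action):
        self.t += 1
        next_state = np.random.randn(self.state_size).astype(np.float32)
        reward = float(np.random.randn())
        done = self.t >= self.episode_len
        return next_state, reward, done

env = ToyEnv()
agent = Agent(state_size=4, action_size=2)

for episode in range(10):
    state = env.reset()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
    agent.learn()  # loss.backward() inside here is where the error is raised
    agent.clear_memory()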

I am not sure what is causing this error and am looking for guidance. Can anyone in the community provide some insight into what might be triggering it and how I can resolve it?
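
In case it helps narrow things down, my understanding (which may be wrong) is that this error should normally only come from C++ extension code calling into autograd while holding the GIL. A trivial single-threaded check like the one below might show whether the problem is the environment rather than the agent code:

import torch

# Minimal sanity check, independent of the agent: if even this trivial
# backward() raises the same GIL RuntimeError, the installation/environment
# is at fault rather than the model code.
x = torch.ones(3, requires_grad=True)
(x * 2).sum().backward()
print(x.grad)  # expected: tensor([2., 2., 2.])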

Thank you in advance for your help. I look forward to your responses.

Could you post the missing parts of the code to make it executable, so that I can try to reproduce the issue?