Hello PyTorch community,
I am currently experiencing an error related to the autograd engine and the Global Interpreter Lock (GIL). The error message reads as follows:
"
agent.learn()
loss.backward()
File “c:\python38\lib\site-packages\torch_tensor.py”, line 487, in backward
torch.autograd.backward(
File “c:\python38\lib\site-packages\torch\autograd_init_.py”, line 200, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: The autograd engine was called while holding the GIL. If you are using the C++ API, the autograd engine is an expensive operation that does not require the GIL to be held so you should release it with ‘pybind11::gil_scoped_release no_gil;’. If you are not using the C++ API, please report a bug to the PyTorch team."
I am currently using PyTorch version 2.0.0+cu117.
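For reference, here is a minimal snippet that exercises the same loss.backward() code path. This is my own reduction, not part of the failing program, but I would expect running it to be a quick way to check whether the error reproduces outside of the agent code:

import torch
from torch import nn

# Minimal sanity check: a tiny model and a single backward pass.
# If the GIL RuntimeError is environment-related, it should show up
# here as well; if not, the problem is likely specific to how the
# agent code is being driven.
model = nn.Linear(4, 2)
x = torch.randn(8, 4)
loss = model(x).pow(2).mean()
loss.backward()
print("backward completed:", model.weight.grad is not None)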
This is the code that I am using:
import numpy as np
import torch
from torch import nn, optim


class ActorCritic(nn.Module):
    def __init__(self, state_size, action_size, hidden_size=128):
        super(ActorCritic, self).__init__()
        # Policy head: maps a state to a probability distribution over actions.
        self.actor = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, action_size),
            nn.Softmax(dim=0)
        )
        # Value head: maps a state to a scalar state-value estimate.
        self.critic = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, x):
        return self.actor(x), self.critic(x)


class Memory:
    """Simple rollout buffer holding one batch of transitions."""

    def __init__(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.dones = []

    def add(self, state, action, reward, next_state, done):
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.next_states.append(next_state)
        self.dones.append(done)

    def clear(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.dones = []


class Agent:
    def __init__(self, state_size, action_size, lr=0.001, gamma=0.99):
        self.actor_critic = ActorCritic(state_size, action_size)
        self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=lr)
        self.memory = Memory()
        self.gamma = gamma

    def act(self, state):
        # Sample an action from the current policy for a single state.
        state = torch.FloatTensor(state)
        action_probs, _ = self.actor_critic(state)
        action_probs = [action_probs.detach().numpy()]
        action = np.random.choice(len(action_probs[0]), p=action_probs[0])
        return action

    def learn(self):
        states = torch.FloatTensor(self.memory.states)
        actions = torch.LongTensor(self.memory.actions)
        rewards = torch.FloatTensor(self.memory.rewards)
        next_states = torch.FloatTensor(self.memory.next_states)
        dones = torch.FloatTensor(self.memory.dones)

        _, critic_values = self.actor_critic(states)
        _, next_critic_values = self.actor_critic(next_states)

        # One-step TD error, used both as the critic's regression target
        # and (detached) as the advantage estimate for the actor.
        td_errors = rewards + self.gamma * next_critic_values - critic_values
        actor_loss = -(torch.log_softmax(self.actor_critic.actor(states), dim=1)[range(len(actions)), actions] * td_errors.detach()).mean()
        critic_loss = td_errors.pow(2).mean()
        loss = actor_loss + critic_loss

        print("Before the problem")
        self.optimizer.zero_grad()
        loss.backward()  # <-- the RuntimeError is raised here
        print("After the problem")
        self.optimizer.step()

    def remember(self, state, action, reward, next_state, done):
        self.memory.add(state, action, reward, next_state, done)

    def clear_memory(self):
        self.memory.clear()
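For context, the loop that drives the agent is roughly equivalent to the sketch below. My actual environment is custom, so this uses CartPole and the classic Gym API as a stand-in:

import gym  # stand-in: my real environment is custom, not CartPole

env = gym.make("CartPole-v1")
agent = Agent(state_size=4, action_size=2)

for episode in range(500):
    state = env.reset()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
    agent.learn()  # loss.backward() inside here is where the error occurs
    agent.clear_memory()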
I am not sure what is causing this error, since I am only using the Python API and never touch the C++ frontend that the error message refers to. Can anyone in the community provide some insight into what might be causing it and how I can resolve it?
Thank you in advance for your help. I look forward to your responses.