Hi, I’m running into the following error:

Cudnn RNN backward can only be called in training mode

I’m pretty sure I switch the model back to training mode with model.train() before calling backward(). My code is attached below:

```
def episode(self, deterministic=False):
    if self.noise:
        self.noise.reset_states()
    state, _ = self.env.reset()
    steps = 0
    log_probs = []
    rewards = []
    agent_wealth = [1.]
    market_wealth = [1.]
    while True:
        self.actor.eval()  # forward pass runs in eval mode
        action = self.actor(torch.from_numpy(state[None, ...]).to(self.args.device))
        action.squeeze_(0)
        if self.noise and not deterministic:
            action += torch.from_numpy(self.noise.sample())
        weights, log_p = self.get_weights(action)
        next_state, reward, done, info = self.env.step(weights)
        log_probs.append(log_p)
        rewards.append(reward)
        agent_wealth.append(info['portfolio_value'])
        market_wealth.append(info['market_avg_return'] * market_wealth[-1])
        steps += 1
        state = next_state
        self.total_steps += 1
        if done and not deterministic:
            # -------- update policy --------
            self.actor.train()  # switch to training mode before backward
            agent_sr = SharpeRatio(agent_wealth)
            market_sr = SharpeRatio(market_wealth)
            policy_gradient = []
            for log_ps in log_probs:
                policy_gradient.append(-log_ps * (agent_sr - market_sr))
            self.optimizer.zero_grad()
            policy_gradient = torch.stack(policy_gradient).sum()
            policy_gradient.backward()
            self.optimizer.step()
            # -------------------------------
        if done:
            break
    return sum(rewards), steps

def get_weights(self, action):
    # keep the top-G scores and renormalize them with softmax
    values, indices = torch.topk(action, self.args.G)
    proportions = torch.softmax(values, dim=-1)
    weights = np.zeros_like(action.detach().cpu().numpy())
    for i, idx in enumerate(indices):
        weights[idx.item()] = proportions[i].detach().cpu().numpy()
    log_p = torch.log(proportions)
    return weights, log_p
```
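For concreteness, here is roughly what the get_weights logic above computes for a toy action vector (G = 2 and four assets are just for illustration):

```
import torch

action = torch.tensor([0.1, 2.0, -0.5, 1.0])
values, indices = torch.topk(action, 2)      # values=[2.0, 1.0], indices=[1, 3]
proportions = torch.softmax(values, dim=-1)  # ~[0.731, 0.269]
# weights becomes [0., 0.731, 0., 0.269]; log_p = log(proportions)
```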

The episode method runs the agent's interaction-and-learning procedure, and the get_weights method turns the network's output into the actual weight vector passed to the environment. As you can see, I call self.actor.train() before policy_gradient.backward(), so can anyone tell me where the problem is?
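In case it helps narrow things down, here is a minimal sketch that, as far as I can tell, reproduces the same error with a bare LSTM, completely outside my training loop (it needs a CUDA device, since the check comes from cuDNN):

```
import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=4, hidden_size=8).cuda()
x = torch.randn(5, 1, 4, device='cuda', requires_grad=True)

rnn.eval()             # forward pass happens in eval mode
out, _ = rnn(x)

rnn.train()            # switch back before backward, as in my episode()
out.sum().backward()   # RuntimeError: cudnn RNN backward can only be called in training mode
```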