Can't call numpy() on Variable that requires grad. Use var.detach().numpy() instead

Hi, I am trying to write an asynchronous actor-critic (A3C) in PyTorch. While computing the generalized advantage estimate (GAE) I ran into an unusual error.

The error is raised by the line I highlighted in the code below with **. The message is: Can't call numpy() on Variable that requires grad. Use var.detach().numpy() instead. If I call detach() on the values, the code runs fine, but as far as I understand, detach() removes the values tensors from the computational graph, so no gradients will be calculated for them and the gradients that are computed will be wrong. Please correct me if I am wrong. Thanks.

The line that generates the error is:

delta_t = rewards[i] + args.gamma * values[i + 1] - values[i]

Replacing it with the following works fine:

delta_t = rewards[i] + args.gamma * values[i + 1].detach() - values[i].detach()

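One likely mechanism (an assumption on my part, since env.step is not shown): if rewards[i] is a NumPy scalar or array, the mixed NumPy/tensor expression is dispatched to NumPy, which tries to convert the tensor via numpy() and hits exactly this error even though numpy() is never called explicitly. Converting the reward to a plain Python float (or a torch tensor) when it is stored avoids the implicit conversion and keeps values in the graph. A minimal, self-contained sketch:

import numpy as np
import torch

gamma = 0.99
values = [torch.zeros(1, 1, requires_grad=True) for _ in range(2)]

reward = np.float64(1.0)                 # assumption: env.step hands back a NumPy scalar
reward = float(reward)                   # convert once, e.g. when appending to rewards
delta_t = reward + gamma * values[1] - values[0]   # plain float + tensor stays in autograd
delta_t.sum().backward()                 # gradients reach both value tensors

Note also that in the loop below, gae.detach() is already used in the policy loss and the value loss uses the non-detached advantage, so detaching values inside the delta_t line does not appear to change the gradients that backward() actually computes.
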
Code

import random

import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter  # or tensorboardX, depending on the setup

# TrafficSim, ActorCritic and ensure_shared_grads come from the project's own modules (not shown)

def train(rank, args, shared_model, counter, lock, optimizer=None):

	torch.manual_seed(args.seed+rank)

	cell_size = 1.0
	view_size = args.view_size
	max_vel = args.max_speed_agent
	time_period = 0.2
	DENSITIES  = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
	env = TrafficSim(render=False, render_grid=True, time_period=time_period, fps=20,
		cell_size=cell_size, view_size=view_size, comm_mode=False, frac_cells=0.6,
		regions_width=3, max_vel=max_vel, lane_lambda=0.0, query_lambda=0.0,
		trajec_file='tsim/micro.pkl')

	model = ActorCritic(args.observation_size, args.num_actions)
	writer = SummaryWriter(args.logdir+args.exp_name+"_process_"+str(rank))

	if optimizer is None:
		optimizer = optim.Adam(shared_model.parameters(), lr=args.lr)

	model.train()

	sampled_prob = random.sample(DENSITIES, k=1)[0]
	state = env.reset(sampled_prob)
	state = torch.from_numpy(state).float()
	done = True

	episode_length = 0

	while True:

		# Sync with the shared model at the start of episode or after tmax
		model.load_state_dict(shared_model.state_dict())

		if done:
			cx = torch.zeros(1, model.memsize)
			hx = torch.zeros(1, model.memsize)
		else:
			cx = cx.detach()
			hx = hx.detach()

		values = []
		log_probs = []
		rewards = []
		entropies = []

		for step in range(args.num_steps):

			episode_length += 1
			value, logit, (hx, cx) = model((state.unsqueeze(0), (hx, cx)))

			prob = F.softmax(logit, dim=-1)
			log_prob = F.log_softmax(logit, dim=-1)
			entropy = -(log_prob * prob).sum(1, keepdim=True)
			entropies.append(entropy)

			action = prob.multinomial(num_samples=1).detach()
			log_prob = log_prob.gather(1, action)

			reward, state, done = env.step(action.numpy()[0,0],"NULL")

			done = done or episode_length >= args.max_epsiode_length
			#reward = max(min(reward, 1), -1)

			with lock:
				counter.value += 1

			if (counter.value % args.save_interval_steps) == 0:
				print("Saved Weights")
				torch.save(shared_model.state_dict(),"weights_"+args.exp_name)

			if done:
				episode_length = 0
				sampled_prob = random.sample(DENSITIES, k=1)[0]
				state = np.array(env.reset(sampled_prob)).flatten()

			state = torch.from_numpy(state).float()
			values.append(value)
			log_probs.append(log_prob)
			rewards.append(reward)

			if done:
				break

		R = torch.zeros(1,1)

		if not done:
			# bootstrap with the critic's estimate for the last state; detach so no
			# gradient flows back through the bootstrap value
			value, _, _ = model((state.unsqueeze(0), (hx, cx)))
			R = value.detach()

		values.append(R)
		policy_loss = 0.0
		value_loss = 0.0
		gae = torch.zeros(1,1)

		for i in reversed(range(len(rewards))):
			R = args.gamma * R + rewards[i]
			advantage = R - values[i]
			value_loss = value_loss + 0.5 * advantage.pow(2)
			# GAE
			**delta_t = rewards[i] + args.gamma * values[i + 1] - values[i]**
			gae = gae * args.gamma * args.tau + delta_t

			policy_loss = policy_loss - log_probs[i] * gae.detach() - args.entropy_coef * entropies[i]

		optimizer.zero_grad()

		print("Worker : %d, Policy Loss : %.2f, Value Loss : %.2f, Entropy : %.2f"%(rank, policy_loss.item(), value_loss.item(), 0.0))
		writer.add_scalar("Policy Loss",policy_loss.item(),counter.value)
		writer.add_scalar("Value Loss",value_loss.item(),counter.value)

		(policy_loss + args.value_loss_coef * value_loss).backward()
		torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

		ensure_shared_grads(model, shared_model)
		optimizer.step()
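
For reference, the backward loop in the code implements the standard GAE recursion, with args.tau playing the role of the GAE parameter $\lambda$:

$$\delta_t = r_t + \gamma V(s_{t+1}) - V(s_t), \qquad \hat{A}_t = \delta_t + \gamma \lambda \hat{A}_{t+1}$$

Here $\hat{A}_t$ is accumulated in gae, and R holds the bootstrapped discounted return used for the value loss.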

The error serves to remind you that when you call numpy(), you are leaving autograd-land. The most local fix is to take the advice literally and replace .numpy() with .detach().numpy(). You will not be able to backpropagate through env.step; instead you have to use one of the RL techniques to get a surrogate objective, most likely built from the log-probabilities, to call .backward() on.
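
A minimal, self-contained sketch of that pattern (the names are illustrative, not taken from your code):

import torch
import torch.nn.functional as F

logits = torch.randn(1, 4, requires_grad=True)      # stand-in for the policy head output
probs = F.softmax(logits, dim=-1)
action = probs.multinomial(num_samples=1).detach()   # sampling is not differentiable anyway

# leaving autograd-land at the environment boundary is fine:
action_id = action.numpy()[0, 0]                     # what would be passed to env.step

# the gradient signal comes from the score-function (policy-gradient) surrogate instead:
log_prob = F.log_softmax(logits, dim=-1).gather(1, action)
advantage = torch.tensor([[1.0]])                    # whatever estimate the RL method provides
loss = -(log_prob * advantage).sum()
loss.backward()                                      # gradients reach logits without backprop through env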

Best regards

Thomas


Maybe you can put with torch.no_grad(): before your code and try again.
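
For example, torch.no_grad() fits the parts where the result should never be in the graph (such as the bootstrap value at the end of a rollout), but not the loss computation itself, which still needs backward(). A small self-contained sketch (the critic here is just a stand-in, not the actual model):

import torch
import torch.nn as nn

critic = nn.Linear(8, 1)                 # stand-in for the value head
state = torch.randn(1, 8)

with torch.no_grad():
    R = critic(state)                    # grad-free, equivalent to critic(state).detach()

value = critic(state)                    # this one must keep its graph
loss = (R - value).pow(2).mean()
loss.backward()                          # works: only `value` carries gradients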

RuntimeError: Can't call numpy() on Variable that requires grad. Use var.detach().numpy() instead.

You can call the detach() method on the tensor to resolve it.
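
A minimal example of where the suggested detach() is the right tool, i.e. pulling a value out for logging or plotting where no gradient is needed:

import torch

x = torch.randn(3, requires_grad=True)
y = (x * 2).sum()

# y.numpy() would raise the RuntimeError quoted above
y_np = y.detach().numpy()        # fine: a plain NumPy value for logging or plotting
y.backward()                     # detach() did not touch the graph, so backward still works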