Hi,
I am trying to port code written for PyTorch 0.2 to PyTorch 1.6.
The 0.2 code uses the `Variable` wrapper, as shown below.
# Training loop (PyTorch 0.2 style, still using the `Variable` wrapper).
# Each pass of the outer loop collects up to `params.num_steps` environment
# steps, builds the policy/value losses from them, and applies one optimizer
# step. `model`, `params`, `env`, `optimizer`, `shared_model`,
# `ensure_shared_grads`, `done` and `count` are defined outside this snippet.
while True:
    values = []
    log_probs = []
    rewards = []
    entropies = []

    for step in range(params.num_steps):
        if done:
            # Start of an episode: fresh zero recurrent state and a reset env.
            h_out = (Variable(torch.zeros([1, params.lstm_size])),
                     Variable(torch.zeros([1, params.lstm_size])))
            state = torch.DoubleTensor(env.reset())
        else:
            # Re-wrap `.data` so the recurrent state carries no autograd
            # history from the previous rollout.
            h_out = (Variable(h_out[0].data), Variable(h_out[1].data))
        h_in = h_out
        state = state  # NOTE(review): no-op as written

        value, action_values, h_out = model((Variable(state.reshape(1, -1)), h_in))
        action_values = action_values.reshape(-1,)

        # Shifting by the max leaves softmax unchanged; presumably done for
        # numerical stability.
        prob = F.softmax(action_values - max(action_values), dim=0)
        log_prob = F.log_softmax(action_values - max(action_values), dim=0)
        entropy = -(log_prob * prob).sum()
        entropies.append(entropy)

        # action = epsilon_greedy(prob, epsilon)
        action = Categorical(prob).sample().reshape(-1,)
        log_prob_a = log_prob.gather(0, Variable(action))

        values.append(value)
        log_probs.append(log_prob_a)
        # print("action_values:",action_values)
        # print("prob:",prob)
        # print("log_prob:",log_prob)
        # print("action:",action, "log_prob_a:",log_prob_a)

        state, reward, done, info, _ = env.step(action)
        # reward = max(min(reward, 1), -1)
        count += 1

        if done:
            state = env.reset()

        rewards.append(reward)

        if done:
            break

    # Bootstrap the return from the model's value estimate of the last state
    # when the rollout stopped without the episode finishing; otherwise use 0.
    R = torch.zeros(1, 1)
    if not done:
        value, _, _ = model((Variable(state.reshape(1, -1)), h_out))
        R = value.data

    # Extra entry so that values[i + 1] exists for the final step below.
    values.append(Variable(R))
    policy_loss = 0
    value_loss = 0
    R = Variable(R)
    gae = torch.zeros(1, 1)

    # Walk the rollout backwards, accumulating the discounted return R and
    # the discounted sum of TD residuals `gae`.
    for i in reversed(range(len(rewards))):
        R = params.gamma * R + rewards[i]
        advantage = R - values[i]
        value_loss = value_loss + 0.5 * advantage.pow(2)

        # `.data` keeps the TD residual out of the autograd graph, so `gae`
        # only scales the policy gradient.
        TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
        gae = gae * params.gamma * params.tau + TD
        policy_loss = policy_loss - log_probs[i] * Variable(gae) - 0.01 * entropies[i]

    optimizer.zero_grad()
    (policy_loss + 0.5 * value_loss).backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
    ensure_shared_grads(model, shared_model)
    optimizer.step()
However, `Variable` has been deprecated since PyTorch 0.4. As I understand from the documentation,
Variables and Tensors have been merged, and a tensor starts recording gradients when its `requires_grad` attribute is set to `True`.
So, can I just change every `Variable(x)` in the code to `torch.DoubleTensor(x, requires_grad=True)`, or do I need to change anything else as well?
The documentation also says to use `detach()`. Can I just replace `Variable(x)` with `x.detach()` when reusing `x`?
Thanks