import concurrent.futures

import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

lenobs = 100800  # flattened Pong-v0 frame: 210 * 160 * 3

class ActorCritic(nn.Module):
    def __init__(self, ran):
        super(ActorCritic, self).__init__()
        torch.random.manual_seed(ran)
        self.l1 = nn.Linear(lenobs, 25)
        self.l2 = nn.Linear(25, 50)
        self.actor_lin1 = nn.Linear(50, 6)   # actor head: 6 Pong actions
        self.l3 = nn.Linear(50, 25)
        self.critic_lin1 = nn.Linear(25, 1)  # critic head: state value

    def forward(self, x):
        x = F.normalize(x, dim=0)
        y = F.relu(self.l1(x))
        y = F.normalize(y, dim=0)
        y = F.relu(self.l2(y))
        y = F.normalize(y, dim=0)
        actor = F.log_softmax(self.actor_lin1(y), dim=0)
        c = F.relu(self.l3(y.detach()))      # critic branch cut off from the shared trunk
        critic = F.hardtanh(self.critic_lin1(c))
        return actor, critic
def doTrain(model, ran):
    env = gym.make('Pong-v0')
    mi = model(ran)
    optimizer = optim.Adam(lr=1e-4, params=mi.parameters())
    values, rewards, logprobs = [], [], []
    observation = env.reset()
    done = False
    N = 0
    # Roll out at most 10 steps in the environment.
    while not done and N < 10:
        N += 1
        pobservation = torch.from_numpy(observation)
        flattened_pobservation = pobservation.view(-1).float()
        policy, value = mi(flattened_pobservation)
        values.append(value.item())
        sampler = Categorical(policy)
        action = sampler.sample()
        logprobs.append(policy[action.item()].item())
        observation, reward, done, log = env.step(action.item())
        if done:
            rewards.append(1.0)
        else:
            rewards.append(reward)
    torch_values = torch.Tensor(values).view(-1)
    torch_rewards = torch.Tensor(rewards)
    torch_logprobs = torch.Tensor(logprobs)  # .flip(0)
    # Discounted returns.
    returns = []
    gamma = 0.90
    clc = 0.1
    ret = torch.Tensor([0])
    for r in torch_rewards:
        ret = r + gamma * ret
        returns.append(ret)
    returns = torch.tensor(returns, requires_grad=True).view(-1)
    returns = F.normalize(returns, dim=0)
    # Actor-critic loss.
    actor_loss = -1 * torch_logprobs * (returns - torch_values.detach())
    critic_loss = torch.pow(torch_values - returns, 2)
    loss = actor_loss.sum() + clc * critic_loss.sum()
    optimizer.zero_grad()
    loss.backward()
    # Collect the gradients to inspect them after training.
    gradients = []
    for i in mi.parameters():
        try:
            gradients.append(i.grad)
        except:
            gradients.append('No Grad')
    optimizer.step()
    return gradients
updatedParams = []
results = []
with concurrent.futures.ProcessPoolExecutor() as executor:
    for i in range(5):
        results.append(executor.submit(doTrain, ActorCritic, int((torch.randn(1)**2)*200)))
    for f in concurrent.futures.as_completed(results):
        updatedParams.append(f.result())

updatedParams
[[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None]]
I am trying to implement A2C for Pong.
I can't figure out why I am getting None for the gradient values. Have I broken the computation graph somewhere?
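For reference, this is the kind of break I suspect. A minimal standalone sketch (a toy nn.Linear, not my actual model): converting an output to a Python float with .item() and rebuilding a tensor from it leaves the layer's parameters with grad=None after backward, which looks a lot like what I am seeing.

import torch
import torch.nn as nn

layer = nn.Linear(4, 1)          # stand-in for the model
x = torch.randn(4)

# Going through .item() yields a plain Python float, so the tensor built
# from it has no connection to layer's parameters.
out_value = layer(x).item()
loss = torch.tensor([out_value], requires_grad=True).sum()
loss.backward()
print(layer.weight.grad)         # None

# Keeping everything as tensors preserves the graph.
loss2 = layer(x).sum()
loss2.backward()
print(layer.weight.grad)         # an actual gradient tensor

Is this the same thing that is happening in my doTrain above?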