TD(lambda) backward view

Swakshar_Deb · August 18, 2019, 12:46pm

Hi ,
i am trying to apply backward view algorithm but i can’t able to update my weight manually.
here is the formula for updating weight:

I try to update weight like this:

def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        self.optimizer.zero_grad()
        outputs = self.dqn(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        next_outputs = self.dqn(batch_next_state).detach().max(1)[0]
        target = self.gamma*next_outputs + batch_reward
        loss = F.mse_loss(outputs, target)
        loss.backward()
        #self.optimizer.step()
        Weight =[]
        with torch.no_grad():
            for p in self.dqn.parameters():
                Weight.append(p)
                Weight = self.backward_view(outputs, target, batch_reward, p, loss, p.grad)
                p.copy_(Weight)
                
    def backward_view(self, outputs, target, batch_reward, p, loss, grad):
        Weight = []
        alpha, lambd = 0.0001, 0.5
        for bs in range(32):
            i = 0
            for parameter in p:
                self.E[i] = torch.Tensor(lambd*self.gamma*self.E[i]) + grad[bs]
                parameter -= alpha * self.E[i]*(target[bs] - outputs[bs]) 
                Weight.append(parameter)
                i += 1
        return Weight

but got following error:
TypeError: new(): data must be a sequence (got float)

can anyone tell me how to update weight according to this formula using pytorch, please?