Hi ,
i am trying to apply backward view algorithm but i can’t able to update my weight manually.
here is the formula for updating weight:
I try to update weight like this:
def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
self.optimizer.zero_grad()
outputs = self.dqn(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
next_outputs = self.dqn(batch_next_state).detach().max(1)[0]
target = self.gamma*next_outputs + batch_reward
loss = F.mse_loss(outputs, target)
loss.backward()
#self.optimizer.step()
Weight =[]
with torch.no_grad():
for p in self.dqn.parameters():
Weight.append(p)
Weight = self.backward_view(outputs, target, batch_reward, p, loss, p.grad)
p.copy_(Weight)
def backward_view(self, outputs, target, batch_reward, p, loss, grad):
Weight = []
alpha, lambd = 0.0001, 0.5
for bs in range(32):
i = 0
for parameter in p:
self.E[i] = torch.Tensor(lambd*self.gamma*self.E[i]) + grad[bs]
parameter -= alpha * self.E[i]*(target[bs] - outputs[bs])
Weight.append(parameter)
i += 1
return Weight
but got following error:
TypeError: new(): data must be a sequence (got float)
can anyone tell me how to update weight according to this formula using pytorch, please?