I'm doing reinforcement learning. Training fails whenever the batch size is greater than 1 — I'm trying a batch size of 32. I can't find anything strange when I look at the loss values themselves, so I'm stuck.
tensor([[1.3092e-03, 2.1749e-03, 2.6177e-06, 2.2517e-03, 2.1730e-03, 4.7358e-05,
1.3836e-03, 8.5324e-04, 1.0955e-03, 1.9888e-03, 1.3575e-03, 1.6514e-05,
2.5816e-04, 1.8731e-04, 1.9047e-03, 5.3802e-04, 6.7010e-04, 8.9404e-04,
4.5198e-04, 1.1572e-04, 3.0101e-04, 1.6938e-03, 4.7421e-04, 6.1655e-04,
3.7556e-03, 1.9652e-04, 3.1410e-04, 6.3015e-05, 3.7714e-04, 2.9124e-08,
6.6303e-04, 2.7347e-05]], device=‘cuda:0’, grad_fn=)
RuntimeError Traceback (most recent call last)
in
427 trin.Double_R2D2_IQN_pioritized_Nstep_NAF_replay(batch_size, gamma,step=episode,
428 state_size=state_,action_size=acthon,
→ 429 multireward_steps=multireward_steps)
430
431 if done or t==max_number_of_steps + 1:
in Double_R2D2_IQN_pioritized_Nstep_NAF_replay(self, batch_size, gamma, step, state_size, action_size, multireward_steps)
263 loss=loss*weights
264 print(loss)
→ 265 loss.backward() # ここを変更
266 optimizer.step()
267 self.Rs=[0 for _ in range(multireward_steps)]
~\Anaconda3\envs\pyflan\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph)
196 products. Defaults to False
.
197 “”"
→ 198 torch.autograd.backward(self, gradient, retain_graph, create_graph)
199
200 def register_hook(self, hook):
~\Anaconda3\envs\pyflan\lib\site-packages\torch\autograd_init_.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
92 grad_tensors = list(grad_tensors)
93
—> 94 grad_tensors = _make_grads(tensors, grad_tensors)
95 if retain_graph is None:
96 retain_graph = create_graph
~\Anaconda3\envs\pyflan\lib\site-packages\torch\autograd_init_.py in _make_grads(outputs, grads)
33 if out.requires_grad:
34 if out.numel() != 1:
—> 35 raise RuntimeError(“grad can be implicitly created only for scalar outputs”)
36 new_grads.append(torch.ones_like(out, memory_format=torch.preserve_format))
37 else:
RuntimeError: grad can be implicitly created only for scalar outputs
The code is
# Prioritized-replay training step (interior of
# Double_R2D2_IQN_pioritized_Nstep_NAF_replay).
targets = torch.zeros(batch_size, device='cuda:0')
trin_x = list(range(batch_size))
weights = torch.ones(batch_size, device='cuda:0')
# Prioritized sampling: draw indices with probability proportional
# to the stored TD-error, without replacement.
idx = np.random.choice(np.arange(len(memory_TDerror.buffer)), size=batch_size,
                       replace=False,
                       p=memory_TDerror.buffer / np.sum(memory_TDerror.buffer))
# NOTE(review): this loop overwrites `inputs` on every iteration, so only
# the LAST sampled transition is actually fed to the network below —
# confirm whether the batch was meant to be stacked instead.
for i in idx:
    inputs = (torch.cat([memory.buffer[i - multireward_steps][0]]),
              torch.cat([memory.buffer[i - multireward_steps][1]]))
for j, i in enumerate(idx):
    with torch.no_grad():
        # N-step bootstrapped target: r + gamma^N * V_target(s').
        targets[j] = (memory.buffer[i][2].to("cuda:0")
                      + (gamma ** multireward_steps)
                      * targetQN.forward(memory.buffer[i][0], "net_v"))
    # Importance-sampling weight for the prioritized sample.
    priority = rank_sum(memory_TDerror.buffer[i], self.alpha)
    weights[j] = (len(memory.buffer) * priority) ** (-self.beta)
weights = weights / max(weights)  # normalize so max weight is 1
optimizer.zero_grad()
output = mainQN.forward(inputs, "net_q")
if self.IQN == True:
    # Fixed NameError: was `target`, the variable built above is `targets`.
    self.loss_IQN(targets, output, weights)
else:
    loss = huber(output - targets)
    # backward() can only be called implicitly on a SCALAR. `loss*weights`
    # is a per-sample tensor (shape [batch_size]), which raised
    # "grad can be implicitly created only for scalar outputs" whenever
    # batch_size > 1. Reduce to the weighted mean before backprop.
    loss = (loss * weights).mean()
    print(loss)
    loss.backward()
    optimizer.step()
self.Rs = [0 for _ in range(multireward_steps)]