I see a couple of such posts in forum but I have hardtime generalizing it to my own problem. Here’s the error:
Result:
File "Pmain.py", line 244, in Pmain
optimize_model()
File "Pmain.py", line 114, in optimize_model
state_action_values = Policy_Net(state_batch).gather(1, action_batch)
File "Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "OriNet.py", line 59, in forward
h1 = F.relu(self.fc1(x))
File "Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "Programs\Python\Python37\lib\site-packages\torch\nn\modules\linear.py", line 93, in forward
return F.linear(input, self.weight, self.bias)
File "Programs\Python\Python37\lib\site-packages\torch\nn\functional.py", line 1692, in linear
output = input.matmul(weight.t())
RuntimeError: mat1 dim 1 must match mat2 dim 0
Network
class DQN(nn.Module):
def __init__(self, num_states, num_actions):
super(DQN, self).__init__()
self.fc1 = nn.Linear(num_states, 32)
self.fc2 = nn.Linear(32, 32)
self.fc3 = nn.Linear(32,32)
self.fc4 = nn.Linear(32,12)
self.fc5 = nn.Linear(12, num_actions)
def forward(self, x):
h1 = F.relu(self.fc1(x))
h2 = F.relu(self.fc2(h1))
h3 = F.relu(self.fc3(h2))
h4 = F.relu(self.fc4(h3))
output = self.fc5(h4)
return output
optimize_model
def optimize_model():
if len(memory) < BATCH_SIZE:
return
transitions = memory.sample(BATCH_SIZE)
batch = Transition(*zip(*transitions))
state_batch = torch.cat(batch.state)
action_batch = torch.cat(batch.action)
reward_batch = torch.cat(batch.reward)
# state_batch = torch.unsqueeze(state_batch,1)
# state_batch = torch.unsqueeze(state_batch,0)
# state_batch = torch.unsqueeze(state_batch,0)
state_action_values = Policy_Net(state_batch).gather(1, action_batch)
next_state_values = torch.zeros(BATCH_SIZE, device=device)
non_final_mask = torch.ByteTensor(
tuple(map(lambda s: s is not None, batch.next_state)))
non_final_next_states = torch.cat([s for s in batch.next_state
if s is not None])
next_state_values[non_final_mask] = Target_Net(
non_final_next_states).max(1)[0].detach()
expected_state_action_values = (next_state_values * GAMMA) + reward_batch
loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
optimizer.zero_grad()
loss.backward()
for param in Policy_Net.parameters():
param.grad.data.clamp_(-1, 1)
optimizer.step()
DQN input: [A,B,C,D,E,F,G,H] ( FloatTensor 8 variable,1dimension)
state_batch:
tensor([ 0., 181., 171., ..., 181., 196., 171.], device='cuda:0')
action_batch:
tensor([[0],
[0],
[0],
[0],
~
[0],
[0],
[0]], device='cuda:0')
I tryed “torch.unsqueeze”, but this program returned same error.
Can anyone help to solve this problem ?