I'm extending a personal/hobby project to support MADDPG. Full disclosure – the code here is not the most elegant…
I'm getting a size-mismatch error, even though the dimensions look like they should work — according to the error message, the two matrix sizes are identical.
Model is defined under model.py. Also summarized here:
CriticNetwork(
(fc1): Linear(in_features=471, out_features=1024, bias=True)
(fc2): Linear(in_features=1024, out_features=10240, bias=True)
(fc3): Linear(in_features=10240, out_features=1024, bias=True)
(fc4): Linear(in_features=1024, out_features=1, bias=True)
)
ActorNetwork(
(fc1): Linear(in_features=384, out_features=1024, bias=True)
(fc2): Linear(in_features=1024, out_features=10240, bias=True)
(fc3): Linear(in_features=10240, out_features=1024, bias=True)
(fc4): Linear(in_features=1024, out_features=87, bias=True)
)
The ARGS are defined in args.py.
===========================================================================
select a game: [‘chessy’ or ‘checkers’]: chessy
select number of teams: [0, 1 or 2] 2
Choose a name for this team [e.g. ‘blue_team’ or ‘green_team’]: Agent
Choose a color for team_2 [e.g. ‘blue’]: green
Please enter team’s skill_level [1 = Novice, 10 = expert]: 10
Please enter team’s strategy [0 = ‘cooperative’, 1 = ‘competitive’]: 1
Choose a name for this team [e.g. ‘blue_team’ or ‘green_team’]: Environment
Choose a color for team_2 [e.g. ‘blue’]: red
Please enter team’s skill_level [1 = Novice, 10 = expert]: 1
Please enter team’s strategy [0 = ‘cooperative’, 1 = ‘competitive’]: 1
How many trials would you like to run? [1 - 1,000,000] 10000
Do you want to see the board positions in realtime? [ ‘Yes’ or ‘No’ ]no
==============================RUN_TRIALS==================================
/home/ubuntu/chessy/model.py:46: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
return F.softmax(self.fc4(x))
RuntimeError Traceback (most recent call last)
in
7 global HISTORY_FILE
8 user_input = userInput()
----> 9 run_trials(user_input)
10
11 #main()
in run_trials(user_input)
29 env = Game(user_input.game, args, 8, sides, user_input.display_board_positions)
30
---> 31 run_trial(user_input,env, mCritic)
32
33 cycle += 1
in run_trial(user_input, env, mCritic)
26 next_state = np.array(list(map(int, env.state))).astype(np.float32)
27
---> 28 agent_p0.step(state, action, int(reward), next_state, done, mCritic)
29
30 time_step += 1
~/chessy/agent.py in step(self, state, action, reward, next_state, done, mCritic)
59 if len(self.memory) > self.args[‘BATCH_SIZE’]:
60 experiences = self.memory.sample()
---> 61 self.train(experiences, mCritic)
62
63
~/chessy/agent.py in train(self, experiences, mCritic)
99 # Get predicted next-state actions and Q values from target models
100 actions_next = self.actor_target(next_states)
--> 101 Q_targets_next = mCritic.target(next_states, actions_next)
102
103 # Compute Q targets for current states (y_i)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~/chessy/model.py in forward(self, state, action)
70 state = torch.transpose(state, 0,1)
71 action = torch.transpose(action,0,1)
---> 72 x = F.relu(self.fc1(torch.cat((state, action))))
73 x = F.relu(self.fc2(x))
74 x = F.relu(self.fc3(x))
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1368 if input.dim() == 2 and bias is not None:
1369 # fused op is marginally faster
-> 1370 ret = torch.addmm(bias, input, weight.t())
1371 else:
1372 #print(“INPUT:\n{}\n”.format(input))
RuntimeError: size mismatch, m1: [471 x 1024], m2: [471 x 1024] at /opt/conda/conda-bld/pytorch_1579040055865/work/aten/src/THC/generic/THCTensorMathBlas.cu:290
action = “2,2,-1”
action.split(",")
x = torch.Tensor(np.random.choice((0,1),87)).to(device)
x.shape
y = torch.Tenso