PyTorch: multiple inputs in nn.Sequential

Hey there,

I would like to change my nn.Module to take multiple inputs before the final softmax output layer:

I read that nn.Sequential does not work with multiple inputs, which is why I used a separate module with its own forward method (see picture):

Currently, the nn.Sequential version works fine: I can pass in state input vectors of torch.Size([180]), 2D tensors of torch.Size([1999, 180]), or any other 2D tensor of torch.Size([n, 180]).

However, in my separate module I get this error if I try to pass in a 2D tensor of torch.Size([n, 180]):

Traceback (most recent call last):
  File "ppo_witches.py", line 289, in <module>
    main()
  File "ppo_witches.py", line 257, in main
    reward_mean, wrong_moves = ppo.update(memory)
  File "ppo_witches.py", line 168, in update
    logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)
  File "ppo_witches.py", line 120, in evaluate
    action_probs = self.action_layer(state)
  File "/home/mlamprecht/Documents/mcts_cardgame/my_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "ppo_witches.py", line 71, in forward
    output =torch.cat( (out1, out2), 0)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 64 and 180 in dimension 1 at /pytorch/aten/src/TH/generic/THTensor.cpp:612

Does anyone know a smart way of solving this issue?

The code is a bit hard to follow in screenshots and it’s a better idea to post the code directly by wrapping it in three backticks ``` :wink:
However, based on the screenshots, I assume the error is raised in torch.cat((out1, out2), 0)?
Could you explain the indexing a bit?
It seems you are slicing input in the batch dimension to create out2 and I’m not sure why you would want to remove some samples.
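
For illustration (a small sketch, assuming the [n, 180] shapes from your description): indexing a tensor with input[60:120] slices along dim 0, so on a batched input it cuts into the samples rather than selecting features:

import torch

x = torch.rand(20, 180)     # batch of 20 samples with 180 features each
print(x[60:120].shape)      # torch.Size([0, 180]) -> slices the batch dim (empty, since the batch only has 20 samples)
print(x[:, 60:120].shape)   # torch.Size([20, 60]) -> slices the feature dim for every sample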

Could you post an executable code snippet with some randomly initialized inputs, so that we could debug the code?

Sorry, I thought the image would make my point clearer.

Yep, this raises the error.

Yes, this is exactly my problem: I do not want to slice in the batch dimension.

Minimal Example:

import torch
import torch.nn as nn
class ActorMod(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorMod, self).__init__()
        self.l1      = nn.Linear(state_dim, n_latent_var)
        self.l1_tanh = nn.Tanh()
        self.l2      = nn.Linear(n_latent_var, n_latent_var)
        self.l2_tanh = nn.Tanh()
        self.l3      = nn.Linear(n_latent_var+60, action_dim)

    def forward(self, input):
        x = self.l1(input)
        x = self.l1_tanh(x)
        x = self.l2(x)
        #return x.softmax(dim=-1)
        out1 = self.l2_tanh(x)  # shape [64] (or [batch, 64] for batched input)
        out2 = input[60:120]    # shape [60]: these are the cards in the player's hand!
        print(input.shape, out1.shape, out2.shape)
        output = torch.cat([out1, out2], 0)
        # intended shape [124]; do I need x = torch.cat([layer_outputs[i] for i in layer_i], 1) instead?
        x = self.l3(output)
        return x.softmax(dim=-1)

class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorCritic, self).__init__()

        self.action_layer = nn.Sequential(
                nn.Linear(state_dim, n_latent_var),
                nn.Tanh(),
                nn.Linear(n_latent_var, n_latent_var),
                nn.Tanh(),
                nn.Linear(n_latent_var, action_dim),
                nn.Softmax(dim=-1)
                )
        self.equivalent   = ActorMod(state_dim, action_dim, n_latent_var)

works = ActorCritic(180, 60, 64)
action_probs = works.equivalent(torch.rand(180)) # works
action_probs = works.action_layer(torch.rand(180)) # works

action_probs = works.action_layer(torch.rand(20, 180)) # works
action_probs = works.equivalent(torch.rand(20, 180))   # fails

Error:

torch.Size([180]) torch.Size([64]) torch.Size([60])
torch.Size([20, 180]) torch.Size([20, 64]) torch.Size([0, 180])
Traceback (most recent call last):
  File "test.py", line 44, in <module>
    action_probs = works.equivalent(torch.rand(20, 180))
  File "/home/mlamprecht/Documents/mcts_cardgame/my_env/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "test.py", line 20, in forward
    output =torch.cat( [out1, out2], 0)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 64 and 180 in dimension 1 at /pytorch/aten/src/TH/generic/THTensor.cpp:612

I think I have now found the solution:

nn.Sequential handles a 2D input matrix automatically (nn.Linear is applied to the last dimension), whereas the torch.cat call has to be adapted to the dimensionality of the input. In the failing example, input[60:120] on a [20, 180] tensor slices the batch dimension (and since there are only 20 rows, the result is an empty [0, 180] tensor), so torch.cat cannot match it with out1.

Minimal working code:

import torch
import torch.nn as nn
class ActorMod(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorMod, self).__init__()
        self.l1      = nn.Linear(state_dim, n_latent_var)
        self.l1_tanh = nn.Tanh()
        self.l2      = nn.Linear(n_latent_var, n_latent_var)
        self.l2_tanh = nn.Tanh()
        self.l3      = nn.Linear(n_latent_var+60, action_dim)

    def forward(self, input):
        x = self.l1(input)
        x = self.l1_tanh(x)
        x = self.l2(x)
        #return x.softmax(dim=-1)
        out1 = self.l2_tanh(x)  # shape [64] or [batch, 64]
        if len(input.shape) == 1:
            out2 = input[60:120]   # shape [60]: these are the cards in the player's hand!
            output = torch.cat([out1, out2], 0)
        else:
            out2 = input[:, 60:120]  # shape [batch, 60]
            output = torch.cat([out1, out2], 1)  # concatenate along the feature dimension
        print("Input", input.shape, "Out1:", out1.shape, "Out2:", out2.shape)
        x = self.l3(output)
        return x.softmax(dim=-1)

class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim, n_latent_var):
        super(ActorCritic, self).__init__()

        self.action_layer = nn.Sequential(
                nn.Linear(state_dim, n_latent_var),
                nn.Tanh(),
                nn.Linear(n_latent_var, n_latent_var),
                nn.Tanh(),
                nn.Linear(n_latent_var, action_dim),
                nn.Softmax(dim=-1)
                )
        self.equivalent   = ActorMod(state_dim, action_dim, n_latent_var)

works = ActorCritic(180, 60, 64)
action_probs = works.equivalent(torch.rand(180)) # works
#action_probs = works.action_layer(torch.rand(180)) # works
#
action_probs = works.action_layer(torch.rand(20,180)) # works
action_probs = works.equivalent(torch.rand(20, 180)) # works as well
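
A possible further simplification, in case it is useful (just a sketch, not tested against the rest of the training code): slicing with an ellipsis (input[..., 60:120]) and concatenating along the last dimension (dim=-1) handles both the 1D and the batched case without the if/else branch:

import torch

# stand-ins for self.l2_tanh(x) and the raw state input in forward above
input_1d = torch.rand(180)
input_2d = torch.rand(20, 180)
out1_1d  = torch.rand(64)
out1_2d  = torch.rand(20, 64)

# ellipsis indexing always slices the last (feature) dimension
print(torch.cat([out1_1d, input_1d[..., 60:120]], dim=-1).shape)  # torch.Size([124])
print(torch.cat([out1_2d, input_2d[..., 60:120]], dim=-1).shape)  # torch.Size([20, 124])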
