Size mismatch with an RNN

Joshua_Bailey · July 8, 2019, 2:19pm

Hello,

I’m pretty new to building RNNs and I’m trying to build an LSTM for classification. It will take Tensors which are shaped (1,2,500) as far as i’m aware this is (batch size, number of seqs(I’m trying to do a multivariate sequence classification), seq length), correct?. Below is my class code, initialisation variables and the error I keep getting.

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_dim, output_size=1, n_layers=1):
        super(LSTM, self).__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_size,
                          output_size,
                          hidden_dim,
                          batch_first=True)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Sigmoid(output_size, hidden_dim)

    def forward(self, x, hidden_dim):
        r_out, hidden = self.lstm(x, hidden_dim)
        r_out = r_out.contiguous().view(-1, self.hidden_dim)
        output = self.dropout(r_out)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size, hidden_dim):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, hidden_dim).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, hidden_dim).zero_(),
                      weight.new(self.n_layers, batch_size, hidden_dim).zero_())

        return hidden
input_size = 500  
output_size = 1  
hidden_dim = 64  # try 64, 128 and 256
n_layers = 1  # 1, 2 or 3
recnet = LSTM(input_size, output_size, hidden_dim, n_layers)
print(recnet)

And here is the error

Traceback (most recent call last):
  File "C:\Program Files\Python36\lib\site-packages\IPython\core\interactiveshell.py", line 2847, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-90-69dfc82af81b>", line 73, in <module>
    output, h = recnet(inputs.float(), h)
  File "C:\Program Files\Python36\lib\site-packages\torch\nn\modules\module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "<ipython-input-88-e7d41e594169>", line 18, in forward
    output = self.fc(output)
  File "C:\Program Files\Python36\lib\site-packages\torch\nn\modules\module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Program Files\Python36\lib\site-packages\torch\nn\modules\linear.py", line 92, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Program Files\Python36\lib\site-packages\torch\nn\functional.py", line 1406, in linear
    ret = torch.addmm(bias, input, weight.t())
RuntimeError: size mismatch, m1: [128 x 1], m2: [64 x 1] at ..\aten\src\TH/generic/THTensorMath.cpp:961

Any help would be very much appreciated!

Thank you.