Sequence2Sequence with LSTM

I am very new to PyTorch and was following the sequence-to-sequence modelling with attention tutorial. The tutorial link is:

I am trying to implement the same model using LSTM layers in the encoder and decoder. The following code is the encoder class:

 class Encode(nn.Module):
    """Single-step LSTM encoder: embeds one token index and runs it through an LSTM.

    Args:
        input_size: number of rows in the embedding table.
        hidden_size: width of the embedding and of the LSTM hidden/cell state.
    """

    def __init__(self, input_size, hidden_size):
        super(Encode, self).__init__()
        # Use the constructor argument so the stored size always agrees with
        # the layers built below (they are sized from the same argument).
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.LSTM = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Encode one token.

        Args:
            input: index tensor whose embedding is reshaped to (1, 1, -1).
            hidden: the LSTM state as an ``(h, c)`` tuple, e.g. the value
                returned by ``initHidden`` or by a previous call.

        Returns:
            ``(output, hidden)`` exactly as produced by ``nn.LSTM``; ``hidden``
            is again an ``(h, c)`` tuple.
        """
        # Reshape to (seq_len=1, batch=1, features) as nn.LSTM expects.
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.LSTM(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return a zero initial LSTM state ``(h_0, c_0)``.

        nn.LSTM takes a *tuple* of hidden and cell state, each shaped
        (num_layers=1, batch=1, hidden_size). Passing a single tensor makes the
        LSTM index into it and fail with
        "Expected hidden[0] size (1, 1, H), got (1, H)".
        """
        # Create the state on the same device as the module's own weights so
        # it is always compatible with the LSTM it is fed to.
        device = self.embedding.weight.device
        h0 = torch.zeros(1, 1, self.hidden_size, device=device)
        c0 = torch.zeros(1, 1, self.hidden_size, device=device)
        return h0, c0

The code shows following error:

RuntimeError: Expected hidden[0] size (1, 1, 256), got (1, 256)

I guess the LSTM layer expects a 3D tensor, whereas the GRU layer works with 2D tensors. How can I convert the 2D tensors into 3D?

It looks like your tensor should be the right size coming from your initHidden method. Are you generating it in some other way?

You might find this discussion valuable - How to properly use hidden states for RNN

Are you actually executing that InitHidden anywhere?

Yes, when starting the training I am assigning the hidden tensor with size (1, 1, hidden layer size = 256).

Thank you. I have corrected the code according to the discussion you provided, but I got another error. It is because of the attention decoder layer. The code for the decoder is:

class AttnDecode(nn.Module):
    """Single-step attention decoder built around an LSTM.

    Args:
        hidden_size: width of the embedding and of the LSTM hidden/cell state.
        output_size: size of the output vocabulary.
        dropout_p: dropout probability applied to the embedded input.
        max_length: number of encoder positions the attention layer scores.
    """

    def __init__(self, hidden_size, output_size, dropout_p=config.dropout_p, max_length=config.MAX_LENGTH):
        super(AttnDecode, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        # These three values only shape the state returned by initHidden().
        self.embedding_width = config.embedding_width
        self.batch_size = config.batch_size
        self.n_layers = config.n_layers

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.LSTM = nn.LSTM(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Decode one token.

        Args:
            input: index tensor whose embedding is reshaped to (1, 1, -1).
            hidden: the LSTM state as an ``(h, c)`` tuple.
            encoder_outputs: encoder states; the matrix products below require
                shape (max_length, hidden_size).

        Returns:
            ``(output, hidden, attn_weights)``: log-probabilities over the
            output vocabulary, the updated ``(h, c)`` tuple, and the attention
            weights of shape (1, max_length).
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # With an LSTM, `hidden` is the tuple (h, c) and h is 3-D
        # (num_layers, batch, hidden_size). Index into h once more so the
        # query is 2-D like embedded[0]; concatenating the 3-D h directly is
        # what raised "Tensors must have same number of dimensions".
        h_state = hidden[0]
        attn_input = torch.cat((embedded[0], h_state[0]), 1)
        attn_weights = F.softmax(self.attn(attn_input), dim=1)
        # (1, 1, max_length) x (1, max_length, hidden_size) -> (1, 1, hidden_size)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.LSTM(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        """Return a Xavier-initialised LSTM state ``(h_0, c_0)``.

        Both tensors are shaped (n_layers, batch_size, embedding_width) from
        the config values captured in ``__init__``.
        """
        # NOTE(review): self.LSTM is single-layer with hidden_size units and
        # forward() reshapes its input to batch 1, so this state is only
        # accepted when n_layers == 1, batch_size == 1 and
        # embedding_width == hidden_size -- confirm against config.
        # NOTE(review): these Parameters are created per call and are not
        # registered on the module -- confirm whether they are meant to be
        # trained.
        state_shape = (self.n_layers, self.batch_size, self.embedding_width)
        lstm_init_h = nn.Parameter(nn.init.xavier_uniform_(
            torch.empty(*state_shape, dtype=torch.float)))
        lstm_init_c = nn.Parameter(nn.init.xavier_uniform_(
            torch.empty(*state_shape, dtype=torch.float)))
        return (lstm_init_h, lstm_init_c)

And the error is :

 Traceback (most recent call last):
  File "/home/khaledkucse/Project/python/[PyTorch]Sequence2Sequence/", line 39, in <module>
    Train.trainIters(encoder, attn_decoder, train_pairs, input_lang, output_lang, n_iters=n_iters, print_every=config.print_every)
  File "/home/khaledkucse/Project/python/[PyTorch]Sequence2Sequence/", line 162, in trainIters
    loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
  File "/home/khaledkucse/Project/python/[PyTorch]Sequence2Sequence/", line 104, in train
    decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
  File "/home/khaledkucse/.local/lib/python3.6/site-packages/torch/nn/modules/", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/khaledkucse/Project/python/[PyTorch]Sequence2Sequence/", line 60, in forward
    self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
RuntimeError: invalid argument 0: Tensors must have same number of dimensions: got 2 and 3 at /pytorch/aten/src/TH/generic/THTensorMath.c:3577

I guess the line self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1) has the problem. Can you help me out?

Can you print embedded[0].size() and hidden[0].size()?

embedded[0] size: torch.Size([1, 512])
hidden[0] size: torch.Size([1, 1, 512])

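Right, so either embedded needs to be the same size as hidden or vice versa. Since an LSTM's hidden state is an (h, c) tuple, hidden[0] here is the full 3-D h tensor; indexing it once more (hidden[0][0]) gives a [1, 512] tensor that can be concatenated with embedded[0].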