The scenario: I don’t want to use embedding layer in decoder part so that I did step like this
- In
def forward
, I change fromembedded = self.dropout(self.embedding(input))
toembedded = self.dropout(input)
but I got an errorFor unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors
- Following that, I ask clude3 that say I should check if hidden and cell are 3D, if so, squeeze the first dimension with code
if hidden.ndim == 3:
hidden = hidden.squeeze(0) # hidden = [batch size, hidden dim]
if cell.ndim == 3:
cell = cell.squeeze(0) # cell = [batch size, hidden dim]
Until now, it is still error that is the same as I got in step 1)
Hence, Could you please help me to solve it and I’m not sure that it have other impacts on another part?
This is my last code:
class Decoder(nn.Module):
def __init__(self, output_dim, embedding_dim, hidden_dim, n_layers, dropout):
super().__init__()
self.output_dim = output_dim
self.hidden_dim = hidden_dim
self.n_layers = n_layers
self.rnn = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=dropout)
self.fc_out = nn.Linear(hidden_dim, output_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, input, hidden, cell):
# input = [batch size]
# hidden = [n layers * n directions, batch size, hidden dim]
# cell = [n layers * n directions, batch size, hidden dim]
# n directions in the decoder will both always be 1, therefore:
# hidden = [n layers, batch size, hidden dim]
# context = [n layers, batch size, hidden dim]
input = input.unsqueeze(0)
# input = [1, batch size]
embedded = self.dropout(input)
# embedded = [1, batch size, embedding dim]
output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
# output = [seq length, batch size, hidden dim * n directions]
# hidden = [n layers * n directions, batch size, hidden dim]
# cell = [n layers * n directions, batch size, hidden dim]
# seq length and n directions will always be 1 in this decoder, therefore:
# output = [1, batch size, hidden dim]
# hidden = [n layers, batch size, hidden dim]
# cell = [n layers, batch size, hidden dim]
prediction = self.fc_out(output.squeeze(0))
# prediction = [batch size, output dim]
return prediction, hidden, cell