I am a newbie with lstm and confused with dimensions when going from lstm to the linear layer. I get the following error: # size mismatch, m1: [4096 x 128], m2: [64 x 3]. I know I have to flatten in some way but do not know how to.
For reference batch_size=64, hidden_dim=64, tagset_size=3, embedding_dim=64. Below is my class LSTM (bidirectional lstm). The error is in the forward() when it runs tag_space = self.fc(lstm_out). I think its because dimensions going from lstm to linear are not correct, but I cannot figure out why (I added print statements so you can see the dimensions).
Here is my class:
class LSTM(nn.Module):
def __init__(self, embedding_dim, hidden_dim, batch_size, vocab_size, tagset_size, layer, direct, dropout):
super(LSTM, self).__init__()
self.hidden_dim = hidden_dim
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=layer, bidirectional=True, dropout=0.5, batch_first=True)
self.direct = 2 if direct else 1
self.layer = layer
self.fc = nn.Linear(2*hidden_dim, tagset_size)
self.hidden = self.init_hidden()
def init_hidden(self):
hid = torch.autograd.Variable(torch.zeros(self.layer * self.direct, batch_size, self.hidden_dim))
cel = torch.autograd.Variable(torch.zeros(self.layer * self.direct, batch_size, self.hidden_dim))
hid = hid.to(device)
cel = cel.to(device)
return hid, cel
def forward(self, sentence):
print(sentence.size()) -> torch.Size([64, 64])
embeds = self.word_embeddings(sentence)
print(embeds.size()) -> torch.Size([64, 64, 64])
lstm_out, self.hidden = self.lstm(
embeds, self.hidden)
print(lstm_out.size()) -> torch.Size([64, 32, 128])
###### Below line is the error
tag_space = self.fc(lstm_out)
tag_scores = F.log_softmax(tag_space, dim=2)
return tag_scores