I am trying to implement a simple character-level name generator. The input word is provided to the model as a one-hot encoded tensor of shape (word_characters, 24), where 24 is the size of the character vocabulary.
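For context, this is roughly how the one-hot tensors are built (the vocabulary string and helper name below are illustrative, not my exact code):

```python
import torch
import torch.nn.functional as F

# Hypothetical 24-character vocabulary; the real one comes from the dataset.
vocab = "abcdefghijklmnopqrstuvwx"
chars_count = len(vocab)  # 24
char_to_idx = {c: i for i, c in enumerate(vocab)}

def one_hot_word(word):
    # (word_characters, 24): one row per character, a single 1 per row
    idx = torch.tensor([char_to_idx[c] for c in word])
    return F.one_hot(idx, num_classes=chars_count).float()
```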
```python
import torch
import torch.nn as nn
import torch.optim as optim

class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.n_hidden = 32
        self.n_layers = 3
        self.lstm = nn.LSTM(
            input_size=chars_count,   # vocabulary size (24)
            hidden_size=self.n_hidden,
            num_layers=self.n_layers,
            batch_first=True,
        )
        # project each hidden state back to a score per vocabulary character
        self.linear = nn.Linear(in_features=self.n_hidden, out_features=chars_count)

    def forward(self, x):
        # fresh zero states on every call; x.size(0) is the batch size, i.e. 1
        h0 = torch.zeros((self.n_layers, x.size(0), self.n_hidden))
        c0 = torch.zeros((self.n_layers, x.size(0), self.n_hidden))
        output, state = self.lstm(x, (h0, c0))
        output = self.linear(output)
        return output, state
```
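A quick shape check of the forward pass with a dummy input (assuming `chars_count = 24` as above):

```python
model = LSTM()
dummy = torch.zeros(1, 7, chars_count)  # (batch, word_characters, vocab)
out, _ = model(dummy)
print(out.shape)                        # torch.Size([1, 7, 24])
```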
```python
model = LSTM()
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for i in range(1000):
    for xi, yi in dataset:
        # add a batch dimension of size 1
        xi = xi[None, :, :]  # shape = [1, 7, 24]
        yi = yi[None, :, :]  # shape = [1, 7, 24]
        optimizer.zero_grad()
        l = loss(model(xi)[0], yi)
        l.backward()
        optimizer.step()
```
Whenever I execute the training loop, the following error is raised from this line:

```python
l = loss(model(xi)[0], yi)
```

**ValueError**: `Expected target size (1, 24), got torch.Size([1, 7, 24])`
Can anyone point out what I am doing wrong? The shape of my model's output is the same as the shape of the training labels.
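For reference, my understanding of `nn.CrossEntropyLoss`'s shape contract (this is just a minimal standalone sketch, not my training code): with a 3-D input it reads dim 1 as the class dimension, so my (1, 7, 24) output gets interpreted as 7 classes over 24 positions, and the target it then expects is a tensor of integer class indices of shape (1, 24) rather than a one-hot tensor.

```python
import torch
import torch.nn as nn

ce = nn.CrossEntropyLoss()
out = torch.randn(1, 7, 24)             # shaped like my model output: (batch, seq_len, vocab)
target = torch.randint(0, 24, (1, 7))   # integer class indices, one per time step

# dim 1 must be the class dimension, so the sequence axis goes last
l = ce(out.permute(0, 2, 1), target)    # input (1, 24, 7), target (1, 7): accepted
# ce(out, target) would read dim 1 (size 7) as classes and expect a (1, 24) target
```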