Hi all, I am writing a simple neural network using LSTM to get some understanding of NER. I understand the whole idea but got into trouble with some dimension issues, here’s the problem:
class NERModel(nn.Module):
"""
Encoder for NER model.
Args:
- vocab_size: vocabulary size, integer.
- embedding_size: embedding size, integer.
- enc_units: hidden size of LSTM layer, integer.
- ffc_units: hidden units of feedforward layer, integer.
- num_labels: number of named entities. The value should be (actual_num_labels + 1),
because zero paddings are added to the sequences.
"""
def __init__(self, vocab_size, embedding_size, enc_units, ffc_units, num_labels):
super(NERModel, self).__init__()
# Word embedding layer.
self.embedding = nn.Embedding(vocab_size, embedding_size)
# LSTM layer with units of enc_units
self.LSTM = nn.LSTM(embedding_size, enc_units, batch_first=True)
self.dense1 = nn.Linear(enc_units, ffc_units)
self.dense2 = nn.Linear(ffc_units, num_labels)
def forward(self, x):
"""
Args:
- x: Input tensor of shape (batch_size, sequence_length)
Return:
Tensor of shape (batch_size, sequence_length, num_labels)
"""
x = self.embedding(x)
# after embedding: torch.Size([64, 124, 256])
x, _ = self.LSTM(x)
# after lstm: torch.Size([64, 124, 256])
x = self.dense1(x)
# after linear 1: torch.Size([64, 124, 256])
x = self.dense2(x)
# after linear 2: torch.Size([64, 124, 6])
output = F.log_softmax(x, dim=1)
# after softmax: torch.Size([64, 124, 6])
return output
# initialize model
model = NERModel(vocab_size, embedding_size, enc_units, ffc_units, num_labels)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
criterion = nn.CrossEntropyLoss()
for i, (value, label) in enumerate(train_loader):
print(value.shape)
optimizer.zero_grad()
outputs = model(value)
# outputs shape: torch.Size([64, 124, 6])
# label shape: torch.Size([64, 124])
loss = criterion(outputs, label)
Things all looked good but I got the following error reported:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-9-92530e221aaf> in <module>()
---> 14 loss = criterion(outputs, label)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
940 def forward(self, input, target):
941 return F.cross_entropy(input, target, weight=self.weight,
--> 942 ignore_index=self.ignore_index, reduction=self.reduction)
943
944
~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2054 if size_average is not None or reduce is not None:
2055 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2056 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2057
2058
~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1879 if target.size()[1:] != input.size()[2:]:
1880 raise ValueError('Expected target size {}, got {}'.format(
-> 1881 out_size, target.size()))
1882 input = input.contiguous().view(n, c, 1, -1)
1883 target = target.contiguous().view(n, 1, -1)
ValueError: Expected target size (64, 6), got torch.Size([64, 124])
I have outputs shape torch.Size([64, 124, 6]) and label shape: torch.Size([64, 124]). It seems that loss function want me to have outputs shape torch.Size([64, 6, 124]). I don’t get the reason why it is like this, and can some one tell me how to modify it?