I am trying to build a next word prediction model with pytorch in google colab. As my vocabulary size is over 1.5 million, I am using AdaptiveLogSoftmaxWithLoss module of pytorch to reduce RAM consumption.
The simple BiLSTM model definition is as follows:
class BLSTM(nn.Module):
def __init__(self, emb_size, hidden_size, num_layers, vocab_size, cutoffs):
super(BLSTM, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.blstm = nn.LSTM(emb_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
self.fc = nn.AdaptiveLogSoftmaxWithLoss(hidden_size*2, vocab_size, cutoffs)
# self.fc = nn.Linear(hidden_size*2, vocab_size)
def forward(self, x):
# Set initial states
h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)
c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size)
# Forward propagate LSTM
embed = nn.Embedding(vocab_size, emb_size)
out, _ = self.blstm(embed(x), (h0, c0)) # out: tensor of shape (batch_size, seq_length, hidden_size*2)
# Decode the hidden state of the last time step
out = self.fc(out[:, -1, :])
return out
The model and loss function are called as follows:
model = BLSTM(emb_size, hidden_size, num_layers, vocab_size, cutoffs)
# Loss and optimizer
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Inside the epoch the loss is calculated as follows:
inputs = x[0].to(device)
targets = x[1].to(device)
# Forward pass
outputs = model(inputs)
outputs = outputs.to(device)
loss = criterion(outputs, targets)
print(loss.item())
#Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
This is the complete error trace I am getting after running the epoch loop:
TypeError Traceback (most recent call last)
<ipython-input-33-51e15380f8c7> in <module>()
8
9 # Forward pass
---> 10 outputs = model(inputs)
11 outputs = outputs.to(device)
12 loss = criterion(outputs, targets)
2 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
TypeError: forward() missing 1 required positional argument: 'target'
I tried the same code with a simple nn.Linear() layer and the code runs fine. But when I replace the Linear layer with AdaptiveLogSoftmaxWithLoss, I get the above mentioned error.