I am working on Semantic Role Labeling and trying to implement a BiLSTM with attention.
Batch size = 128.
class BaselineModel(nn.Module):
    """LSTM encoder with dot-product attention pooling and a linear classifier.

    The attention step collapses the time axis, so ``forward`` produces one
    score vector per sequence, i.e. ``(batch, num_classes)``, not one per token.
    """

    def __init__(self, hparams):
        """Create the embedding, LSTM encoder, dropout and classifier layers.

        Args:
            hparams: Object exposing ``model_name``, ``hidden_dim``,
                ``vocab_size``, ``embedding_dim``, ``bidirectional``,
                ``num_layers``, ``dropout`` and ``num_classes``.
        """
        super().__init__()
        self.name = hparams.model_name
        self.hidden_dim = hparams.hidden_dim
        # vocab_size = 27308, embeddings_dim = 300
        self.word_embedding = nn.Embedding(hparams.vocab_size, hparams.embedding_dim)
        # hidden_dim = 256, bidirectional = True, num_layers = 2, dropout = 0.4
        self.lstm = nn.LSTM(hparams.embedding_dim, hparams.hidden_dim,
                            bidirectional=hparams.bidirectional,
                            num_layers=hparams.num_layers,
                            dropout=hparams.dropout)
        self.dropout = nn.Dropout(hparams.dropout)
        # hidden_dim = 256, num_classes = 35
        self.classifier = nn.Linear(hparams.hidden_dim, hparams.num_classes)

    def attnetwork(self, encoder_out, final_hidden):
        """Pool the encoder states with dot-product attention.

        Args:
            encoder_out: Encoder states, ``(batch, seq_len, hidden_dim)``.
            final_hidden: Query vector, ``(1, batch, hidden_dim)``.

        Returns:
            Attention-weighted sum of the encoder states,
            ``(batch, hidden_dim)``.
        """
        hidden = final_hidden.squeeze(0)
        # One score per time step: dot product of each state with the query.
        scores = torch.bmm(encoder_out, hidden.unsqueeze(2)).squeeze(2)
        attn_weights = F.softmax(scores, 1)
        # Weighted sum over time; this is where the seq_len axis disappears.
        return torch.bmm(encoder_out.transpose(1, 2), attn_weights.unsqueeze(2)).squeeze(2)

    def forward(self, x):
        """Encode token indices and classify the attention-pooled sequence.

        Args:
            x: Integer tensor of token indices. The LSTM is built without
                ``batch_first``, so it is read as ``(seq_len, batch)``.

        Returns:
            Logits of shape ``(batch, num_classes)``.
        """
        embeddings = self.dropout(self.word_embedding(x))
        output, (hidden_state, _) = self.lstm(embeddings)
        if self.lstm.bidirectional:
            # Sum the forward and backward halves so the feature size stays
            # hidden_dim, matching the classifier's input width.
            encoder_out = output[:, :, :self.hidden_dim] + output[:, :, self.hidden_dim:]
            # Final states of the last layer: forward is [-2], backward is [-1].
            encoder_hidden = hidden_state[-2, :, :] + hidden_state[-1, :, :]
        else:
            # A unidirectional LSTM has no backward half to fold in, and its
            # last layer's final state is simply the last entry.
            encoder_out = output
            encoder_hidden = hidden_state[-1, :, :]
        encoder_out = encoder_out.permute(1, 0, 2)
        attention_output = self.attnetwork(encoder_out, encoder_hidden.unsqueeze(0))
        logits = self.classifier(attention_output)
        return logits
My training procedure raises the following error at the loss computation: "expected input batch size 128 to match target batch size 50".
def train(self, train_dataset, valid_dataset, epochs):
    """Optimise ``self.model`` for ``epochs`` passes over ``train_dataset``.

    Each batch goes through zero-grad, forward, loss, backward, gradient
    clipping (max norm 5.0) and an optimizer step. After every epoch the
    model is scored with ``self.evaluate(valid_dataset)`` and, when
    ``self._verbose`` is positive, both losses are printed.

    Args:
        train_dataset: Iterable of batches supporting ``len()``; element 0
            of a batch is the inputs and element 1 the labels.
        valid_dataset: Passed unchanged to ``self.evaluate``.
        epochs: Number of passes over ``train_dataset``.
    """
    cumulative_loss = 0.0
    for epoch in range(1, epochs + 1):
        running_loss = 0.0
        self.model.train()
        for batch in train_dataset:  # batches come from a DataLoader
            inputs = batch[0]
            labels = batch[1]
            # Shapes reported for this data: inputs [50, 128], labels [50, 128].
            self.optimizer.zero_grad()
            predictions_ = self.model(inputs)
            # NOTE(review): predictions_ is reported as [128, 35] while labels
            # are [50, 128]. The loss call below is where the batch-size
            # mismatch error is raised; one logit row per sequence cannot be
            # paired with one label per token.
            batch_loss = self.loss_function(predictions_, labels)
            batch_loss.backward()
            # Gradient clipping
            clip_grad_norm_(self.model.parameters(), 5.)
            self.optimizer.step()
            running_loss += batch_loss.tolist()
        avg_epoch_loss = running_loss / len(train_dataset)
        cumulative_loss += avg_epoch_loss
        valid_loss = self.evaluate(valid_dataset)
        if self._verbose > 0:
            print(f'Epoch {epoch}: [loss = {avg_epoch_loss:0.4f}, val_loss = {valid_loss:0.4f}]')
I am working on Google Colab with torch 1.5.0.