Hello Everyone,
I am implementing Stack BiLSTM upon the bert sequential output for text classification of dialogue state tracking dataset in encoder. Afterwards the output from Stacked BiLSTM and Bert overall features are concatenating to pass the decoder for mask attention. I am getting the error RuntimeError: shape ‘[-1, 1280, 1]’ is invalid for input of size 2776. The part of code is following where the error is occurring:
class UtteranceAttention(nn.Module):
def init(self, attn_head, model_output_dim, dropout=0.):
super(UtteranceAttention, self).init()
self.attn_head = attn_head
self.model_output_dim = model_output_dim
self.dropout = dropout
self.attn_fun = MultiHeadAttention(attn_head, model_output_dim, dropout=0.)
def forward(self, query, value, attention_mask=None):
num_query = query.size(0)
batch_size = value.size(0)
seq_length = value.size(1)
expanded_query = query.unsqueeze(0).expand(batch_size, *query.shape)
if attention_mask is not None:
expanded_attention_mask = attention_mask.view(-1, seq_length, 1).expand(value.size()).float()
new_value = torch.mul(value, expanded_attention_mask)
attn_mask = attention_mask.unsqueeze(1).expand(batch_size, num_query, seq_length)
else:
new_value = value
attn_mask = None
attended_embedding = self.attn_fun(expanded_query, new_value, new_value, mask=attn_mask)
return attended_embedding
class Decoder(nn.Module):
def init(self, attn_head, bert_output_dim, dropout_prob):
super(Decoder, self).init()
# slot utterance attention
self.slot_utter_attn = UtteranceAttention(attn_head, bert_output_dim, dropout=0.)
# prediction
self.pred = nn.Sequential(nn.Dropout(p=dropout_prob),
nn.Linear(bert_output_dim, bert_output_dim),
nn.LayerNorm(bert_output_dim))
def forward(self, sequence_output, attention_mask, slot_embedding):
# slot utterance attention
slot_utter_emb = self.slot_utter_attn(slot_embedding, sequence_output, attention_mask)
# prediction
hidden = self.pred(slot_utter_emb)
return hidden # [batch_size, num_slots, dim]
class BeliefTracker(nn.Module):
def init(self, pretrained_model_type, lstm_hidden_dim, n_layers, lstm_drop, attn_head, dropout = 0.25, num_labels = None, dropout_prob=0.):
super(BeliefTracker, self).init()
self.encoder = UtteranceEncoding.from_pretrained(pretrained_model_type)
self.hidden_size = self.encoder.config.hidden_size
self.rnn = nn.LSTM(self.hidden_size, lstm_hidden_dim, num_layers=n_layers, bidirectional=True,
batch_first=True, dropout=lstm_drop)
#self.pool_dropout = nn.Dropout(self.encoder.config.hidden_dropout_prob)
#self.classifier = nn.Linear(self.encoder.config.hidden_size + 2 * lstm_hidden_dim, num_labels)
#self.dropout = nn.Dropout(dropout)
self.decoder = Decoder(attn_head, self.hidden_size, dropout_prob)
def forward(self, input_ids, attention_mask, token_type_ids, slot_emb):
# encoder, a pretrained model, output is a tuple
sequence_output, pooled_output = self.encoder(input_ids, attention_mask, token_type_ids)[0: 2] # [batch_size, seq_length, dim]
print(attention_mask.size())
self.rnn.flatten_parameters()
_, (hn, cn) = self.rnn(sequence_output)
hidden = torch.cat((hn[-2, :, :], hn[-1, :, :]), dim=1)
print(hidden.size())
#pooled_output = self.pool_dropout(pooled_output)
pooled_hidden = torch.cat((pooled_output, hidden), dim=1)
print(pooled_hidden.size())
attention_mask = None
#logits = self.classifier(pooled_hidden)
# decoder, slot utterance attention, followed by a linear layer
slot_output = self.decoder(pooled_hidden, attention_mask, slot_emb)
return slot_output
Here is the error
Epoch: 0% 0/1 [00:00<?, ?it/s]
0% 0/922 [00:00<?, ?it/s]torch.Size([8, 347])
torch.Size([8, 512])
torch.Size([8, 1280])
0% 0/922 [00:12<?, ?it/s]
Epoch: 0% 0/1 [00:12<?, ?it/s]
Traceback (most recent call last):
File “/content/drive/My Drive/AUX-DST/train-aux.py”, line 264, in
main(args)
File “/content/drive/My Drive/AUX-DST/train-aux.py”, line 150, in main
slot_emb=slot_lookup) # [batch_size, num_slots, dim]
File “/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py”, line 1130, in _call_impl
return forward_call(*input, **kwargs)
File “/content/drive/My Drive/AUX-DST/models/DST.py”, line 155, in forward
slot_output = self.decoder(pooled_hidden, attention_mask, slot_emb)
File “/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py”, line 1130, in _call_impl
return forward_call(*input, **kwargs)
File “/content/drive/My Drive/AUX-DST/models/DST.py”, line 120, in forward
slot_utter_emb = self.slot_utter_attn(slot_embedding, sequence_output, attention_mask)
File “/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py”, line 1130, in _call_impl
return forward_call(*input, **kwargs)
File “/content/drive/My Drive/AUX-DST/models/DST.py”, line 95, in forward
expanded_attention_mask = attention_mask.view(-1, 1280, 1).expand(value.size()).float()
RuntimeError: shape ‘[-1, 1280, 1]’ is invalid for input of size 2776