I'm trying to use the BERT model "for multiple choice" (BertForMultipleChoice) and I constantly get this error:
here is the code snippet:
defined functions:
def pad_encode(text, tokenizer, max_length=50):
    """
    Encode a single sentence to token ids of exactly ``max_length``.

    Args:
        text: raw sentence to encode.
        tokenizer: tokenizer exposing ``tokenize``, ``convert_tokens_to_ids``,
            ``add_special_tokens_single_sentence`` and ``pad_token_id``
            (pytorch_transformers style).
        max_length: fixed output sequence length.

    Returns:
        ``(token_ids, attention_mask)`` — two lists of length ``max_length``;
        the mask is 1 for real/special tokens and 0 for padding positions.
    """
    tok_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
    tok_ids2 = tokenizer.add_special_tokens_single_sentence(tok_ids)
    if len(tok_ids2) > max_length:
        # Truncate the *content* tokens and re-add the special tokens, so
        # [CLS]/[SEP] survive and the total comes out to max_length exactly.
        n_to_trunc = len(tok_ids2) - max_length
        tok_ids2 = tokenizer.add_special_tokens_single_sentence(
            tok_ids[:-n_to_trunc])
    # Build the mask once, after any truncation (the original built it
    # twice and also computed an unused special-token count).
    att_mask = [1] * len(tok_ids2)
    if len(tok_ids2) < max_length:
        # Right-pad with pad_token_id; padded positions get mask 0.
        n_pad = max_length - len(tok_ids2)
        tok_ids2 = tok_ids2 + [tokenizer.pad_token_id] * n_pad
        att_mask = att_mask + [0] * n_pad
    assert len(tok_ids2) == max_length
    assert len(att_mask) == max_length
    return tok_ids2, att_mask
def tokenize_batch(sentences, tok_model, max_len=50, debug=True):
    """
    Tokenize a batch of sentences into padded id / attention-mask tensors.

    Args:
        sentences: list of raw sentence strings.
        tok_model: dict with a 'tokenizer' entry (a 'model' entry may be
            present but is not used here).
        max_len: fixed sequence length each sentence is padded/truncated to.
        debug: if True, print the shape of the resulting id tensor.

    Returns:
        ``(input_ids, att_masks)``: tensors of shape
        ``(len(sentences), max_len)``, moved to GPU when CUDA is available.
    """
    assert type(sentences) == list
    tokenizer = tok_model['tokenizer']
    # Encode each sentence ONCE and unzip the (ids, mask) pairs — the
    # original called pad_encode twice per sentence, doubling all work.
    pairs = [pad_encode(s, tokenizer=tokenizer, max_length=max_len)
             for s in sentences]
    input_ids = torch.tensor([ids for ids, _ in pairs])
    att_masks = torch.tensor([mask for _, mask in pairs])
    if debug:
        print(input_ids.shape)
    if torch.cuda.is_available():
        input_ids = input_ids.cuda()
        att_masks = att_masks.cuda()
    return input_ids, att_masks
when I try to encode the batch data:
input_ids, att_masks = tokenize_batch(sentences, {"tokenizer": self.tokenizer, "model": self.bert_model}, max_len=self.seq_len, debug=True)
print(input_ids.shape, att_masks.shape)
model_out = bert_model(input_ids, attention_mask=att_masks)
I get the following:
torch.Size([64, 50]) torch.Size([64, 50])
…
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
→ 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)/usr/local/lib/python3.6/dist-packages/pytorch_transformers/modeling_bert.py in forward(self, input_ids, token_type_ids, attention_mask, labels, position_ids, head_mask)
1076 pooled_output = self.dropout(pooled_output)
1077 logits = self.classifier(pooled_output)
→ 1078 reshaped_logits = logits.view(-1, num_choices)
1079
1080 outputs = (reshaped_logits,) + outputs[2:] # add hidden states and attention if they are here
RuntimeError: shape '[-1, 50]' is invalid for input of size 64