I'm trying to use the BERT model "for multiple choice" (BertForMultipleChoice) and I constantly get this error:
here is the code snippet:
defined functions:
def pad_encode(text, tokenizer, max_length=50):
    """
    Encode a single sentence to token ids of exactly ``max_length``.

    Args:
        text: raw sentence to encode.
        tokenizer: tokenizer exposing ``tokenize``, ``convert_tokens_to_ids``,
            ``add_special_tokens_single_sentence`` and ``pad_token_id``
            (pytorch_transformers style).
        max_length: fixed output sequence length.

    Returns:
        ``(token_ids, attention_mask)`` — two lists of length ``max_length``;
        the mask is 1 for real/special tokens and 0 for padding positions.
    """
    tok_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
    tok_ids2 = tokenizer.add_special_tokens_single_sentence(tok_ids)
    if len(tok_ids2) > max_length:
        # Truncate the *content* tokens and re-add the special tokens, so
        # [CLS]/[SEP] survive and the total comes out to max_length exactly.
        n_to_trunc = len(tok_ids2) - max_length
        tok_ids2 = tokenizer.add_special_tokens_single_sentence(
            tok_ids[:-n_to_trunc])
    # Build the mask once, after any truncation (the original built it
    # twice and also computed an unused special-token count).
    att_mask = [1] * len(tok_ids2)
    if len(tok_ids2) < max_length:
        # Right-pad with pad_token_id; padded positions get mask 0.
        n_pad = max_length - len(tok_ids2)
        tok_ids2 = tok_ids2 + [tokenizer.pad_token_id] * n_pad
        att_mask = att_mask + [0] * n_pad
    assert len(tok_ids2) == max_length
    assert len(att_mask) == max_length
    return tok_ids2, att_mask
def tokenize_batch(sentences, tok_model, max_len=50, debug=True):
    """
    Tokenize a batch of sentences into padded id / attention-mask tensors.

    Args:
        sentences: list of raw sentence strings.
        tok_model: dict with a 'tokenizer' entry (a 'model' entry may be
            present but is not used here).
        max_len: fixed sequence length each sentence is padded/truncated to.
        debug: if True, print the shape of the resulting id tensor.

    Returns:
        ``(input_ids, att_masks)``: tensors of shape
        ``(len(sentences), max_len)``, moved to GPU when CUDA is available.
    """
    assert type(sentences) == list
    tokenizer = tok_model['tokenizer']
    # Encode each sentence ONCE and unzip the (ids, mask) pairs — the
    # original called pad_encode twice per sentence, doubling all work.
    pairs = [pad_encode(s, tokenizer=tokenizer, max_length=max_len)
             for s in sentences]
    input_ids = torch.tensor([ids for ids, _ in pairs])
    att_masks = torch.tensor([mask for _, mask in pairs])
    if debug:
        print(input_ids.shape)
    if torch.cuda.is_available():
        input_ids = input_ids.cuda()
        att_masks = att_masks.cuda()
    return input_ids, att_masks
when I try to encode the batch data:
input_ids, att_masks = tokenize_batch(sentences, {"tokenizer": self.tokenizer, "model": self.bert_model}, max_len=self.seq_len, debug=True)
print(input_ids.shape, att_masks.shape)
model_out = bert_model(input_ids, attention_mask=att_masks)
I get the following:
torch.Size([64, 50]) torch.Size([64, 50])
…
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
→ 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)/usr/local/lib/python3.6/dist-packages/pytorch_transformers/modeling_bert.py in forward(self, input_ids, token_type_ids, attention_mask, labels, position_ids, head_mask)
1076 pooled_output = self.dropout(pooled_output)
1077 logits = self.classifier(pooled_output)
→ 1078 reshaped_logits = logits.view(-1, num_choices)
1079
1080 outputs = (reshaped_logits,) + outputs[2:] # add hidden states and attention if they are here
RuntimeError: shape '[-1, 50]' is invalid for input of size 64