Target size (torch.Size([16])) must be the same as input size (torch.Size([16, 5]))

Hi, I am doing a text classification using pytorch and BERTSequenceClassification. The model will classify the text into 5 classes but I got this error while doing the training.

ValueError: Target size (torch.Size([16])) must be the same as input size (torch.Size([16, 5]))

Below is my code:

class BERT(nn.Module):

    def __init__(self):
        super(BERT, self).__init__()

        options_name = "bert-base-uncased"
        self.encoder = BertForSequenceClassification.from_pretrained(options_name, problem_type="multi_label_classification", num_labels=5)

    def forward(self, OriginalTweet, label):
        loss, text_fea = self.encoder(OriginalTweet, labels=label)[:2]

        return loss, text_fea
model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()
num_epochs = 5
eval_every = len(train_loader) // 2
file_path = destination_folder
best_valid_loss = float("Inf")

# initialize running values
running_loss = 0.0
valid_running_loss = 0.0
global_step = 0
train_loss_list = []
valid_loss_list = []
global_steps_list = []

# training loop
model.train()
for epoch in range(num_epochs):
    for (labels, OriginalTweet), _ in train_loader:
      
        labels = labels.type(torch.LongTensor)
        labels = labels.to(device)   
        OriginalTweet = OriginalTweet.type(torch.LongTensor)  
        OriginalTweet = OriginalTweet.to(device)
        output = model(OriginalTweet, labels)
        loss, _ = output

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update running values
        running_loss += loss.item()
        global_step += 1

        # evaluation step
        if global_step % eval_every == 0:
            model.eval()
            with torch.no_grad():                    

                # validation loop
                for (labels,OriginalTweet), _ in valid_loader:
                    labels = labels.type(torch.LongTensor)           
                    labels = labels.to(device)
                    OriginalTweet = OriginalTweet.type(torch.LongTensor)  
                    OriginalTweet = OriginalTweet.to(device)
                    output = model(OriginalTweet, labels)
                    loss, _ = output
                    
                    valid_running_loss += loss.item()

            # evaluation
            average_train_loss = running_loss / eval_every
            average_valid_loss = valid_running_loss / len(valid_loader)
            train_loss_list.append(average_train_loss)
            valid_loss_list.append(average_valid_loss)
            global_steps_list.append(global_step)

            # resetting running values
            running_loss = 0.0                
            valid_running_loss = 0.0
            model.train()

            # print progress
            print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                  .format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader),
                          average_train_loss, average_valid_loss))
            
            # checkpoint
            if best_valid_loss > average_valid_loss:
                best_valid_loss = average_valid_loss
                save_checkpoint(file_path + '/' + 'model.pt', model, best_valid_loss)
                save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)

save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)

I am very new in this field and I have been stuck in this error for a while. So any help is appreciated! Thank you

It would be more helpful if you post the full error message (e.g. which line causes this error). I feel like the cause is OriginalTweet, but I’m not sure because I cannot see how your dataloader work. According to the document it should be the tokenizer output of the same BERT model. Do you apply this tokenization in your dataloader?

Additionally to what @Dazitu616 said:
Assuming multi_label_classification means a sample can belong to zero, one, or multiple classes, the targets are expected to have the same shape as the model output as usually nn.BCEWithLogitsLoss would be used. I couldn’t find the definition of this argument quickly so don’t know how the model is calculating the loss.
Maybe multi_class_classification is a thing and would fit your use case better?

→ output = model(OriginalTweet, labels)
above is the line that causes this error

Below is the stack trace

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

in forward(self, OriginalTweet, label)
8
9 def forward(self, OriginalTweet, label):
—> 10 loss, text_fea = self.encoder(OriginalTweet, labels=label)[:2]
11
12 return loss, text_fea

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1581 elif self.config.problem_type == “multi_label_classification”:
1582 loss_fct = BCEWithLogitsLoss()
→ 1583 loss = loss_fct(logits, labels)
1584 if not return_dict:
1585 output = (logits,) + outputs[2:]

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
705 self.weight,
706 pos_weight=self.pos_weight,
→ 707 reduction=self.reduction)
708
709

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
2978
2979 if not (target.size() == input.size()):
→ 2980 raise ValueError(“Target size ({}) must be the same as input size ({})”.format(target.size(), input.size()))
2981
2982 return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)

ValueError: Target size (torch.Size([16])) must be the same as input size (torch.Size([16, 5]))

→ output = model(OriginalTweet, labels)
above is the line that causes this error