ValueError: Expected input batch_size (1) to match target batch_size (26)

CuongNguyen · June 6, 2020, 6:52am

I’m getting the following error when using BERT with BiLSTM (my batch_size on BERT is 26). I want to concat last 4 hidden layer of BERT then feed it to BiLSTM. Here is my model:

from transformers import BertPreTrainedModel, BertModel
import torch.nn as nn
import torch
import torch.nn.functional as F

class BERT(BertPreTrainedModel):
    def __init__(self, config):
        super(BERT, self).__init__(config)
        self.device = config.device
        self.num_labels = config.num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(0.1)
        
        self.lstm = nn.LSTM(input_size=config.hidden_size * 4, hidden_size=500, num_layers=3, dropout=0.5, bidirectional=True)
        self.qa_outputs = nn.Linear(500*2, config.num_labels)
        
        self.weight_class = config.weight_class
        self.init_weights()
        
    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        with torch.no_grad():
            outputs = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
            cls_output = torch.cat((outputs[2][-4][:,0, ...],outputs[2][-3][:,0, ...], outputs[2][-2][:,0, ...], outputs[2][-1][:,0, ...]),-1)
            cls_output = self.lstm(cls_output.unsqueeze(0))[0]
            logits = self.qa_outputs(cls_output)
            return logits

    def loss(self, input_ids, attention_mask, token_type_ids, label):
        outputs = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        cls_output = torch.cat((outputs[2][-4][:,0, ...],outputs[2][-3][:,0, ...], outputs[2][-2][:,0, ...], outputs[2][-1][:,0, ...]),-1)
        cls_output = self.lstm(cls_output.unsqueeze(0))[0]
        logits = self.qa_outputs(cls_output)
        
        target = label
        loss = F.cross_entropy(logits, target)

        predict_value = torch.max(logits, 1)[1]
        list_predict = predict_value.cpu().numpy().tolist()
        list_target = target.cpu().numpy().tolist()

        return loss, list_predict, list_target

Really don’t know how to debug this. Any solution for this error. Thanks in advance.

CuongNguyen · June 6, 2020, 10:33am

the problem is on my CrossEntropy. Here is an example of my logits and target I pass to CrossEntropy

tensor([[[-0.0006, -0.0089],
         [ 0.0044, -0.0024],
         [-0.0049,  0.0009],
         [-0.0042, -0.0018],
         [-0.0041, -0.0021],
         [ 0.0028, -0.0017],
         [-0.0033, -0.0095],
         [-0.0022, -0.0034],
         [-0.0064, -0.0046],
         [ 0.0048, -0.0077],
         [-0.0050, -0.0023],
         [ 0.0005, -0.0038],
         [-0.0037, -0.0002],
         [ 0.0008, -0.0033],
         [-0.0006, -0.0026],
         [-0.0013, -0.0122],
         [-0.0124, -0.0082],
         [ 0.0015, -0.0039],
         [ 0.0065, -0.0008],
         [ 0.0003, -0.0117],
         [ 0.0024, -0.0064],
         [-0.0017, -0.0041],
         [-0.0045, -0.0033],
         [ 0.0049,  0.0017],
         [-0.0013, -0.0082],
         [-0.0001, -0.0014]]], device='cuda:1', grad_fn=<AddBackward0>)
tensor([0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
        0, 1], device='cuda:1')