Expected more than 1 value per channel when training, got input size torch.Size([1, xx])

Consider the following network snippet:

import torch
from torch import nn
import torch.nn.functional as F

class NewModel(nn.Module):
    def __init__(self, model, n_class, dropout_rate, device):
        super(NewModel, self).__init__()

        self.bert = model
        self.device = device  # forward() relies on this; it was missing from the original snippet

        # classifier head (note: n_class is passed in but the output size is hardcoded to 2 here)
        self.linear = nn.Linear(self.bert.config.hidden_size, 2)
        self.linear_1 = nn.Linear(self.bert.config.hidden_size, self.bert.config.hidden_size)

        self.dropout_rate = dropout_rate
        self.dropout_1 = nn.Dropout(p=self.dropout_rate)

        self.activation = nn.LeakyReLU()

        self.bn = nn.BatchNorm1d(num_features=self.bert.config.hidden_size)

    def forward(self, batch):
        outputs = self.bert(
            input_ids=batch[0].to(self.device),
            attention_mask=batch[1].to(self.device),
            token_type_ids=None,
            position_ids=None,
            head_mask=None,
            inputs_embeds=None,
        )

        output = outputs[0]            # last hidden state
        pooled_output = output[:, 0]   # [CLS] token representation
        pooled_output = pooled_output.unsqueeze(0)

        pooled_output_1 = self.dropout_1(self.bn(pooled_output))

        logits = self.linear(F.leaky_relu(self.linear_1(pooled_output_1)))
        return logits

My batch size is 16 and during training I get this error:

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 16])

I already set drop_last=True in the DataLoader, but the error persists.
Any help would be greatly appreciated.

Hi,

What is the stack trace that comes with the error?
I think the problem is that you are running batchnorm on a Tensor of size [1, 16], i.e. a batch size of 1 with 16 channels.
That leaves only a single value per channel, so the standard deviation computed by the batchnorm is zero, and dividing by it would produce infinities. That is why training mode rejects this input.
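You can reproduce this in isolation; here is a minimal sketch with a made-up input, independent of your model:

    import torch
    from torch import nn

    bn = nn.BatchNorm1d(num_features=16)   # 16 channels
    bn.train()                             # training mode computes batch statistics
    x = torch.randn(1, 16)                 # batch size 1 -> a single value per channel

    try:
        bn(x)
    except ValueError as e:
        print(e)   # Expected more than 1 value per channel when training, got input size torch.Size([1, 16])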

Thank you for your time!
Here it is:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-363-0058cef7d67b> in <module>
      9 
     10 import numpy as np
---> 11 train(model,train_dataloader,validation_dataloader)
     12 print("")
     13 print("Training complete!")

<ipython-input-348-f7eefdedff7c> in train(model, train_dataloader, validation_dataloader)
     72             # The documentation for this `model` function is here:
     73             # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
---> 74             outputs = model(batch)
     75 
     76             # The call to `model` always returns a tuple, so we need to pull the

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

<ipython-input-362-052e4983c882> in forward(self, batch)
     49         pooled_output = pooled_output.unsqueeze(0)
     50 
---> 51         pooled_output_1 = self.dropout_1(self.bn(pooled_output))
     52         pooled_output_2 = self.dropout_2(self.bn2(pooled_output))
     53 

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/batchnorm.py in forward(self, input)
    105             input, self.running_mean, self.running_var, self.weight, self.bias,
    106             self.training or not self.track_running_stats,
--> 107             exponential_average_factor, self.eps)
    108 
    109 

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   1664             size_prods *= size[i + 2]
   1665         if size_prods == 1:
-> 1666             raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
   1667 
   1668     return torch.batch_norm(

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 16])

Yes, that does point to the batchnorm, so the analysis above is the answer.

I realized that it was because of the BatchNorm, which is why I tried drop_last=True in the DataLoader. But it didn't work, so I was looking for a different solution 🙂
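For context, the loader setup was roughly this (a sketch with a made-up dataset; the real dataset and batch contents are omitted):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.randn(50, 8))   # hypothetical data
    train_dataloader = DataLoader(dataset, batch_size=16, drop_last=True)

    for (xb,) in train_dataloader:
        print(xb.shape)   # torch.Size([16, 8]) every time; drop_last only removes the final partial batch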

Actually, looking at the code in the stack trace:

     49         pooled_output = pooled_output.unsqueeze(0)
     50 
---> 51         pooled_output_1 = self.dropout_1(self.bn(pooled_output))

Looks like the pooled output was a 1D Tensor, and the unsqueeze added the extra dimension of size 1. In this case, is 16 supposed to be the batch size and you should have only one channel? If so, then you should unsqueeze dimension 1.