Expected input batch_size (1605632) to match target batch_size (6422528)

Hello all,

I’m facing an error while training my model using PyTorch.
Everything is in the title: after searching around the forum, I guess the problem is that the shapes of the input and target passed to the loss don't match.

    def train(self):
        bce_losses = AverageMeter()
        image_gradient_losses = AverageMeter()
        image_gradient_criterion = ImageGradientLoss().to(self.device)
        bce_criterion = nn.CrossEntropyLoss().to(self.device)

        for epoch in range(self.epoch, self.num_epoch):
            bce_losses.reset()
            image_gradient_losses.reset()
            for step, (image, gray_image, mask) in enumerate(self.data_loader):
                image = image.to(self.device)
                mask = mask.to(self.device)
                gray_image = gray_image.to(self.device)

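                # forward pass: pred should come out as [N, num_classes, H, W]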
                pred = self.net(image)

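                # flatten for CrossEntropyLoss: pred -> [N*H*W, num_classes], mask -> [N*H*W]
                # (assumes mask is [N, 1, H, W], so squeeze(1) drops the channel dim)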
                pred_flat = pred.permute(0, 2, 3, 1).contiguous().view(-1, self.num_classes)
                mask_flat = mask.squeeze(1).view(-1).long()

                image_gradient_loss = image_gradient_criterion(pred, gray_image)
                bce_loss = bce_criterion(pred_flat, mask_flat)

                loss = bce_loss + self.gradient_loss_weight * image_gradient_loss

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

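                # running averages, only used for the log line below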
                bce_losses.update(bce_loss.item(), self.batch_size)
                image_gradient_losses.update((self.gradient_loss_weight * image_gradient_loss).item(), self.batch_size)  # .item() so the meter stores a float, not a graph-holding tensor
                iou = iou_loss(pred, mask)

                # save sample images
                if step % 10 == 0:
                    print(f"Epoch: [{epoch}/{self.num_epoch}] | Step: [{step}/{self.image_len}] | "
                          f"Bce Loss: {bce_losses.avg:.4f} | Image Gradient Loss: {image_gradient_losses.avg:.4f} | "
                          f"IOU: {iou:.4f}")
                if step % self.sample_step == 0:
                    self.save_sample_imgs(image[0], mask[0], torch.argmax(pred[0], 0), self.sample_dir, epoch, step)
                    print('[*] Saved sample images')

            torch.save(self.net.state_dict(), f'{self.checkpoint_dir}/MobileHairNet_epoch-{epoch}.pth')

and here is the error I get:

(mobile-segmentation) fps60@fps60-MS-7B98:~/Documents/workspaces/mobile-hair-segmentation-pytorch-master$ python main.py --data_path data
[*] Load checkpoint in  checkpoints
[!] No checkpoint in  checkpoints
/home/fps60/anaconda3/envs/mobile-segmentation/lib/python3.6/site-packages/torch/nn/_reduction.py:49: UserWarning: size_average and reduce args will be deprecated, please use reduction='mean' instead.
  warnings.warn(warning.format(ret))
/home/fps60/anaconda3/envs/mobile-segmentation/lib/python3.6/site-packages/torch/nn/modules/upsampling.py:129: UserWarning: nn.Upsample is deprecated. Use nn.functional.interpolate instead.
  warnings.warn("nn.{} is deprecated. Use nn.functional.interpolate instead.".format(self.name))
Traceback (most recent call last):
  File "main.py", line 33, in <module>
    main(config)
  File "main.py", line 28, in main
    trainer.train()
  File "/home/fps60/Documents/workspaces/mobile-hair-segmentation-pytorch-master/src/train.py", line 85, in train
    bce_loss = bce_criterion(pred_flat, mask_flat)
  File "/home/fps60/anaconda3/envs/mobile-segmentation/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/fps60/anaconda3/envs/mobile-segmentation/lib/python3.6/site-packages/torch/nn/modules/loss.py", line 904, in forward
    ignore_index=self.ignore_index, reduction=self.reduction)
  File "/home/fps60/anaconda3/envs/mobile-segmentation/lib/python3.6/site-packages/torch/nn/functional.py", line 1970, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "/home/fps60/anaconda3/envs/mobile-segmentation/lib/python3.6/site-packages/torch/nn/functional.py", line 1788, in nll_loss
    .format(input.size(0), target.size(0)))
ValueError: Expected input batch_size (1605632) to match target batch_size (6422528).

If you have any idea, let me know; any help is appreciated :slight_smile:

Thanks, have a good day!

Based on the error message, pred_flat and mask_flat are what trigger this error, since their batch sizes differ. Note that 6422528 = 4 × 1605632, i.e. mask_flat ends up with exactly four times as many elements as pred_flat has rows; one possible cause is that the masks are loaded with four channels (e.g. RGBA), in which case mask.squeeze(1) is a no-op and all four channels get flattened into the target. Could you check the shapes of these tensors right before passing them to the criterion, and also before reshaping them?
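Here is a minimal sketch of the kind of check I mean, reusing the names from your snippet (self.data_loader, self.net, self.device, self.num_classes); the example shapes in the comments are only a guess:

    # grab a single batch and print every shape along the pipeline
    image, gray_image, mask = next(iter(self.data_loader))
    print("image:", image.shape)  # e.g. [32, 3, 224, 224]
    print("mask: ", mask.shape)   # if this is [32, 4, 224, 224], squeeze(1) is a no-op

    with torch.no_grad():
        pred = self.net(image.to(self.device))
    print("pred: ", pred.shape)   # should be [32, num_classes, 224, 224]

    pred_flat = pred.permute(0, 2, 3, 1).contiguous().view(-1, self.num_classes)
    mask_flat = mask.squeeze(1).view(-1).long()
    # CrossEntropyLoss requires these two leading sizes to match
    print("pred_flat:", pred_flat.shape)  # [N*H*W, num_classes]
    print("mask_flat:", mask_flat.shape)  # should also be [N*H*W]

If the masks really do come in with four channels, keeping a single channel before flattening, e.g. mask_flat = mask[:, 0, ...].reshape(-1).long(), should make the two sizes agree, but the right fix depends on how your dataset reads the mask files.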