ValueError: Expected input batch_size (10) to match target batch_size (3)

I’m using CNNs to predict the gender of people. The shape before passing my data through my CNN is ([23705, 48, 48]).

class neural_network(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 64, 3),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Dropout(p=0.2),
            nn.MaxPool2d((2,2)))
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, 3),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d((2,2)),
            nn.Dropout(p=0.25),
            nn.ReLU())
        
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, 3),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.Dropout(p=0.2),
            nn.MaxPool2d((2,2)))
        
        self.layer4 = nn.Sequential(
            nn.Conv2d(256, 512, 3),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Dropout(p=0.2),
            nn.MaxPool2d((2,2)))
        
        self.flatten = nn.Flatten()
#         self.fc1 = nn.Linear(1024, 512)
        self.dropout = nn.Dropout(p=0.25)
        self.output = nn.Linear(512, 2)

    def forward(self, x):
        x = x.view(-1,1,48,48)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.flatten(x)
    
#         x = self.fc1(x)
#         x = self.dropout(x)
        x = self.output(x)
        return F.log_softmax(x, dim=1)

Here’s what happens when I train the model:

EPOCHS = 10
BATCH_SIZE = 10
y = gender_labels
y = y.type(torch.LongTensor)

correct = 0
predictions = []
correct_labels = []
total = 0

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(X), BATCH_SIZE)):
        
        train_X = X[i:i+BATCH_SIZE]
        train_y = y[i:i+BATCH_SIZE]
        neural_net.zero_grad()
        output = neural_net(train_X)
        loss = loss_function(output, train_y)
        loss.backward()
        optimizer.step()
        
        for idx, i in enumerate(output):
            if torch.argmax(i) == train_y[idx]:
                correct += 1
            predictions.append(torch.argmax(i).tolist())
            correct_labels.append(train_y[idx].tolist())
            total += 1
    print(loss)

print("Accuracy: ", round(correct/total, 3))

Error:

100%|█████████▉| 2367/2371 [06:36<00:00,  5.97it/s]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-82-f7f5dc83f425> in <module>
     16         neural_net.zero_grad()
     17         output = neural_net(train_X)
---> 18         loss = loss_function(output, train_y)
     19         loss.backward()
     20         optimizer.step()

~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~\anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
    209 
    210     def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 211         return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
    212 
    213 

~\anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2213 
   2214     if input.size(0) != target.size(0):
-> 2215         raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
   2216                          .format(input.size(0), target.size(0)))
   2217     if dim == 2:

ValueError: Expected input batch_size (10) to match target batch_size (3).

I’m not a PyTorch god or expert or anything like that, just an intermediate. Appreciate the help!

It seems that train_y is smaller in one iteration for some reason.
Could you check the shape of X and y and make sure they contain the same number of samples?

What’s your use case btw.?

@ptrblck he overall purpose of the “project” is to just help me get a better understanding of PyTorch. I’ve done a few courses on Udacity and YouTube but it didn’t really “click” with me. I started replicating projects myself like this one since I was able to get a better understanding of it. I’m doing gender and age of people so that I can learn more about data preprocessing and training.

UPDATE: the model accuracy is capping at 50% and I’m not even sure why. Can you take a look and lmk what I can do to increase it to at least 80%? @ptrblck

neural_network(
  (layer1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.1, inplace=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.1, inplace=False)
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (layer3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.1, inplace=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (layer4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.1, inplace=False)
    (4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (flatten): Flatten()
  (dropout): Dropout(p=0.5, inplace=False)
  (output): Linear(in_features=512, out_features=2, bias=True)
)