ValueError: Expected input batch_size (324) to match target batch_size (4)

I’m getting the following error. I have tried every solution provided on various platforms, but nothing is working. My dataset is of facial expressions and all the images are grayscale. The code is pasted below:


import torch
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
 
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


trainset = torchvision.datasets.ImageFolder(root='./root', transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.ImageFolder(root='./root', transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral')

import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc1 = nn.Linear(64, 1024)
        self.fc2 = nn.Linear(1024, 7)
        #self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64)
        x = F.relu(self.fc1(x))
        #x = F.relu(self.fc2(x))
        x = self.fc2(x)
        return x


net = Net()
save_net = net

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
#optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(1):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        real, labels = data
        
        inputs = Variable(real)
        labels = Variable(labels)
        #labels = Variable(torch.ones(inputs.size()[0]))
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += float(loss.data[0])
        if i % 100 == 0:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')
torch.save({'state_dict': save_net.state_dict(),
                    'optimizer' : optimizer.state_dict(),
                   }, 'last_brain1.pth')
    
dataiter = iter(testloader)
images, labels = dataiter.next()

# print images
#imshow(torchvision.utils.make_grid(images))
#print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

real = Variable(images)
outputs = net(real)


_, predicted = torch.max(outputs, 1)

#print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
#                              for j in range(4)))


correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images = Variable(images)
        labels = Variable(labels)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += float((predicted == labels).sum().item())

print('Accuracy of the network: %f %%' % (
    100 * correct / total))

class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images = Variable(images)
        labels = Variable(labels)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2f %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Does anyone have a solution? Kindly reply. Thanks in advance.


Could you print the shape of x directly before the view in forward?

x = self.pool(F.relu(...
print(x.shape)
x = x.view(x.size(0), -1)

Yes, the output of print(x.shape) is as follows:
torch.Size([4, 64, 9, 9])

Then your linear layer should have in_features=64*9*9; otherwise you will mix your features into the batch dimension, which yields your current error.
Could you change it to self.fc1 = nn.Linear(64*9*9, 1024) and also use my code to reshape the tensor?
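
For reference, the two changes together could look like this (a sketch based on the printed activation shape [4, 64, 9, 9]; only fc1 and the view call differ from your original model):

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.fc1 = nn.Linear(64 * 9 * 9, 1024)  # in_features matches the flattened 64*9*9 activation
        self.fc2 = nn.Linear(1024, 7)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # keep the batch dimension in dim0, flatten the rest
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x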


Thank you so much, it really worked. Can you please tell me what you checked and how you concluded what the problem was?

Sure!
The first thing to do when you run into a size mismatch error is to check your tensor shapes.
In your original code you are using x = x.view(-1, 64).
Since the shape of x is [4, 64, 9, 9] and you forced x to be [-1, 64] = [4*9*9, 64] = [324, 64], your batch dimension is now larger than it should be.
This yields exactly the error message for a size mismatch in the batch dimension (324 vs. 4).

The right approach is to keep the batch size in dim0 and reshape the feature map into dim1.
Since self.fc1 was too small for the 64*9*9 input features, you had to expand it.


Thank you so much. Now I am able to understand the problem. 🙂

Thank you so much!!!

I encountered the same problem while trying to simplify my CNN so it runs epochs faster. How do I fix it?

def __init__(self):
    super(Net, self).__init__()
    ## Define layers of a CNN
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
    self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
    self.pool = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(25088, 512)
    self.fc2 = nn.Linear(512, 133)
    self.dropout = nn.Dropout(0.5)

def forward(self, x):
    ## Define forward behavior
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool(F.relu(self.conv3(x)))
    print(x.shape)
    x = x.view(-1, 25088)
    x = F.relu(self.fc1(x))
    x = self.dropout(x)
    x = self.fc2(x)
    return x

Solved it based on this thread. Thanks all.
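
For anyone else hitting this: the same fix as above applies here. Keep the batch dimension with x.view(x.size(0), -1) and make sure self.fc1's in_features equals the per-sample feature count printed by print(x.shape) (a minimal sketch of the adapted forward; the 25088 is assumed to match your actual input resolution):

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)   # keep the batch dimension in dim0
        x = F.relu(self.fc1(x))     # fc1.in_features must equal x.size(1) at this point
        x = self.dropout(x)
        x = self.fc2(x)
        return x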

I am facing a similar issue while calculating the cross-entropy loss. Here is my sample code:

loss_fn = nn.CrossEntropyLoss()
for epoch in range(100):
    model.train()
    train_acc = 0.0
    train_loss = 0.0
    for images in os.listdir(current_path):
        I = Image.open(images)
        x = image_transform(I).float().reshape([1,3,224,224])
        output = model(x)
        loss = loss_fn(output,labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.cpu()
        _, prediction = torch.max(outputs.data, 1)

My labels are as follows:

labels.shape
torch.Size([16])
output.shape
torch.Size([1, 16])

As per the documentation of cross-entropy loss, the input and target must be tensors of size (N, C) and (N). I have tried resizing them but failed. Can you suggest a solution? Thanks.

Your current label and output shape do not match. Both should have the batch size in dim0.
Based on your output shape it looks like you are dealing with a single sample and 16 classes. If that’s correct, your label should have the shape [1] and contain the class index in the range [0, 15].
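
A minimal example of the expected shapes (a sketch with random values, assuming one sample and 16 classes):

import torch
import torch.nn as nn

loss_fn = nn.CrossEntropyLoss()

output = torch.randn(1, 16)   # model output: [batch_size, num_classes] = [1, 16]
label = torch.tensor([3])     # target: [batch_size] = [1], class index in [0, 15]

loss = loss_fn(output, label)
print(loss)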


Sir, can you tell me what 1024 is? Is it the number of hidden units, and how did you calculate it?
I am asking about this:
self.fc1 = nn.Linear(64, 1024)

Sir, the pictures are grayscale, yet at the first conv layer he uses self.conv1 = nn.Conv2d(3, 32, 5).
Is this possible?

I assume the single channel was duplicated, since transforms.Normalize() has three values for mean and std. Also, imshow contains an np.transpose over 3 dimensions, which also suggests duplicated channels.

However, if you are dealing with grayscale images and would like to keep a single channel, you are right and should set the number of input channels in the first conv layer to 1.
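
If you keep a single channel, the transform and first conv layer could look like this (a minimal sketch; transforms.Grayscale and the single-value Normalize are assumptions about how you would adapt the original pipeline):

import torch.nn as nn
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),   # make sure there is exactly one channel
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),          # one mean/std value per channel
])

conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)  # first layer now expects 1 channel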

I have a similar issue, which I managed to solve by changing the arrangement of the data, but I cannot understand why it did not work before, so I would like someone to explain it to me if possible. This is my initial dataset class:

class ModelDataset(Dataset):
    
    def __init__(self, data, labels, ind):

        # take only the data specified by ind. ind is a list of indexes
        self.data = data[:,ind,:]  # size = (6, ind, 256)
        self.labels = labels[ind]
       
    def __getitem__(self, index):
        
        pair = self.data[:,index,:] # pair = (6,256)
        label = self.labels[index]
        
        x1 = pair[:, :128]
        x2 = pair[:, 128:]
        
        return x1.T, x2.T, label
    
    def __len__(self):
        return len(self.data)
    

So when I was creating a DataLoader on this dataset, even though I was specifying the batch size at a certain value, the maximum batch size was stuck at 6. I thought that maybe the 3D arrangement of self.data was the issue, so I rearranged self.data into a 2D matrix, and after that I was able to get the batch size I wanted. But I cannot understand why I could not make it work before. When I set the batch size to a value lower than 6, I was getting the correct data from the DataLoader; I checked that.

The batch dimension is usually in dim0, and the DataLoader will use the default_collate method to create a batch of samples using this structure.
Based on your code snippet it looks like the batch dimension is in dim1, while __len__ returns len(self.data), i.e. the size of dim0 (which is 6), so the Dataset only exposes 6 samples. That yields this behavior.
If you don’t want to flatten your data, you could permute the dimensions:

class ModelDataset(Dataset):
    
    def __init__(self, data, labels, ind):

        # take only the data specified by ind. ind is a list of indexes
        self.data = data[:,ind,:].permute(1, 0, 2)  # size = (ind, 6, 256)
        self.labels = labels[ind]
       
    def __getitem__(self, index):
        
        pair = self.data[index] # pair = (6,256)
        label = self.labels[index]
        
        x1 = pair[:, :128]
        x2 = pair[:, 128:]
        
        return x1.T, x2.T, label
    
    def __len__(self):
        return len(self.data)

Let me know if that works for you!
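
As a quick sanity check (a sketch with made-up sizes; data, labels, and ind are dummy placeholders), the batches should now come out with the requested batch size in dim0:

import torch
from torch.utils.data import DataLoader

data = torch.randn(6, 100, 256)            # assumed layout: (6, num_samples, 256)
labels = torch.randint(0, 2, (100,))
ind = list(range(100))

dataset = ModelDataset(data, labels, ind)
loader = DataLoader(dataset, batch_size=16, shuffle=True)

x1, x2, y = next(iter(loader))
print(x1.shape, x2.shape, y.shape)         # e.g. [16, 128, 6], [16, 128, 6], [16]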

Thank you very much! It worked perfectly!

I read all the above-mentioned remedies, but the following code still gives an error after running a few iterations. Please help me point out the mistake.
The error is as mentioned below:

ValueError: Expected input batch_size (32) to match target batch_size (64).

The code is:

class CNN_MNIST(nn.Module):
    def __init__(self):
        super(CNN_MNIST,self).__init__()
        
        # convolution layer 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels= 32, kernel_size=5,
                              stride=1,padding=2)
        
        # ReLU activation 
        self.relu1 = nn.ReLU()
        
        # maxpool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2,stride=2)
        
        # convolution 2
        self.cnn2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5,
                              stride=1,padding=2)
        
        # ReLU activation 
        self.relu2 = nn.ReLU()
        
        # maxpool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2,stride=2)
        
        # fully connected 1
        self.fc1 = nn.Linear(7*7*64,1000)
        # fully connected 2
        self.fc2 = nn.Linear(1000,10)
        
    def forward(self,x):
        
        # convolution 1
        out = self.cnn1(x)
        # activation function
        out = self.relu1(out)
        # maxpool 1
        out = self.maxpool1(out)
        
        # convolution 2
        out = self.cnn2(out)
        # activation function
        out = self.relu2(out)
        # maxpool 2
        out = self.maxpool2(out)
        
        # flatten the output
        out = out.view(out.size(0),-1)
        
        # fully connected layers
        out = self.fc1(out)
        out = self.fc2(out)
        
        return out
# model trainning
count = 0
loss_list = []
iteration_list = []
accuracy_list = []

for epoch in range(int(n_epochs)):
    for i, (image,labels) in enumerate(train_loader):
        
        train = Variable(image)
        labels = Variable(labels)
        
        # clear gradient
        optimizer.zero_grad()
        
        # forward propagation
        output = cnn_model(train)
        
        # calculate softmax and cross entropy loss
        loss = error(output,label)
        
        # calculate gradients
        loss.backward()
        
        # update the optimizer
        optimizer.step()
        
        count += 1
        
        if count % 50 ==0:
            # calculate the accuracy
            correct = 0
            total = 0
            
            # iterate through the test data
            for image, labels in test_loader:
                
                test = Variable(image)
                
                # forward propagation
                output = cnn_model(test)
                
                # get prediction
                predict = torch.max(output.data,1)[1]
                
                # total number of labels
                total += len(labels)
                
                # correct prediction
                correct += (predict==labels).sum()
                
            # accuracy
            accuracy = 100*correct/float(total)
            
            # store loss, number of iteration, and accuracy
            loss_list.append(loss.data)
            iteration_list.append(count)
            accuracy_list.append(accuracy)
            
            # print loss and accurcay as the algorithm progresses
            if count % 500 ==0:
                print('Iteration :{}    Loss :{}    Accuracy :{}'.format(count,loss.item(),accuracy))

The Error is:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-9e93a242961b> in <module>
     18 
     19         # calculate softmax and cross entropy loss
---> 20         loss = error(output,label)
     21 
     22         # calculate gradients

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    545             result = self._slow_forward(*input, **kwargs)
    546         else:
--> 547             result = self.forward(*input, **kwargs)
    548         for hook in self._forward_hooks.values():
    549             hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
    914     def forward(self, input, target):
    915         return F.cross_entropy(input, target, weight=self.weight,
--> 916                                ignore_index=self.ignore_index, reduction=self.reduction)
    917 
    918 

~\Anaconda3\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   1993     if size_average is not None or reduce is not None:
   1994         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 1995     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   1996 
   1997 

~\Anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   1820     if input.size(0) != target.size(0):
   1821         raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
-> 1822                          .format(input.size(0), target.size(0)))
   1823     if dim == 2:
   1824         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

ValueError: Expected input batch_size (32) to match target batch_size (64).

Based on the error message, it looks like output and label do not have the same batch size.
Could you print the shapes of train, labels, and output and check why the shapes are different?
I couldn’t find any obvious error in the reshaping by skimming through your code.

PS: I’m not sure if it’s on purpose, but you are not using any activation function on the output of self.fc1.
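
Something like this, placed right before the loss calculation, would show where the mismatch comes from (a sketch; the example shapes are just what I would expect for MNIST with this model):

        # inside the training loop, directly before the loss is computed
        print(train.shape)    # input batch,  e.g. torch.Size([64, 1, 28, 28])
        print(output.shape)   # model output, e.g. torch.Size([64, 10])
        print(labels.shape)   # targets,      e.g. torch.Size([64])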