ValueError: Expected input batch_size (1) to match target batch_size (64)

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(64 * 3 * 8 * 8, 1000)
        self.fc2 = nn.Linear(1000, 200)
        self.fc3 = nn.Linear(200, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        data = data.view(-1, args.batch_size*3*8*8)
        target = target.view(-1, args.batch_size)
        output = model(data)
        loss = F.nll_loss(output, target[0])
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = data.view(-1, args.test_batch_size*3*8*8)
            target = target.view(-1, args.test_batch_size)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target[0], reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='FNN')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument(
        '--test-batch-size', type=int, default=64, metavar='N',
        help='input batch size for testing (default: 64)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=1000, metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    
    # Transforms
    simple_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]
    )

    # Dataset
    train_dataset = datasets.ImageFolder('data/train/', simple_transform)
    valid_dataset = datasets.ImageFolder('data/valid/', simple_transform)

    # Data loader
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=2)

    test_loader = torch.utils.data.DataLoader(
        dataset=valid_dataset, batch_size=args.test_batch_size, shuffle=False,
        num_workers=2)

    model = Net().to(device)
    optimizer = optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if (args.save_model):
        torch.save(model.state_dict(), "fnn.pt")

if __name__ == '__main__':
    main()

The view operations of data and target look strange.
Currently you are using

data = data.view(-1, args.test_batch_size*3*8*8)
target = target.view(-1, args.test_batch_size)

Generally, and also based on your model code, you should provide the data as [batch_size, in_features] and the target as [batch_size] containing class indices.
Could you change that and try to run your code again?
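
For reference, here is a minimal sketch of the expected shapes (random tensors just stand in for a real batch; the 3*8*8 input size and 10 classes mirror your current model):

import torch
import torch.nn.functional as F

batch_size, nb_classes = 64, 10

# one fake batch, shaped as the DataLoader would deliver it: [batch_size, C, H, W]
data = torch.randn(batch_size, 3, 8, 8)
# class indices already have shape [batch_size]; no extra view is needed
target = torch.randint(0, nb_classes, (batch_size,))

# flatten each sample to [batch_size, in_features], keeping the batch dimension intact
data = data.view(data.size(0), -1)           # -> [batch_size, 3*8*8] = [64, 192]

# stand-in for the model output: log-probabilities of shape [batch_size, nb_classes]
output = F.log_softmax(torch.randn(batch_size, nb_classes), dim=1)
loss = F.nll_loss(output, target)            # batch sizes match, so no ValueError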

PS: I’ve formatted your code for better readability. You can add code snippets using three backticks ``` :wink:

Thanks for your quick response. To help as much as possible, here is exactly what I want to do: in data I have the folders train and valid, and within each of them 9 folders, which are the classes. They contain RGB images of 8x8 pixels. The idea is to use an MLP to classify future 8x8 images into one of these 9 classes.

I am using batch_size = 8. I have made some changes below according to your comments. Could you tell me what further changes are needed to meet the objective?

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(3 * 8 * 8, 1000)
        self.fc2 = nn.Linear(1000, 200)
        self.fc3 = nn.Linear(200, 9)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        data = data.view(args.batch_size,  3 * 8 * 8)
        target = target.view(args.batch_size, -1)
        output = model(data)
        loss = F.nll_loss(output, target[0])
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = data.view(args.test_batch_size,  3 * 8 * 8)
            target = target.view(args.test_batch_size, -1)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target[0], reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='FNN')
    parser.add_argument('--batch-size', type=int, default=8, metavar='N',
                        help='input batch size for training (default: 8)')
    parser.add_argument(
        '--test-batch-size', type=int, default=8, metavar='N',
        help='input batch size for testing (default: 8)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=100, metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    
    # Transforms
    simple_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]
    )

    # Dataset
    train_dataset = datasets.ImageFolder('data/train/', simple_transform)
    valid_dataset = datasets.ImageFolder('data/valid/', simple_transform)

    # Data loader
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=2)

    test_loader = torch.utils.data.DataLoader(
        dataset=valid_dataset, batch_size=args.test_batch_size, shuffle=False,
        num_workers=2)

    model = Net().to(device)
    optimizer = optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if (args.save_model):
        torch.save(model.state_dict(), "fnn.pt")

if __name__ == '__main__':
    main()

I think target should be viewed as target = target.view(args.batch_size).
Did you try to run the code and check for other errors?
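
As a side note (just a sketch, not required for the fix): hard-coding args.batch_size in the view calls will fail on the last batch if it is smaller than the rest, since the DataLoader does not drop incomplete batches by default. Deriving the size from the tensor itself avoids that:

import torch

# e.g. the last batch only contains 5 samples instead of 8
data = torch.randn(5, 3, 8, 8)
target = torch.randint(0, 9, (5,))

data = data.view(data.size(0), -1)  # works for any batch size -> [5, 192]
# target is already a 1-D tensor of class indices, so no view is required
print(data.shape, target.shape)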

Perfect.
Making target = target.view(args.batch_size) resolves the problem.

Finally, the code was as follows:

from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(3 * 8 * 8, 1000)
        self.fc2 = nn.Linear(1000, 200)
        self.fc3 = nn.Linear(200, 9)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        data = data.view(args.batch_size,  3 * 8 * 8)
        target = target.view(args.batch_size)
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * args.batch_size, len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = data.view(args.test_batch_size,  3 * 8 * 8)
            target = target.view(args.test_batch_size)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='FNN')
    parser.add_argument('--batch-size', type=int, default=8, metavar='N',
                        help='input batch size for training (default: 8)')
    parser.add_argument(
        '--test-batch-size', type=int, default=8, metavar='N',
        help='input batch size for testing (default: 8)')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=100, metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    
    # Transforms
    simple_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]
    )

    # Dataset
    train_dataset = datasets.ImageFolder('data/train/', simple_transform)
    valid_dataset = datasets.ImageFolder('data/valid/', simple_transform)

    # Data loader
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=2)

    test_loader = torch.utils.data.DataLoader(
        dataset=valid_dataset, batch_size=args.test_batch_size, shuffle=False,
        num_workers=2)

    model = Net().to(device)
    optimizer = optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if (args.save_model):
        torch.save(model.state_dict(), "fnn.pt")

if __name__ == '__main__':
    main()

Again thanks.

Hi @ptrblck,
Could you please explain briefly why this error occurs?

Also, in which different scenarios does it occur, illustrated with simple code?

Thank you in advance for your answer :slight_smile:

I got the same error but had no idea where it could be coming from.


model = Classifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 30
steps = 0

train_losses, test_losses = [], []
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:

        optimizer.zero_grad()

        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    else: #after each loop complete
        test_loss = 0
        train_loss = 0

        ## TODO: Implement the validation pass and print out the validation accuracy
        with torch.no_grad():
            for images, labes in testloader:
                log_ps = model(images)
                test_loss += criterion(log_ps, labels) #update test loss

                ps = torch.exp(log_ps) #actual probs
                top_p, top_class = ps.topk(1, dim=1) #gives actual predicted classes
                equals = top_class == labels.view(*top_class.shape) #whether predicted class match with actual class
                accuracy += torch.mean(equals.type(torch.FloatTensor))
        train_losses.append(running_loss/len(trainloader))
        test_losses.append(test_loss/len(testloader))

This error is raised if the batch sizes of the model output and the target do not match.
E.g. this small code example shows the issue, where the data uses a batch size of 5 while the target uses one of 4:


import torch
import torch.nn as nn

nb_classes = 10
output = torch.randn(5, nb_classes, requires_grad=True)  # [batch_size=5, nb_classes]
target = torch.randint(0, nb_classes, (4,))              # [batch_size=4]

criterion = nn.CrossEntropyLoss()
loss = criterion(output, target)
> ValueError: Expected input batch_size (5) to match target batch_size (4).

Check the shapes of log_ps and labels in your code and make sure they have the same batch size.
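
A quick way to do that is to print the shapes right before the loss is computed, e.g. inside your validation loop (just a debugging sketch using your variable names):

print('log_ps:', log_ps.shape, 'labels:', labels.shape)  # dim 0 of both should be the same batch size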


Oh, my stupid fault: I had a typo, labes instead of labels, so the criterion was still working with the previously defined labels variable. Thank you :slight_smile: I appreciate it.

Hello, I am having a similar error.
I am doing phase retrieval in digital holography, but the error below is messing me up. Any help will be greatly appreciated.
Here is my CNN; I am using a batch size of 2.

class Network(nn.Module):
    def __init__(self, in_channels=2):

        super(Network, self).__init__()

        self.Conv1 = nn.Sequential(
            nn.Conv2d(in_channels, 16, 3, 1, 1), nn.ReLU(),
            nn.MaxPool2d(3, 1, 1),
            nn.Conv2d(16, 2, 3), nn.ReLU())

        self.Conv2 = nn.Sequential(
            nn.Conv2d(in_channels, 16, 3, 1, 1), nn.ReLU(),
            nn.MaxPool2d(3, 1, 1),
            nn.Conv2d(16, 4, 3), nn.ReLU())

        self.Conv3 = nn.Sequential(
            nn.Conv2d(in_channels, 16, 3, 1, 1), nn.ReLU(),
            nn.MaxPool2d(3, 1, 1),
            nn.Conv2d(16, 8, 3), nn.ReLU())

        self.Conv4 = nn.Sequential(
            nn.Conv2d(in_channels, 16, 3, 1, 1), nn.ReLU(),
            nn.MaxPool2d(3, 1, 1),
            nn.Conv2d(16, 16, 3), nn.ReLU())

        self.Upsample1 = nn.Sequential(
            nn.Upsample(size=(256, 256), scale_factor=None, align_corners=True, mode='bilinear'),
            nn.Conv2d(2, 4, 3), nn.ReLU(),

            nn.Upsample(size=(256, 256), scale_factor=None, align_corners=True, mode='bilinear'),
            nn.Conv2d(4, 8, 3), nn.ReLU(),

            nn.Upsample(size=(256, 256), scale_factor=None, align_corners=True, mode='bilinear'),
            nn.Conv2d(8, 16, 3), nn.ReLU())

        self.Upsample2 = nn.Sequential(
            nn.Upsample(size=(256, 256), scale_factor=None, align_corners=True, mode='bilinear'),
            nn.Conv2d(4, 8, 3), nn.ReLU(),

            nn.Upsample(size=(256, 256), scale_factor=None, align_corners=True, mode='bilinear'),
            nn.Conv2d(8, 16, 3), nn.ReLU())

        self.Upsample3 = nn.Sequential(
            nn.Upsample(size=(256, 256), scale_factor=None, align_corners=True, mode='bilinear'),
            nn.Conv2d(8, 16, 3), nn.ReLU())

        self.Conv1a = nn.Conv2d(16, 16, 3)
        self.Conv2a = nn.Conv2d(16, 16, 3)
        self.Conv3a = nn.Conv2d(16, 16, 3)
        self.Conv4a = nn.Conv2d(16, 16, 3)
        self.Conv5 = nn.Conv2d(16, 64, 3)

    def forward(self, x):
        # print(x.size)
        w = self.Conv1(x)
        w = self.Upsample1(w)
        w = F.relu(self.Conv1a(w))

        k = self.Conv2(x)
        k = self.Upsample2(k)
        k = F.relu(self.Conv2a(k))

        y = self.Conv3(x)
        y = self.Upsample3(y)
        y = F.relu(self.Conv3a(y))

        z = self.Conv4(x)
        z = F.relu(self.Conv4a(z))
        print('*****', w.shape, k.shape, y.shape, z.shape)

        p = torch.cat([w, k, y, z], 0)
        print(p.shape)
        q = self.Conv5(p)
        print(q.shape)

        return p


net = Network()

Below is the error I am getting:

***** torch.Size([1, 16, 252, 252]) torch.Size([1, 16, 252, 252]) torch.Size([1, 16, 252, 252]) torch.Size([1, 16, 252, 252])
torch.Size([4, 16, 252, 252])
torch.Size([4, 64, 250, 250])
output_shape torch.Size([4, 16, 252, 252])
Traceback (most recent call last):
  File "E:\DMU\pytnewCNN.ipy", line 201, in <module>
    loss = criterion(outputs, labels)
  File "C:\Users\Hp\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\Hp\anaconda3\lib\site-packages\torch\nn\modules\loss.py", line 932, in forward
    ignore_index=self.ignore_index, reduction=self.reduction)
  File "C:\Users\Hp\anaconda3\lib\site-packages\torch\nn\functional.py", line 2317, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "C:\Users\Hp\anaconda3\lib\site-packages\torch\nn\functional.py", line 2113, in nll_loss
    .format(input.size(0), target.size(0)))
ValueError: Expected input batch_size (4) to match target batch_size (2).

I’m not familiar with your model, but this line of code:

p = torch.cat([w, k, y, z], 0)

would concatenate the different activations in the batch dimension, which is most likely wrong.
If you would like to concatenate the activations instead in the channel dimension, you would have to use dim=1.
Currently this line is changing the batch size, which is probably creating the error.
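
Here is a minimal sketch of the difference, using shapes that match your printout above:

import torch

# four activations, each with batch size 1 and 16 channels
w, k, y, z = [torch.randn(1, 16, 252, 252) for _ in range(4)]

print(torch.cat([w, k, y, z], dim=0).shape)  # torch.Size([4, 16, 252, 252]) -- batch size grows
print(torch.cat([w, k, y, z], dim=1).shape)  # torch.Size([1, 64, 252, 252]) -- channels stacked, batch kept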