`ValueError: Expected input batch_size (980) to match target batch_size (20)`

```python
import os
import torch
from torchvision import datasets, transforms
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True  # allow PIL to load truncated image files instead of raising
### TODO: Write data loaders for training, validation, and test sets
# define training and test data directories
data_dir = '/data/dog_images/'
train_dir = os.path.join(data_dir, 'train/')
valid_dir = os.path.join(data_dir, 'valid/')
test_dir = os.path.join(data_dir, 'test/')


# count the breed classes from the training folders
classes = os.listdir(train_dir)
print(len(classes))


# note: the same transform (random 224x224 crop) is applied to all three splits
data_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.ToTensor()])



train_data = datasets.ImageFolder(train_dir, transform=data_transform)
valid_data = datasets.ImageFolder(valid_dir, transform=data_transform)
test_data = datasets.ImageFolder(test_dir, transform=data_transform)

# print out some data stats
print('Num training images: ', len(train_data))
print('Num validation images: ', len(valid_data))
print('Num test images: ', len(test_data))
## Specify appropriate transforms and batch size
# define dataloader parameters
batch_size = 20
num_workers = 0

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, 
                                           num_workers=num_workers, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, 
                                           num_workers=num_workers, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers, shuffle=True)
```
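As a quick sanity check before defining the model, fetching a single batch confirms what the network will actually see (a hypothetical snippet, not part of the original notebook):

```python
# inspect one batch from the training loader
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([20, 3, 224, 224]) with the transforms above
print(labels.shape)  # torch.Size([20]) -- one class index per image
```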




```python
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

# flag used below to move tensors to the GPU
use_cuda = torch.cuda.is_available()
# define the CNN architecture
class Net(nn.Module):
    ### TODO: choose an architecture, and complete the class
    def __init__(self):
        super(Net, self).__init__()
        ## Define layers of a CNN
        # convolutional layer (sees the 224x224x3 image tensor)
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        # convolutional layer (sees the pooled 112x112x16 tensor)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # convolutional layer (sees the pooled 56x56x32 tensor)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # linear layer (64 * 4 * 4 -> 500)
        self.fc1 = nn.Linear(64 * 4 * 4, 500)
        # linear layer (500 -> 133 dog breed classes)
        self.fc2 = nn.Linear(500, 133)
        # dropout layer (p=0.25)
        self.dropout = nn.Dropout(0.25)

        
    def forward(self, x):
        ## Define forward behavior
        x = self.pool(F.relu(self.conv1(x)))
        print(x.shape)
        x = self.pool(F.relu(self.conv2(x)))
        print(x.shape)
        x = self.pool(F.relu(self.conv3(x)))
        print(x.shape)
        # flatten image input
        x = x.view(-1, 64 * 4 * 4)
        # add dropout layer
        x = self.dropout(x)
        print(x.shape)
        # add 1st hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        print(x.shape)
        # add dropout layer
        x = self.dropout(x)
        print(x.shape)
        # final layer produces raw class scores (no activation; CrossEntropyLoss expects logits)
        x = self.fc2(x)
        return x

#-#-# You do NOT have to modify the code below this line. #-#-#

# instantiate the CNN
model_scratch = Net()
print(model_scratch)
# move tensors to GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()
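
# Hypothetical sanity check (not in the original notebook): run one dummy batch
# through the network and watch the batch dimension change from 20 to 980.
dummy_batch = torch.randn(20, 3, 224, 224)
if use_cuda:
    dummy_batch = dummy_batch.cuda()
print(model_scratch(dummy_batch).shape)  # torch.Size([980, 133]) instead of [20, 133]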

import torch.optim as optim

### select loss function
criterion_scratch = nn.CrossEntropyLoss()

### select optimizer
optimizer_scratch = optim.SGD(model_scratch.parameters(), lr=0.02)


def train(n_epochs, loader_train, loader_valid, model, optimizer, criterion, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loader_train):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update the running average of the training loss
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.item() - train_loss))
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loader_valid):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
            with torch.no_grad():
                output = model(data)
                loss = criterion(output, target)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.item() - valid_loss))
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))

        ## save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                valid_loss_min, valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss
    # return trained model
    return model


# train the model
model_scratch = train(3, train_loader, valid_loader, model_scratch, optimizer_scratch, 
                      criterion_scratch, use_cuda, 'model_scratch.pt')

# load the model that got the best validation accuracy
#model_scratch.load_state_dict(torch.load('model_scratch.pt'))
```


The `print(x.shape)` calls in `forward` produce:

```
torch.Size([20, 16, 112, 112])
torch.Size([20, 32, 56, 56])
torch.Size([20, 64, 28, 28])
torch.Size([980, 1024])
torch.Size([980, 500])
torch.Size([980, 500])
```

Your view to flatten the output of conv3 is wrong.
x has the shape [20, 64, 28, 28], while you are using `x.view(-1, 64*4*4)`, which reshapes the 20 * 64 * 28 * 28 = 1,003,520 elements into an activation of [980, 1024] (1,003,520 / 1024 = 980), thus changing the batch size from 20 to 980.

Use `x = x.view(x.size(0), -1)` instead and set the in_features of fc1 to 64*28*28.
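
Applied to the class above, the fix is a two-line change; a minimal sketch, assuming the 224x224 crops produced by the posted transforms:

```python
# in __init__: after three rounds of conv + 2x2 pooling, a 224x224 input
# becomes 64 channels of 28x28, so fc1 must accept 64*28*28 = 50176 features
self.fc1 = nn.Linear(64 * 28 * 28, 500)

# in forward(): keep the batch dimension and flatten everything else
x = x.view(x.size(0), -1)   # [20, 64, 28, 28] -> [20, 50176]
```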

PS: I’ve edited your post for better readability.
You can post code snippets by wrapping them in three backticks ``` :wink: