DataLoader and nn.Module yield different batch sizes

Hi,
I am running the StarGAN code [https://github.com/yunjey/stargan] with a slightly modified discriminator.

import numpy as np
import torch.nn as nn


class Discriminator(nn.Module):
    """Discriminator network with PatchGAN."""
    def __init__(self, image_size=128, conv_dim=64, c_dim=5, repeat_num=6):
        super(Discriminator, self).__init__()
        layers = []
        layers.append(nn.Conv2d(3, conv_dim, kernel_size=4, stride=2, padding=1))
        layers.append(nn.LeakyReLU(0.01))

        curr_dim = conv_dim
        for i in range(1, repeat_num):
            layers.append(nn.Conv2d(curr_dim, curr_dim*2, kernel_size=4, stride=2, padding=1))
            layers.append(nn.LeakyReLU(0.01))
            curr_dim = curr_dim * 2

        kernel_size = int(image_size / np.power(2, repeat_num))
        self.main = nn.Sequential(*layers)
        self.conv1 = nn.Conv2d(curr_dim, 1, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv2 = nn.Conv2d(curr_dim, c_dim, kernel_size=kernel_size, bias=False)
   
        # Added feature head: flatten the conv features and project to a 128-d embedding
        self.c3    = nn.Linear(2048, 512)
        self.c4    = nn.ReLU6()
        self.c5    = nn.Linear(512, 128)
                 

    def forward(self, x):
        h = self.main(x)
        out_src = self.conv1(h)
        out_cls = self.conv2(h)

        out_feat = h.view(h.size(0), -1)
        print(h.shape)
        out_feat = self.c3(out_feat)
        out_feat = self.c4(out_feat)
        out_feat = self.c5(out_feat)

        return out_feat.squeeze(), out_src, out_cls.view(out_cls.size(0), out_cls.size(1))

In forward, I print the output shape of the conv blocks with print(h.shape), and it yields:

torch.Size([16])                     # dataloader batch size
torch.Size([16, 2048, 1, 1])
torch.Size([16, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([16, 2048, 1, 1])
torch.Size([16])                    # Dataloader batch size
torch.Size([16, 2048, 1, 1])
torch.Size([16, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
torch.Size([7, 2048, 1, 1])
...

Why are there batches of size 7 here? The batch size is defined as 16. I also printed the dataloader batch size, and it is 16.
Thank you for your time.

Probably the last batch in the training set is not of size 16. For example, if you have a dataset of size 23, then the first mini-batch will have size 16 and the second will have size 7.
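A minimal sketch of that behavior, using a toy TensorDataset of 23 samples purely for illustration:

import torch
from torch.utils.data import DataLoader, TensorDataset

# 23 samples with batch_size=16 -> one full batch of 16 and a final batch of 7
dataset = TensorDataset(torch.randn(23, 3, 64, 64))
loader = DataLoader(dataset, batch_size=16)

for (batch,) in loader:
    print(batch.size(0))   # prints 16, then 7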

Thanks @smth,
My dataset has almost 900 samples. If I understand correctly, there should be many batches of size 16 before a single final batch of 7, as you said (for instance, 887 samples would give 55 batches of 16 followed by one batch of 7). However, there are many batches of size 7, and that is what I don't understand.

Are you using DataParallel, which could chunk the data?

Hi @ptrblck
No, I don't use DataParallel. I use the data loader from StarGAN, mostly the get_loader for the RaFD dataset.

from torch.utils import data
from torchvision import transforms as T
from torchvision.datasets import ImageFolder
from PIL import Image
import torch
import os
import random


class CelebA(data.Dataset):
    """Dataset class for the CelebA dataset."""

    def __init__(self, image_dir, attr_path, selected_attrs, transform, mode):
        """Initialize and preprocess the CelebA dataset."""
        self.image_dir = image_dir
        self.attr_path = attr_path
        self.selected_attrs = selected_attrs
        self.transform = transform
        self.mode = mode
        self.train_dataset = []
        self.test_dataset = []
        self.attr2idx = {}
        self.idx2attr = {}
        self.preprocess()

        if mode == 'train':
            self.num_images = len(self.train_dataset)
        else:
            self.num_images = len(self.test_dataset)

    def preprocess(self):
        """Preprocess the CelebA attribute file."""
        lines = [line.rstrip() for line in open(self.attr_path, 'r')]
        all_attr_names = lines[1].split()
        for i, attr_name in enumerate(all_attr_names):
            self.attr2idx[attr_name] = i
            self.idx2attr[i] = attr_name

        lines = lines[2:]
        random.seed(1234)
        random.shuffle(lines)
        for i, line in enumerate(lines):
            split = line.split()
            filename = split[0]
            values = split[1:]

            label = []
            for attr_name in self.selected_attrs:
                idx = self.attr2idx[attr_name]
                label.append(values[idx] == '1')

            if (i+1) < 2000:
                self.test_dataset.append([filename, label])
            else:
                self.train_dataset.append([filename, label])

        print('Finished preprocessing the CelebA dataset...')

    def __getitem__(self, index):
        """Return one image and its corresponding attribute label."""
        dataset = self.train_dataset if self.mode == 'train' else self.test_dataset
        filename, label = dataset[index]
        image = Image.open(os.path.join(self.image_dir, filename))
        return self.transform(image), torch.FloatTensor(label)

    def __len__(self):
        """Return the number of images."""
        return self.num_images


def get_loader(image_dir, attr_path, selected_attrs, crop_size=178, image_size=128, 
               batch_size=16, dataset='CelebA', mode='train', num_workers=1):
    """Build and return a data loader."""
    transform = []
    if mode == 'train':
        transform.append(T.RandomHorizontalFlip())
    transform.append(T.CenterCrop(crop_size))
    transform.append(T.Resize(image_size))
    transform.append(T.ToTensor())
    transform.append(T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)))
    transform = T.Compose(transform)

    if dataset == 'CelebA':
        dataset = CelebA(image_dir, attr_path, selected_attrs, transform, mode)
    elif dataset == 'RaFD':
        dataset = ImageFolder(image_dir, transform)

    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=(mode=='train'),
                                  num_workers=num_workers)
    return data_loader
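To double-check what this loader actually yields per epoch, one could count the batch sizes over a full epoch. A quick diagnostic sketch (the image directory below is a placeholder for your own path; attr_path and selected_attrs are unused for RaFD):

from collections import Counter

loader = get_loader('path/to/RaFD/train', attr_path=None, selected_attrs=None,
                    dataset='RaFD', batch_size=16, mode='train')
size_counts = Counter(images.size(0) for images, labels in loader)
print(size_counts)   # expected: many batches of 16 and at most one smaller remainder batch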

Just to confirm, I came across a similar issue with the dataloader.

@smth @ptrblck, this seems to be an edge case where the last batch can cause dimension mismatches, and it's not ideal to have custom reshaping code to correct for this. Would it be possible to have a drop_the_last_batch option in the dataloader constructor? That way all downstream code could assume that the batch size is strictly consistent.

EDIT: I think I spoke too soon; it looks like there is a drop_last option in the dataloader, so that should help with the batch issues. https://pytorch.org/docs/stable/data.html
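For reference, a minimal sketch of passing drop_last to the loader built in get_loader above; it simply discards the final incomplete batch, so downstream code can assume a fixed batch size:

data_loader = data.DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=(mode == 'train'),
                              num_workers=num_workers,
                              drop_last=True)   # discard the final incomplete batch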