RuntimeError: stack expects each tensor to be equal size, but got [3, 224, 224] at entry 0 and [3, 224, 336] at entry 3

Im trying to implement pretrained resnet50 on a image classification task with 42 labels and received this error. I dont understand what caused the layer size to change. Below is my code, i stitched them up from different tutorials I can find.

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset


class CustomDataset(Dataset):
    def __init__(self, data, path , transform = None):
        super().__init__()
        self.data = data.values
        self.path = path
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,index):
        img_name,label = self.data[index]
        img_path = self.path / img_name
        image = img.imread(img_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

label_names = [ '0' + str(i) if len(str(i)) == 1 else str(i) for i in list(range(42))]


def plot_images(images, cls_true, cls_pred=None):
    """
    Adapted from https://github.com/Hvass-Labs/TensorFlow-Tutorials/
    """
    fig, axes = plt.subplots(3, 3)

    for i, ax in enumerate(axes.flat):
        # plot img
        ax.imshow(images[i, :, :, :], interpolation='spline16')

        # show true & predicted classes
        cls_true_name = label_names[cls_true[i]]
        if cls_pred is None:
            xlabel = "{0} ({1})".format(cls_true_name, cls_true[i])
        else:
            cls_pred_name = label_names[cls_pred[i]]
            xlabel = "True: {0}\nPred: {1}".format(
                cls_true_name, cls_pred_name
            )
        ax.set_xlabel(xlabel)
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()

def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           img_size,
                           valid_size=0.1,
                           shuffle=True,
                           show_sample=False,
                           num_workers=4,
                           pin_memory=False):
    """
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - augment: whether to apply the data augmentation scheme
      mentioned in the paper. Only applied on the train split.
    - random_seed: fix seed for reproducibility.
    - valid_size: percentage split of the training set used for
      the validation set. Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot 9x9 sample grid of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=torchvision.transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(0.3),
            transforms.ToTensor()])
    )

    valid_dataset = train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=valid_transform
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=9, shuffle=shuffle,
            num_workers=num_workers, pin_memory=pin_memory,
        )
        data_iter = iter(sample_loader)
        images, labels = data_iter.next()
        X = images.numpy().transpose([0, 2, 3, 1])
        plot_images(X, labels)

    return (train_loader, valid_loader)

def get_test_loader(data_dir,
                    batch_size,
                    data_csv,
                    shuffle=True,
                    num_workers=4,
                    pin_memory=False):
    """
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - shuffle: whether to shuffle the dataset after every epoch.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - data_loader: test set iterator.
    """
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # define transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    dataset = CustomDataset(data_csv, data_dir, transform)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    return data_loader

batch_size = 64 #recommended to use 64 as batch size
img_size = 224 #make this smaller if having hardware trouble, better use even number

train_loader, valid_loader = get_train_valid_loader(train_dir, 
                                                    batch_size=batch_size, 
                                                    augment=True,
                                                    random_seed=5,
                                                    img_size=img_size,
                                                    valid_size=0.1,
                                                    shuffle=True,
                                                    show_sample=True,
                                                    num_workers=1,
                                                    pin_memory=True)

test_loader = get_test_loader(test_dir,
                              batch_size=batch_size,
                              data_csv=test_csv,
                              shuffle=True,
                              num_workers=1,
                              pin_memory=True)

from torchvision import models
from torch.autograd import Variable
from torch.optim import lr_scheduler
import torch.optim as optim

def train_model(dataloders, model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    use_gpu = torch.cuda.is_available()
    best_model_wts = model.state_dict()
    best_acc = 0.0
    dataset_sizes = {'train': len(dataloders['train'].dataset), 
                     'valid': len(dataloders['valid'].dataset)}

    for epoch in range(num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)
            else:
                model.train(False)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloders[phase]:
                if use_gpu:
                    inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.data
                running_corrects += torch.sum(preds == labels.data)
                
            
            if phase == 'train':
                train_epoch_loss = running_loss / dataset_sizes[phase]
                train_epoch_acc = running_corrects / dataset_sizes[phase]
                print(f'Epoch: {epoch}, Accuracy: {train_epoch_acc}')
            else:
                valid_epoch_loss = running_loss / dataset_sizes[phase]
                valid_epoch_acc = running_corrects / dataset_sizes[phase]
                
            if phase == 'valid' and valid_epoch_acc > best_acc:
                best_acc = valid_epoch_acc
                best_model_wts = model.state_dict()

        print('Epoch [{}/{}] train loss: {:.4f} acc: {:.4f} ' 
              'valid loss: {:.4f} acc: {:.4f}'.format(
                epoch, num_epochs - 1,
                train_epoch_loss, train_epoch_acc, 
                valid_epoch_loss, valid_epoch_acc))
            
    print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 42)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
dloaders = {'train':train_loader, 'valid':valid_loader}
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_ft = train_model(dloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=5)

This is the whole error code:

RuntimeError                              Traceback (most recent call last)

<ipython-input-46-3bd8318057b1> in <module>()
      1 model_ft = train_model(dloaders, model_ft, criterion, optimizer_ft, exp_lr_scheduler,
----> 2                        num_epochs=5)

4 frames

/usr/local/lib/python3.6/dist-packages/torch/_utils.py in reraise(self)
    393             # (https://bugs.python.org/issue2651), so we work around it.
    394             msg = KeyErrorMessage(msg)
--> 395         raise self.exc_type(msg)

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py", line 79, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py", line 79, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py", line 55, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [3, 224, 224] at entry 0 and [3, 224, 336] at entry 3

Any pointer on how to resolve this? Thank you!

3 Likes

You can add a manual transform of resizing ie transforms.Resize(), inside transforms.Compose() at the end. This way, you ensure that all the images you stack will end up at the same size.

I resized it in the get_train_valid_loader function or can I still resize it again before passing the input images to the model?

Dataloader function

def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           img_size,
                           valid_size=0.1,
                           shuffle=True,
                           show_sample=False,
                           num_workers=4,
                           pin_memory=False):
    """
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - augment: whether to apply the data augmentation scheme
      mentioned in the paper. Only applied on the train split.
    - random_seed: fix seed for reproducibility.
    - valid_size: percentage split of the training set used for
      the validation set. Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot 9x9 sample grid of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(0.3),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=torchvision.transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(0.3),
            transforms.ToTensor()])
    )

    valid_dataset = train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=valid_transform
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=9, shuffle=shuffle,
            num_workers=num_workers, pin_memory=pin_memory,
        )
        data_iter = iter(sample_loader)
        images, labels = data_iter.next()
        X = images.numpy().transpose([0, 2, 3, 1])
        plot_images(X, labels)

    return (train_loader, valid_loader)

Train model function

def train_model(dataloders, model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    use_gpu = torch.cuda.is_available()
    best_model_wts = model.state_dict()
    best_acc = 0.0
    dataset_sizes = {'train': len(dataloders['train'].dataset), 
                     'valid': len(dataloders['valid'].dataset)}

    for epoch in range(num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train(True)
            else:
                model.train(False)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloders[phase]:
                if use_gpu:
                    inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.data
                running_corrects += torch.sum(preds == labels.data)
                
            
            if phase == 'train':
                train_epoch_loss = running_loss / dataset_sizes[phase]
                train_epoch_acc = running_corrects / dataset_sizes[phase]
                print(f'Epoch: {epoch}, Accuracy: {train_epoch_acc}')
            else:
                valid_epoch_loss = running_loss / dataset_sizes[phase]
                valid_epoch_acc = running_corrects / dataset_sizes[phase]
                
            if phase == 'valid' and valid_epoch_acc > best_acc:
                best_acc = valid_epoch_acc
                best_model_wts = model.state_dict()

        print('Epoch [{}/{}] train loss: {:.4f} acc: {:.4f} ' 
              'valid loss: {:.4f} acc: {:.4f}'.format(
                epoch, num_epochs - 1,
                train_epoch_loss, train_epoch_acc, 
                valid_epoch_loss, valid_epoch_acc))
            
    print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model

thanks

Doesn’t matter when you resize it.
From what I have seen, most of them resize the images during the transformation phase, ie inside the transforms.Compose()

Okay, but isn’t it odd. I already included the transforms.Resize() in the transforms.Compose() as seen here

# define transforms
    if augment:
        train_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(0.3),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=torchvision.transforms.Compose([
            transforms.Resize(img_size),
            transforms.RandomHorizontalFlip(0.3),
            transforms.ToTensor()])
    )

    valid_dataset = train_dataset = datasets.ImageFolder(
        root=train_dir,
        transform=transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
            normalize,])
        )

I even checked the image size as i defined the batch_size to be 64 and img_size to be 224

IN: trainimages, trainlabels = next(iter(train_loader))
IN: trainimages.shape
OUT: torch.Size([64, 3, 224, 224])

So you mean to say, even if the transforms.Resize() is included in transforms.Compose(), there is still an error of size mismatch ?

1 Like

Yup! That’s why I asked If i should include another transform right before passing the images to the model

Hey,I am getting the same error as @Flint even after doing transforms.Resize(). Did you solve the error?

1 Like

Unfortunately no, have you thought of a solution yet?

Note that Resize will behave differently on input images with a different height and width.
From the docs:

size ( sequence or int ) – Desired output size. If size is a sequence like (h, w), output size will be matched to this. If size is an int, smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to (size * height / width, size)

If you are dealing with such images, pass the size argument as a tuple:

transforms.Resize((img_size, img_size))

CC @pr6dA

19 Likes
Traceback (most recent call last):
  File "main/train.py", line 40, in <module>
    trainer.train(epoch)
  File "/home/redarknight/projects/p2s/main/../lib/core/base.py", line 151, in train
    for i, (img_joint, gt_mesh, gt_h36m_joint, gt_coco_joint, part_seg) in enumerate(batch_generator):
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/tqdm/std.py", line 1097, in __iter__
    for obj in iterable:
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 345, in __next__
    data = self._next_data()
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 838, in _next_data
    return self._process_data(data)
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 881, in _process_data
    data.reraise()
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/_utils.py", line 395, in reraise
    raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in DataLoader worker process 11.
Original Traceback (most recent call last):
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 79, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 64, in default_collate
    return default_collate([torch.as_tensor(b) for b in batch])
  File "/home/redarknight/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 55, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [16, 192, 144] at entry 0 and [14, 192, 144] at entry 45

Having similar error now and it has nothing to do with ‘Resize’.
I changed the input returned in getitem() from a numpy array with shape [14, 192, 144] to an array with [16, 192, 144].
Now strangely, the dataloader outputs the above error

How did you change this shape? Are you creating these arrays inside the __getitem__ method or are you indexing/slicing them? In the latter case, could the “last” slice be smaller?

Is there a way to have batch with tensors of different size in it? Like entry 0 : [4, 475, 320] and entry
1: [4, 256, 256]
Because I think that fully convolutional network like UNet can handle different shape of input so I thought it would be a good idea to give different shape of input for the training?

You would have to pad or resize the tensors to create a single batch of tensors. There is an ongoing effort to implement nested tensors, which would support variable shaped tensors, but I’m unsure in which state it is at the moment.

1 Like

Great thank you very much!

Hi @ptrblck I’m getting this
I read the file and resize it using cv2
RuntimeError: stack expects each tensor to be equal size, but got [256, 256, 3] at entry 0 and [256, 256] at entry 1

Based on the error message it seems that the second image tensor is a grayscale image (single channel), while the first one contains 3 channels.
You could either transform both images to grayscale or RGB to create matching shapes.

1 Like

Thank you so much sir

HI @ptrblck, I am working on CNN to make high-scale images from the low-scale images. Now I have tons of images of different sizes. Can I make a batch with different sizes tensor? Because I am getting the same error.

You can create a “batch” of tensors with different shapes by using e.g. a list (and a custom collate_fn in the DataLoader). However, you won’t be able to pass this list of tensors to the model directly and would either have to pass them one by one or create a single tensor after cropping/padding the tensors.
I don’t know how far the implementation of nested tensors is, but this utility would allow you to use a tensor object containing differently shaped tensors internally.