RuntimeError: stack expects each tensor to be equal size, but got [3, 128, 128] at entry 0 and [1, 128, 128] at entry 5

I am trying to fine-tune a pretrained densenet121 on an image classification task with 789 images and 3 labels, and I received an error. I don't understand what caused the tensor size to change. Below is my code.

# Import libraries
import pandas as pd # For importing dataset
import numpy as np # For matrix operation
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cv2
import matplotlib.pyplot as plt
import torchvision
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms, models
import copy
import tqdm
from PIL import Image

%matplotlib inline

mix_dataset_dir = '/kaggle/input/complete-mix-dataset/Mix_images'
mix_dataset_files = os.listdir(mix_dataset_dir)

print(mix_dataset_files)

Below are a few image file names from the dataset for reference:

['Teenagers (95).jpg',
 'Adults (254).jpg',
 'Teenagers (108).jpg',
 'Teenagers (175).jpg',
 'Teenagers (126).jpg',
 ...]

Code continued

class MixDataset(Dataset):
    def __init__(self, file_list, dir, mode='train', transform = None):
        self.file_list = file_list
        self.dir = dir
        self.mode = mode
        self.transform = transform
        if self.mode == 'train':
            if 'Adults' in self.file_list[0]:
                self.label = 1
            elif 'Teenagers' in self.file_list[0]:
                self.label = 2
            else:
                self.label = 3
            
    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, idx):
        img = Image.open(os.path.join(self.dir, self.file_list[idx]))
        if self.transform:
            img = self.transform(img)
        if self.mode == 'train':
            img = img.numpy()
            return img.astype('float32'), self.label
        else:
            img = img.numpy()
            return img.astype('float32'), self.file_list[idx]

data_transform = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.ColorJitter(),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.Resize((128,128)),
    transforms.ToTensor()
])

mix = MixDataset(mix_dataset_files, mix_dataset_dir, transform = data_transform)

densenet_model = models.densenet121(pretrained = True)
densenet_model

for param in densenet_model.parameters():
    param.requires_grad = False
    
from collections import OrderedDict
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(1024, 512)),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(512, 256)),
    ('relu2', nn.ReLU()),
    ('fc3', nn.Linear(256, 2)),
    ('output', nn.LogSoftmax(dim = 1))
]))

densenet_model.classifier = classifier

criterion = nn.NLLLoss()

optimizer = optim.Adam(densenet_model.classifier.parameters(), lr = 0.003)

scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 200, 300], gamma=0.5)

train_loader = DataLoader(mix, batch_size=32, shuffle = True)
densenet_model.to('cuda')
epochs = 3
itr = 1
p_itr = 200
densenet_model.train()
total_loss = 0
loss_list = []
acc_list = []
for epoch in range(epochs):
    for samples, labels in train_loader:
        samples, labels = samples.to('cuda'), labels.to('cuda')
        optimizer.zero_grad()
        output = densenet_model(samples)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        scheduler.step()
        
        if itr%p_itr == 0:
            pred = torch.argmax(output, dim=1)
            correct = pred.eq(labels)
            acc = torch.mean(correct.float())
            print('[Epoch {}/{}] Iteration {} -> Train Loss: {:.4f}, Accuracy: {:.3f}'.format(epoch+1, epochs, itr, total_loss/p_itr, acc))
            loss_list.append(total_loss/p_itr)
            acc_list.append(acc)
            total_loss = 0
            
        itr += 1

plt.plot(loss_list, label='loss')
plt.plot(acc_list, label='accuracy')
plt.legend()
plt.title('training loss and accuracy')
plt.show()

Below is the complete error log I am getting:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-15-0e23ed2d21f0> in <module>
      9 acc_list = []
     10 for epoch in range(epochs):
---> 11     for samples, labels in train_loader:
     12         samples, labels = samples.to('cuda'), labels.to('cuda')
     13         optimizer.zero_grad()

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     45         else:
     46             data = self.dataset[possibly_batched_index]
---> 47         return self.collate_fn(data)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     77     elif isinstance(elem, container_abcs.Sequence):
     78         transposed = zip(*batch)
---> 79         return [default_collate(samples) for samples in transposed]
     80 
     81     raise TypeError(default_collate_err_msg_format.format(elem_type))

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py in <listcomp>(.0)
     77     elif isinstance(elem, container_abcs.Sequence):
     78         transposed = zip(*batch)
---> 79         return [default_collate(samples) for samples in transposed]
     80 
     81     raise TypeError(default_collate_err_msg_format.format(elem_type))

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     62                 raise TypeError(default_collate_err_msg_format.format(elem.dtype))
     63 
---> 64             return default_collate([torch.as_tensor(b) for b in batch])
     65         elif elem.shape == ():  # scalars
     66             return torch.as_tensor(batch)

/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     53             storage = elem.storage()._new_shared(numel)
     54             out = elem.new(storage)
---> 55         return torch.stack(batch, 0, out=out)
     56     elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
     57             and elem_type.__name__ != 'string_':

RuntimeError: stack expects each tensor to be equal size, but got [3, 128, 128] at entry 0 and [1, 128, 128] at entry 5

I have also gone through this link, 'RuntimeError: stack expects each tensor to be equal size, but got [3, 224, 224] at entry 0 and [3, 224, 336] at entry 3', which fixes the same problem I am facing, and incorporated the changes, but that did not help either.

Please let me know where I am going wrong.

Hi,
There might be grayscale images in your dataset: as you can see in the error, some of the images have 3 channels and some have only 1. Try adding a transform that converts every image to a single mode, either all grayscale or all RGB, so each sample has the same number of channels.
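For example, here is a minimal sketch (untested against your data) that forces every image to RGB before the other transforms run, so each sample comes out with 3 channels, which is also what DenseNet's first convolution expects:

# Force every image to 3-channel RGB before the other transforms,
# so ToTensor() always produces a [3, 128, 128] tensor.
data_transform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert('RGB')),  # PIL 'L'/'RGBA' -> 'RGB'
    transforms.Resize((256,256)),
    transforms.ColorJitter(),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.Resize((128,128)),
    transforms.ToTensor()
])

Equivalently, you could call .convert('RGB') on the PIL image right after Image.open in your __getitem__.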

Bests


Try resizing the tensor before feeding into your model.

The error is about the channel dimension, not the height and width. There is already a resize operation, and as you can see, the height and width in both entries are the same (128).
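To confirm that mixed channel counts are the cause, a quick sketch like the following (using your mix_dataset_dir) prints every file whose PIL mode is not 'RGB':

# List files that are not 3-channel RGB, e.g. mode 'L' (grayscale) or 'RGBA'.
import os
from PIL import Image

for name in os.listdir(mix_dataset_dir):
    mode = Image.open(os.path.join(mix_dataset_dir, name)).mode
    if mode != 'RGB':
        print(name, mode)

Once every image is converted to the same mode before ToTensor(), torch.stack in default_collate will receive equally sized tensors and the error should go away.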