TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found object, but the output is torch.Tensor

Hi everyone, I’m trying to train a classifier on the CelebA dataset, but I’m encountering an error when trying to load my data.

The code is:

class MultiClassCelebA(Dataset):
    """Image dataset driven by a dataframe of file names and labels.

    The dataframe index supplies the image file names (relative to
    ``folder_dir``) and its ``labels`` column supplies the targets.

    Args:
        dataframe: Table whose index holds file names and which exposes a
            ``labels`` column.
        folder_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded PIL image.
        target_transform: Optional callable applied to the raw label.
    """

    def __init__(self, dataframe, folder_dir, transform=None, target_transform=None):
        super().__init__()
        self.dataframe = dataframe
        self.folder_dir = folder_dir
        self.transform = transform
        self.target_transform = target_transform
        self.file_names = dataframe.index
        self.labels = dataframe.labels.values.tolist()

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.dataframe)

    @staticmethod
    def _collatable_label(label):
        """Turn an object-dtype NumPy label into a float32 tensor.

        ``default_collate`` refuses NumPy arrays whose dtype is ``object``
        ("found object"), which is what a dataframe column holding arrays or
        mixed values yields. Any other label is returned untouched.
        """
        # Local imports keep this block self-contained in the file.
        import numpy as np
        import torch

        if isinstance(label, np.ndarray) and label.dtype == object:
            # tolist() unwraps nested arrays so equal-shaped rows stack cleanly.
            numeric = np.asarray(label.tolist(), dtype=np.float32)
            return torch.from_numpy(numeric)
        return label

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``.

        The image is fully read and converted to RGB before the file is
        closed, so every sample has the same channel layout and no file
        handle is left open. The label is made collatable after the optional
        ``target_transform`` has run.
        """
        path = os.path.join(self.folder_dir, self.file_names[index])
        with Image.open(path) as handle:
            # convert() forces the lazy load and returns an independent image.
            image = handle.convert("RGB")
        label = self.labels[index][0]
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, self._collatable_label(label)

# Resize every image to 256x256, then turn the PIL image into a tensor.
tfms = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.PILToTensor(),
    ]
)

# NOTE: despite the "_dl" suffix these two objects are datasets, not loaders.
train_dl = MultiClassCelebA(
    dataframe=train_df,
    folder_dir=celeb_path + '/train/',
    transform=tfms,
)
val_dl = MultiClassCelebA(
    dataframe=val_df,
    folder_dir=celeb_path + '/val/',
    transform=tfms,
)

train_dataloader = DataLoader(dataset=train_dl, batch_size=32, shuffle=True)
val_dataloader = DataLoader(dataset=val_dl, batch_size=32, shuffle=True)
# Pull a single batch to exercise loading and collation end to end.
next(iter(train_dataloader))

but I’m getting the above TypeError from the last line. I’ve checked the output of the dataset, e.g. for index 2

# Inspects only element 0 of sample 2 (the image); the label is element 1
# of the returned pair and is not checked by this expression.
type(train_dl[2][0])

and it confirms that it is a torch.Tensor. Why is the dataloader seeing it as an Object and how can I fix this? Thanks

I’m unsure what might be causing the issue. Creating a random PIL.Image inside the Dataset works fine:

class MultiClassCelebA(Dataset):
    """Synthetic stand-in dataset that fabricates random images and labels.

    ``dataframe`` and ``folder_dir`` are accepted so the constructor matches
    the real dataset's signature, but neither is used.
    """

    def __init__(self, dataframe, folder_dir, transform=None, target_transform=None):
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Report a fixed size of ten samples."""
        return 10

    def __getitem__(self, index):
        """Return a random PIL image and a random one-element label tensor."""
        # Random noise first, then the label, so the RNG draw order is fixed.
        noise = torch.randn(3, 224, 224)
        to_pil = transforms.ToPILImage()
        sample = to_pil(noise)
        target = torch.randint(0, 10, (1,))

        sample = sample if self.transform is None else self.transform(sample)
        target = target if self.target_transform is None else self.target_transform(target)
        return sample, target

# Same preprocessing as the failing setup: resize, then PIL -> tensor.
tfms = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.PILToTensor(),
    ]
)

# The synthetic dataset ignores its first two arguments, so None is passed.
train_dl = MultiClassCelebA(None, None, transform=tfms)

train_dataloader = DataLoader(dataset=train_dl, batch_size=32, shuffle=True)
# Fetch one batch to confirm that collation succeeds with tensor labels.
next(iter(train_dataloader))

Fixed the issue in `__getitem__` by returning `image, label.float()`, i.e. the label, not the image, was the element that could not be collated.