Error loading Cityscapes dataset

I am trying to load the Cityscapes dataset (leftImg8bit and gtFine), downloaded from the website.
I am also using the Cityscapes dataset class provided by PyTorch (torchvision) together with a DataLoader. However, I am getting the error "TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found
<class 'PIL.PngImagePlugin.PngImageFile'>" when enumerating the loaded data.

I have already made sure to use transforms.ToTensor().
Any idea what I did wrong here?

An excerpt of my code:

# Training-time image pipeline: resize, random crop to 224 with a random
# horizontal flip, then convert to a tensor and normalize per channel.
data_transforms_train = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # NOTE(review): these mean/std values look like the commonly used
        # ImageNet channel statistics -- confirm they suit this dataset.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Validation-time image pipeline: deterministic resize + center crop,
    # followed by the same tensor conversion and normalization as training.
    data_transforms_val = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # NOTE(review): only `transform` is supplied below (no `target_transform`
    # or joint `transforms`), so the semantic target is left as a PIL image,
    # which is the type default_collate reports it cannot batch.
    # NOTE(review): the random crop/flip above would act on the image only and
    # so would presumably not stay aligned with the segmentation mask -- verify.
    train_dataset = torchvision.datasets.Cityscapes(os.path.join(root_dir, 'data/cityscapes'),
        split='train', mode='fine', target_type='semantic', transform=data_transforms_train)
    val_dataset = torchvision.datasets.Cityscapes(os.path.join(root_dir, 'data/cityscapes'),
        split='val', mode='fine', target_type='semantic', transform=data_transforms_val)

    # Shuffled loader over the training split.
    # NOTE(review): pin_memory is False here but True for the validation
    # loader -- confirm the difference is intentional.
    train_loader = data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=False,
        shuffle=True
    )
    # Loader over the validation split, iterated in dataset order.
    val_loader = data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=False
    )

Full traceback:
Traceback (most recent call last):
  File "/Users/xavier/Programming/FishNet/main_semantic_segmentation.py", line 271, in <module>
    main()
  File "/Users/xavier/Programming/FishNet/main_semantic_segmentation.py", line 259, in main
    train(train_loader, model, criterion, optimizer, epoch)
  File "/Users/xavier/Programming/FishNet/main_semantic_segmentation.py", line 99, in train
    for i, data in enumerate(train_loader):
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 521, in __next__
    data = self._next_data()
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
    return self._process_data(data)
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
    data.reraise()
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/_utils.py", line 425, in reraise
    raise self.exc_type(msg)
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 84, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 84, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/Users/xavier/miniforge3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 86, in default_collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.PngImagePlugin.PngImageFile'>

Thanks!

It is weird. I think you can try print(train_dataset[0]) to verify the output of your dataset.

It prints out:

(tensor([[[-1.0733, -1.0733, -1.0904,  ..., -0.9363, -0.3883, -0.3883],
         [-1.0733, -1.0904, -1.0904,  ..., -0.8678, -0.3541, -0.3541],
         [-1.0904, -1.0904, -1.0904,  ..., -0.8164, -0.2684, -0.2856],
         ...,
         [-0.6623, -1.0048, -0.9192,  ..., -0.5424, -0.6965, -0.6965],
         [-0.6109, -0.9363, -1.0562,  ..., -0.5938, -0.7650, -0.8849],
         [-0.5424, -0.8164, -1.1247,  ..., -0.7993, -0.8164, -0.8507]],

        [[-0.6352, -0.6527, -0.6527,  ..., -0.6877, -0.1450, -0.1099],
         [-0.6527, -0.6702, -0.6527,  ..., -0.6176, -0.0924, -0.0749],
         [-0.6527, -0.6527, -0.6702,  ..., -0.5651, -0.0224, -0.0049],
         ...,
         [-0.3025, -0.6702, -0.6702,  ..., -0.2850, -0.4426, -0.4426],
         [-0.2850, -0.6001, -0.8102,  ..., -0.3375, -0.4951, -0.6001],
         [-0.2325, -0.4776, -0.8627,  ..., -0.5476, -0.5826, -0.6001]],

        [[-0.4450, -0.4275, -0.4450,  ..., -0.6541, -0.1312, -0.0441],
         [-0.4450, -0.4450, -0.4624,  ..., -0.6018, -0.0441,  0.0605],
         [-0.4450, -0.4624, -0.4624,  ..., -0.5495,  0.0256,  0.1651],
         ...,
         [-0.3927, -0.6890, -0.6715,  ..., -0.3927, -0.4973, -0.4973],
         [-0.3578, -0.6193, -0.7761,  ..., -0.4450, -0.5670, -0.6018],
         [-0.3404, -0.5495, -0.8284,  ..., -0.5670, -0.5844, -0.6018]]]), <PIL.PngImagePlugin.PngImageFile image mode=L size=2048x1024 at 0x12958A790>)

That error does look strange, could you post how you are iterating over the dataloader? I think just a little additional code would be enough to potentially reproduce the issue.

My entire training code looks like this:

def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training pass over ``train_loader``.

    For every batch the inputs and labels are moved to ``device``, a forward
    pass is made, the loss is divided by the batch size, gradients are
    back-propagated, the optimizer is stepped and the scalar loss is recorded
    in a running ``AverageMeter``.

    Args:
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Network being trained; switched to training mode first.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        epoch: Current epoch index (not used inside this function).
    """
    train_loss = AverageMeter()
    model.train()

    for inputs, labels in train_loader:
        batch_size = inputs.size(0)
        # ``.to(device, dtype=...)`` moves and casts in one step; the legacy
        # ``torch.FloatTensor(t)`` / ``torch.LongTensor(t)`` constructors
        # reject an existing tensor of a different dtype.
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(inputs)

        # Labels are already on ``device``; no second transfer is needed.
        loss = criterion(outputs, labels) / batch_size
        loss.backward()
        optimizer.step()

        # ``loss`` is a 0-dim tensor, so ``loss.data[0]`` raises on
        # PyTorch >= 0.4; ``item()`` extracts the Python scalar.
        train_loss.update(loss.item(), batch_size)


    

It is clear that this default dataset returns the input tensor and the semantic labels, but the labels are a PIL Image.

You can write your own dataset to process the PIL Image. For example:

class MyCitySpace(torchvision.datasets.Cityscapes):
    """Cityscapes dataset that returns the target as a NumPy array.

    The parent ``__getitem__`` result is unpacked into ``(inputs, target)``
    and the target is wrapped with ``np.array`` so that it is no longer a
    PIL image when handed to the DataLoader's collate function.
    """

    def __getitem__(self, item):
        """Return ``(inputs, target)`` for index ``item``, target as ndarray."""
        inputs, target = super().__getitem__(item)
        return inputs, np.array(target)
1 Like

Thanks, that works! Additionally, I changed its data type to long using target.astype(np.long), as PyTorch requires.