Failed to load images from getitem occasionally

KleinXin · April 12, 2022, 2:08pm

I want to add some custom online augmetation to the trainning data. So I tried to write the Dataset Class of my own. In the codes below, I use opencv to load the image from local disk but it occasionally failed. I checked the images I saved in ‘debug’ folder. The size of some of the images is zero. The error message is also shown below. Pytorch version 1.10 and Torchvision version 0.11.1.

import torch
import PIL
from PIL import Image
import os
import pandas as pd
import math
import numpy as np
import torchvision.datasets as tvdataset

import pdb
import cv2
from torch.utils.data import Dataset

class HotelID(Dataset):
      def __init__(self,img_root_dir,mask_dir,transform):

         self.img_root_dir = img_root_dir
         
         self.transform = transform

         self.data_list = []

         folders = os.listdir(img_root_dir)
         folders = sorted(folders)

         for folder_idx,folder_ in enumerate(folders):

             files = os.listdir(os.path.join(img_root_dir,folder_))
             files = sorted(files)

             for file in files:
                 self.data_list.append([os.path.join(img_root_dir,folder_,file),folder_idx])

         self.num_imgs = len(self.data_list)

      def __getitem__(self,idx):

          img_path = self.data_list[idx][0]
          label = self.data_list[idx][1]

          img_name = img_path.split('/')[-1]
          img_folder = img_path.split('/')[-2]

          I = cv2.imread(img_path)

          cv2.imwrite(os.path.join('./debug',img_folder+'_'+img_name),I)

          img_pil_out = Image.fromarray(I[:,:,::-1])

          img_pil_out.save(os.path.join('./debug',img_folder+'_'+img_name[:-4]+'_pil.jpg'))

          img_pil = self.transform(img_pil_out)

          return img_pil,label

      def __len__(self):
         return len(self.data_list)
          

def get_dataset(conf):

    conf['num_class'] = 3116
    
    transform_train = transforms.Compose([
        transforms.RandomCrop((conf.crop_size,conf.crop_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    
    transform_test = transforms.Compose([
        transforms.CenterCrop((conf.crop_size,conf.crop_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    ds_train = HotelID(conf.train_folder,conf.mask_folder,transform_train)
    #ds_train = tvdataset.ImageFolder(conf.train_folder, transform=transform_train)
    ds_test = tvdataset.ImageFolder(conf.test_folder, transform=transform_test)

    return ds_train,ds_test

for i, (input, target) in enumerate(trainloader):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 521, in __next__
    data = self._next_data()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
    return self._process_data(data)
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
    data.reraise()
  File "/usr/local/lib/python3.6/dist-packages/torch/_utils.py", line 434, in reraise
    raise exception
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "datasets/hotel_id_2022.py", line 109, in __getitem__
    img_pil = self.transform(img_pil_out)
  File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 61, in __call__
    img = t(img)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 642, in forward
    i, j, h, w = self.get_params(img, self.size)
  File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 597, in get_params
    if h + 1 < th or w + 1 < tw:
TypeError: '<' not supported between instances of 'int' and 'NoneType'

I checked those images and wrote a test scripts to load those images. It works fine.

The I add try … except and randomly choose another image when one image is failed to be loaded. The codes are shown below. Nevertheless, it stuck in the while loop and show the error message as shown below. From the error message, it can be known that another image is definetely been chosen. But all images are failed to be loaded. Anyone knows what happens?

import torch
import PIL
from PIL import Image
import os
import pandas as pd
import math
import numpy as np
import torchvision.datasets as tvdataset

import pdb
import cv2
from torch.utils.data import Dataset

class HotelID(Dataset):
      def __init__(self,img_root_dir,mask_dir,transform):

         self.img_root_dir = img_root_dir
         
         self.transform = transform

         self.data_list = []

         folders = os.listdir(img_root_dir)
         folders = sorted(folders)

         for folder_idx,folder_ in enumerate(folders):

             files = os.listdir(os.path.join(img_root_dir,folder_))
             files = sorted(files)

             for file in files:
                 self.data_list.append([os.path.join(img_root_dir,folder_,file),folder_idx])

         self.num_imgs = len(self.data_list)

      def __getitem__(self,idx):

          img_path = self.data_list[idx][0]
          label = self.data_list[idx][1]

          fail_flag = False

          img_name = img_path.split('/')[-1]
          img_folder = img_path.split('/')[-2]
          
          while True:
              try:

                 if fail_flag == True:
                    idx_ = np.random.choice(self.num_imgs,1,replace=False)[0]
                    img_path = self.data_list[idx_][0]
                    label = self.data_list[idx_][1]
                    img_name = img_path.split('/')[-1]
                    img_folder = img_path.split('/')[-2]
                    print(f"{img_name}\t{label}")

                 I = cv2.imread(img_path)

                 img_pil_ = Image.fromarray(I[:,:,::-1])
                 img_pil = self.transform(img_pil_)
                
                 print(f"{img_path}")
                 break

              except Exception as e:
                     fail_flag = True
                     print(f"{repr(e)}")
                     continue
          
          cv2.imwrite(os.path.join('./debug',img_folder+'_'+img_name),I)

          #img_pil_.save(os.path.join('./debug',img_folder+'_'+img_name[:-4]+'_pil.jpg'))

          return img_pil,label

      def __len__(self):
         return len(self.data_list)
          

def get_dataset(conf):

    conf['num_class'] = 3116

    transform_train = transforms.Compose([
        transforms.RandomCrop((conf.crop_size,conf.crop_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    
    transform_test = transforms.Compose([
        transforms.CenterCrop((conf.crop_size,conf.crop_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    ds_train = HotelID(conf.train_folder,conf.mask_folder,transform_train)
    #ds_train = tvdataset.ImageFolder(conf.train_folder, transform=transform_train)
    ds_test = tvdataset.ImageFolder(conf.test_folder, transform=transform_test)

    return ds_train,ds_test

000029619.jpg   813
000002340.jpg   2313
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000022355.jpg   1900
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000044926.jpg   381
000018797.jpg   2727
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000023085.jpg   2375
000020744.jpg   852
000022277.jpg   2428
000040648.jpg   1594
000032900.jpg   874
000040367.jpg   174
000015278.jpg   1025
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000034317.jpg   3016
000025976.jpg   2082

AlphaBetaGamma96 · April 12, 2022, 2:11pm

The line below is failing because it expects all terms to be ints. However, as you load in an empty image it’s most likely defaulting to type None, and hence throws the error. Try removing those empty images from your directory and re-run your code. The error should go away.

KleinXin · April 12, 2022, 2:18pm

I checked those images and wrote a test scripts to load those images. It works fine.
I tried to use try … except to omit those images that are failed to be loaded. It also does not work. I will paste my codes after a while.

AlphaBetaGamma96 · April 12, 2022, 3:59pm

Can you check the output when it loads the empty images? It most likely returning type None. Also, why do you need empty image files in a directory?

KleinXin · April 12, 2022, 4:22pm

I print the shape of I and img_pil_out. They are all fine.

I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
I.shape: (256, 256, 3)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
img_pil_out.size: (256, 256)
Traceback (most recent call last):
  File "main.py", line 123, in <module>
    main(conf)
  File "main.py", line 85, in main

I also checked all the empty images in the input debug folder. Their corresponding images in the input directory are not empty. I also wrote a test script to load them from the input image.

For example, image whose name is 000023821.jpg in folder 203152. It is empty in debug folder, but it is not empty in input directory.

AlphaBetaGamma96 · April 12, 2022, 5:53pm

One thing you could try is that the line from where the error is originating is this line here.

Some images work fine, and sometimes other images don’t. Potentially look at the images before they’re passed into self.transform(img_pil_out) via somthing like plt.imshow and see if there’s anything wrong with like NaN or None etc.

If I had to guess the precise error, th or tw is of type None most likely due to a fault in the input image as this is a standard function! Print out the images via matplotlib.pyplot.imshow and see if they’re ok. (Not via loading them from the directory)

An example would be,

import matplotlib.pyplot as plt

plt.imshow(img_pil_out)
plt.show()

You might find that it’s saved as an array of shape (256,256) but has all entries of None for example.

KleinXin · April 15, 2022, 3:15pm

I am now working on a server which I access with MobaXterm. So I could not show the image with GUI. Instead I add some lines that do the transformation to the image to the end of init function. It has no problems with the two images saved by opencv and pillow. But ‘self.transform’ still gives the same error as below.

class HotelID(Dataset):
      def __init__(self,img_root_dir,mask_dir,transform):

         self.img_root_dir = img_root_dir
         
         self.transform = transform

         self.data_list = []

         folders = os.listdir(img_root_dir)
         folders = sorted(folders)

         for folder_idx,folder_ in enumerate(folders):

             files = os.listdir(os.path.join(img_root_dir,folder_))
             files = sorted(files)

             for file in files:
                 self.data_list.append([os.path.join(img_root_dir,folder_,file),folder_idx])

         self.num_imgs = len(self.data_list)
 
         img_folder = self.data_list[0][0].split('/')[-2]
         img_name = self.data_list[0][0].split('/')[-1]

         I = cv2.imread(self.data_list[0][0])
         cv2.imwrite(os.path.join('./debug',img_folder+'_'+img_name[:-4]+'_opencv.jpg'),I)
         I_pil = Image.fromarray(I[:,:,::-1])
         I_pil.save(os.path.join('./debug',img_folder+'_'+img_name[:-4]+'_pil.jpg')) 

        self.I = self.transform(I_pil)

File "datasets/hotel_id_2022.py", line 215, in get_dataset
    ds_train = HotelID(conf.train_folder,conf.mask_folder,transform_train)
  File "datasets/hotel_id_2022.py", line 96, in __init__
    self.I = self.transform(I_pil)
  File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 61, in __call__
    img = t(img)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 642, in forward
    i, j, h, w = self.get_params(img, self.size)
  File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 597, in get_params
    if h + 1 < th or w + 1 < tw:
TypeError: '<' not supported between instances of 'int' and 'NoneType'

It looks so weird. Do you know why?

KleinXin · April 15, 2022, 3:41pm

Maybe it is the problem of the transform I am using now?

transform_train = transforms.Compose([
        transforms.RandomCrop((conf.crop_size,conf.crop_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

Failed to load images from __getitem__ occasionally

Failed to load images from getitem occasionally