I want to add some custom online augmetation to the trainning data. So I tried to write the Dataset Class of my own. In the codes below, I use opencv to load the image from local disk but it occasionally failed. I checked the images I saved in ‘debug’ folder. The size of some of the images is zero. The error message is also shown below. Pytorch version 1.10 and Torchvision version 0.11.1.
import torch
import PIL
from PIL import Image
import os
import pandas as pd
import math
import numpy as np
import torchvision.datasets as tvdataset
import pdb
import cv2
from torch.utils.data import Dataset
class HotelID(Dataset):
def __init__(self,img_root_dir,mask_dir,transform):
self.img_root_dir = img_root_dir
self.transform = transform
self.data_list = []
folders = os.listdir(img_root_dir)
folders = sorted(folders)
for folder_idx,folder_ in enumerate(folders):
files = os.listdir(os.path.join(img_root_dir,folder_))
files = sorted(files)
for file in files:
self.data_list.append([os.path.join(img_root_dir,folder_,file),folder_idx])
self.num_imgs = len(self.data_list)
def __getitem__(self,idx):
img_path = self.data_list[idx][0]
label = self.data_list[idx][1]
img_name = img_path.split('/')[-1]
img_folder = img_path.split('/')[-2]
I = cv2.imread(img_path)
cv2.imwrite(os.path.join('./debug',img_folder+'_'+img_name),I)
img_pil_out = Image.fromarray(I[:,:,::-1])
img_pil_out.save(os.path.join('./debug',img_folder+'_'+img_name[:-4]+'_pil.jpg'))
img_pil = self.transform(img_pil_out)
return img_pil,label
def __len__(self):
return len(self.data_list)
def get_dataset(conf):
conf['num_class'] = 3116
transform_train = transforms.Compose([
transforms.RandomCrop((conf.crop_size,conf.crop_size)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
transform_test = transforms.Compose([
transforms.CenterCrop((conf.crop_size,conf.crop_size)),
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
ds_train = HotelID(conf.train_folder,conf.mask_folder,transform_train)
#ds_train = tvdataset.ImageFolder(conf.train_folder, transform=transform_train)
ds_test = tvdataset.ImageFolder(conf.test_folder, transform=transform_test)
return ds_train,ds_test
for i, (input, target) in enumerate(trainloader):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
data.reraise()
File "/usr/local/lib/python3.6/dist-packages/torch/_utils.py", line 434, in reraise
raise exception
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "datasets/hotel_id_2022.py", line 109, in __getitem__
img_pil = self.transform(img_pil_out)
File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 61, in __call__
img = t(img)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 642, in forward
i, j, h, w = self.get_params(img, self.size)
File "/usr/local/lib/python3.6/dist-packages/torchvision/transforms/transforms.py", line 597, in get_params
if h + 1 < th or w + 1 < tw:
TypeError: '<' not supported between instances of 'int' and 'NoneType'
I checked those images and wrote a test scripts to load those images. It works fine.
The I add try … except and randomly choose another image when one image is failed to be loaded. The codes are shown below. Nevertheless, it stuck in the while loop and show the error message as shown below. From the error message, it can be known that another image is definetely been chosen. But all images are failed to be loaded. Anyone knows what happens?
import torch
import PIL
from PIL import Image
import os
import pandas as pd
import math
import numpy as np
import torchvision.datasets as tvdataset
import pdb
import cv2
from torch.utils.data import Dataset
class HotelID(Dataset):
def __init__(self,img_root_dir,mask_dir,transform):
self.img_root_dir = img_root_dir
self.transform = transform
self.data_list = []
folders = os.listdir(img_root_dir)
folders = sorted(folders)
for folder_idx,folder_ in enumerate(folders):
files = os.listdir(os.path.join(img_root_dir,folder_))
files = sorted(files)
for file in files:
self.data_list.append([os.path.join(img_root_dir,folder_,file),folder_idx])
self.num_imgs = len(self.data_list)
def __getitem__(self,idx):
img_path = self.data_list[idx][0]
label = self.data_list[idx][1]
fail_flag = False
img_name = img_path.split('/')[-1]
img_folder = img_path.split('/')[-2]
while True:
try:
if fail_flag == True:
idx_ = np.random.choice(self.num_imgs,1,replace=False)[0]
img_path = self.data_list[idx_][0]
label = self.data_list[idx_][1]
img_name = img_path.split('/')[-1]
img_folder = img_path.split('/')[-2]
print(f"{img_name}\t{label}")
I = cv2.imread(img_path)
img_pil_ = Image.fromarray(I[:,:,::-1])
img_pil = self.transform(img_pil_)
print(f"{img_path}")
break
except Exception as e:
fail_flag = True
print(f"{repr(e)}")
continue
cv2.imwrite(os.path.join('./debug',img_folder+'_'+img_name),I)
#img_pil_.save(os.path.join('./debug',img_folder+'_'+img_name[:-4]+'_pil.jpg'))
return img_pil,label
def __len__(self):
return len(self.data_list)
def get_dataset(conf):
conf['num_class'] = 3116
transform_train = transforms.Compose([
transforms.RandomCrop((conf.crop_size,conf.crop_size)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
transform_test = transforms.Compose([
transforms.CenterCrop((conf.crop_size,conf.crop_size)),
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
ds_train = HotelID(conf.train_folder,conf.mask_folder,transform_train)
#ds_train = tvdataset.ImageFolder(conf.train_folder, transform=transform_train)
ds_test = tvdataset.ImageFolder(conf.test_folder, transform=transform_test)
return ds_train,ds_test
000029619.jpg 813
000002340.jpg 2313
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000022355.jpg 1900
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000044926.jpg 381
000018797.jpg 2727
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000023085.jpg 2375
000020744.jpg 852
000022277.jpg 2428
000040648.jpg 1594
000032900.jpg 874
000040367.jpg 174
000015278.jpg 1025
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
TypeError("'<' not supported between instances of 'int' and 'NoneType'",)
000034317.jpg 3016
000025976.jpg 2082