I have a custom Pytorch dataset and its corresponding dataloader. However, when I iterate over the dataloader, it seems like it skips the loop entirely. I have checked that the length of the dataloader is not 0.
I have a custom dataset which, given a list of folders, lists all the images in those folders, and then returns the image and its filepath each time it is indexed.
class BBoxDataset(Dataset):
    """Dataset over every ``*.jpg`` file found directly inside the given
    directories; each item is a ``(PIL RGB image, filepath)`` pair."""

    def __init__(self, path_list):
        """
        Args:
            path_list: an iterable of directory paths, or a single directory
                path given as ``str``/``os.PathLike``.

        NOTE: without the string check below, a bare string would be iterated
        character by character, every per-character glob would match nothing,
        and the dataset would be silently empty — making any DataLoader built
        on it skip its loop entirely.
        """
        super().__init__()
        if isinstance(path_list, (str, os.PathLike)):
            path_list = [path_list]
        self.img_dirs = list(path_list)
        self.update_img_paths()

    def update_img_paths(self):
        """Rebuild the flat list of image paths from ``self.img_dirs``."""
        self.img_paths = []
        for img_dir in self.img_dirs:
            # glob.glob guarantees no particular order; sort so the dataset
            # ordering is deterministic across platforms and runs.
            self.img_paths.extend(sorted(glob.glob(os.path.join(img_dir, "*.jpg"))))

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        filepath = self.img_paths[idx]
        # convert("RGB") normalises grayscale/palette files to 3 channels.
        img = Image.open(filepath).convert("RGB")
        return img, filepath
I have created a custom dataloader for the same and a function to return the constructed dataloader.
def get_dataloader(image_paths, batch_size: int, shuffle: bool = None,
                   num_workers: int = 0, phase: str = 'infer',
                   sampler: Sampler = None) -> DataLoader:
    """Build a DataLoader over a :class:`BBoxDataset`.

    Args:
        image_paths: directory path(s), forwarded to ``BBoxDataset``.
        batch_size: number of samples per batch.
        shuffle: explicit shuffle flag. When ``None`` (default) it is derived
            from ``phase``: ``False`` for ``'infer'``, ``True`` otherwise.
            (The old signature defaulted this to an external setting but then
            unconditionally overwrote it, silently ignoring callers' values;
            an explicit value is now honoured.)
        num_workers: DataLoader worker-process count.
        phase: ``'infer'`` keeps the final partial batch and disables
            shuffling; any other phase drops the last partial batch.
        sampler: optional sampler *factory* called as
            ``sampler(dataset, shuffle=...)``. Mutually exclusive with
            ``shuffle`` in DataLoader, so ``shuffle`` is not passed when a
            sampler is supplied.

    Returns:
        The constructed ``DataLoader`` (the old ``-> None`` annotation was
        wrong).
    """
    dataset = BBoxDataset(path_list=image_paths)
    drop_last = phase != 'infer'
    if shuffle is None:
        shuffle = phase != 'infer'
    kwargs_dl = dict(num_workers=num_workers, batch_size=batch_size,
                     pin_memory=True, drop_last=drop_last)
    if sampler is not None:
        # DataLoader rejects shuffle=True together with a sampler; hand the
        # shuffle decision to the sampler instead.
        kwargs_dl['sampler'] = sampler(dataset, shuffle=shuffle)
    else:
        kwargs_dl['shuffle'] = shuffle
    return DataLoader(dataset, **kwargs_dl)
However, when I try to loop through the dataloader, it does not even enter the loop. In my case, "MLN" is never printed, and neither is the data from the batch.
# Build an inference dataloader and walk through it batch by batch.
infer_dataloader = get_dataloader(dataset_path, batch_size=1)
for batch_idx, batch in enumerate(infer_dataloader):
    print("MLN")
    img, path = batch
    print(img, path)
I am not sure where I am going wrong. Can anyone help?