num_workers in DataLoader

import glob
import os

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader


class CustomDataset(Dataset):
    def __init__(self, x_path, y_path):
        self.x_path = x_path
        self.y_path = y_path
        # Sort both file lists so X and Y pairs line up by index
        self.x_file_list = sorted(glob.glob(os.path.join(x_path, '*.npy')))
        self.y_file_list = sorted(glob.glob(os.path.join(y_path, '*.npy')))

        if len(self.x_file_list) != len(self.y_file_list):
            raise ValueError("Number of X and Y files must match.")
        
    def __len__(self):
        return len(self.x_file_list)

    def __getitem__(self, idx):
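        # Load one (X, Y) pair from disk and convert it to torch tensors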
        x_data = np.load(self.x_file_list[idx])
        y_data = np.load(self.y_file_list[idx])
        x_tensor = torch.tensor(x_data)
        y_tensor = torch.tensor(y_data)
        return x_tensor, y_tensor



train_dataset = CustomDataset(path_train_data_x, path_train_data_y)
val_dataset = CustomDataset(path_val_data_x, path_val_data_y)

train_loader = DataLoader(dataset=train_dataset, batch_size=200, num_workers=4)
val_loader = DataLoader(dataset=val_dataset, batch_size=200, num_workers=4)

So this is my Dataset class and DataLoader. I am facing a bottleneck in batch loading when I use the default value for num_workers (0, so all loading happens in the main process).

But when I increase num_workers and try to get a single batch with next(iter(train_loader)), it just runs like an infinite loop and never loads a single batch.

I am using a CPU with 16 cores (11th-gen Intel i7), and I am loading images saved as .npy files of size 128x128.

Add the if __name__ == '__main__': guard as described here and check whether that fixes the issue.
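Below is a minimal sketch of that guard, assuming everything lives in a single script that is run directly; the path variables are the same placeholders used above. The reason it matters: with num_workers > 0 the DataLoader spawns worker processes, and on Windows (or anywhere the spawn start method is used) each worker re-imports the main module, so any DataLoader created at module top level gets re-executed in every worker, which shows up as the hang / infinite loop described above.

if __name__ == '__main__':
    # Everything that creates or iterates a DataLoader goes inside the guard,
    # so worker processes that re-import this module do not re-run it.
    train_dataset = CustomDataset(path_train_data_x, path_train_data_y)
    val_dataset = CustomDataset(path_val_data_x, path_val_data_y)

    train_loader = DataLoader(dataset=train_dataset, batch_size=200, num_workers=4)
    val_loader = DataLoader(dataset=val_dataset, batch_size=200, num_workers=4)

    # Fetch one batch to check that the workers start up correctly.
    x_batch, y_batch = next(iter(train_loader))
    print(x_batch.shape, y_batch.shape)

If the hang goes away with the guard in place, the issue was the worker processes re-running the top-level DataLoader code rather than anything in the Dataset itself.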