Hi! I am a beginner in PyTorch. I have a folder containing thousands of segmented 3-D images (H x W x D x C) stored as MATLAB .mat files. I have searched for similar topics on the PyTorch forum (e.g., this link), but my dataloader still does not work. Specifically, the call `real_batch = next(iter(dataloader))` takes far too long to return. My dataloader is written as shown below. Can anyone offer any ideas? Your time is highly appreciated. Many thanks!
import os

import scipy.io as spio
import torch
import torch.utils.data
# Create the dataset
class customDataset(torch.utils.data.Dataset):
    """Map-style dataset that reads one MATLAB ``.mat`` file per sample.

    Each sample is the array stored under the ``'data'`` variable of a
    ``.mat`` file found directly inside ``image_folder``.
    NOTE(review): the arrays are presumably laid out as H x W x D x C;
    nothing in this class checks or enforces that -- confirm with the data.

    Args:
        image_folder: Directory containing the ``.mat`` files.
    """

    def __init__(self, image_folder):
        self.image_folder = os.path.abspath(image_folder)
        # Keep only .mat entries so a stray file or sub-directory does not
        # make loadmat fail mid-epoch, and sort because os.listdir returns
        # names in arbitrary order (index -> file must be reproducible).
        self.image_list = sorted(
            name
            for name in os.listdir(self.image_folder)
            if name.lower().endswith('.mat')
        )

    def __getitem__(self, index):
        """Return the ``'data'`` array of the ``index``-th file.

        Raises:
            IndexError: If ``index`` is outside the file list.
            KeyError: If the file has no variable named ``'data'``.
        """
        image_path = os.path.join(self.image_folder, self.image_list[index])
        # Ask loadmat for the single variable we need instead of parsing
        # every variable stored in the file.
        contents = spio.loadmat(image_path, variable_names=['data'])
        return contents['data']

    def __len__(self):
        """Return the number of ``.mat`` files discovered in the folder."""
        return len(self.image_list)
# Root directory of the dataset; the .mat files live in its 'images' sub-folder.
dataroot = r"C:\dataset"
# Number of samples per batch.
batch_size = 16
# Number of worker processes used by the DataLoader.
workers = 2

# On Windows, DataLoader workers are started with "spawn", which re-imports
# this module in every worker process. Without this guard each worker would
# re-run the code below (building its own DataLoader and workers), which
# makes `next(iter(dataloader))` hang or raise a RuntimeError.
if __name__ == "__main__":
    dataset = customDataset(os.path.join(dataroot, 'images'))
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
    )
    # Fetch a single batch to check that loading works end to end.
    real_batch = next(iter(dataloader))