I'm facing a memory issue. I was trying to use 500,000 images to train my model, but it runs out of memory while loading the images, before training even starts.
At first I used ImageFolder to load the dataset. I searched the forums, and someone suggested I write my own dataset class.
from sklearn.model_selection import train_test_split
from torchvision import datasets
import torch

image_datasets = datasets.ImageFolder(dataset_dir, data_transforms['train'])
# split the Dataset object itself with sklearn
train_id, val_id = train_test_split(image_datasets, test_size=0.01)
train_dataloaders = torch.utils.data.DataLoader(train_id, batch_size=mc.batch_size,
                                                shuffle=True, num_workers=4)
val_dataloaders = torch.utils.data.DataLoader(val_id, batch_size=mc.batch_size,
                                               shuffle=True, num_workers=4)
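As far as I can tell, sklearn's train_test_split indexes every element of whatever it is given, so passing the Dataset object itself makes it call __getitem__ for all 500,000 images and hold them in memory at once. A sketch of splitting plain integer indices instead (assuming that is indeed the cause):

from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

# shuffle and split index positions only; no image is touched here
indices = list(range(len(image_datasets)))
train_idx, val_idx = train_test_split(indices, test_size=0.01)

# Subset is a lazy view: images are still read only when the DataLoader asks
train_id = Subset(image_datasets, train_idx)
val_id = Subset(image_datasets, val_idx)

The two DataLoader lines above would then work unchanged on train_id and val_id.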
Here is my own dataset code:
import os

import numpy as np
from skimage import io
from torch.utils.data import Dataset


class MyDataset(Dataset):
    def __init__(self, root_dir, allfile, train=True, transform=None):
        self.root_dir = root_dir
        self.train = train
        self.allfile = allfile  # array of [filename, label] rows
        self.transform = transform
        self.mc = Myconfig()  # my own config object

    def __len__(self):
        return self.allfile.shape[0]

    def __getitem__(self, idx):
        img_dir = os.path.join(self.root_dir, self.allfile[idx, 0])
        # one image is read from disk per call, so nothing should be preloaded
        img = io.imread(img_dir).astype(np.uint8)
        if self.train:
            label = np.array(self.allfile[idx, 1]).astype(int)
            sample = {'image': img, 'label': label, 'address': img_dir}
        else:
            sample = {'image': img, 'address': img_dir}
        if self.transform:
            sample = self.transform(sample)
        return sample
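Since __getitem__ returns a dict rather than an (image, label) tuple, the Rescale and ToTensor used below have to be custom callables that operate on the whole sample dict. Mine follow the pattern from the PyTorch data-loading tutorial; roughly like this (a sketch, the exact bodies are assumptions):

import torch
from skimage import transform as sktf

class Rescale:
    # resize sample['image'] to a fixed (height, width)
    def __init__(self, output_size):
        self.output_size = output_size

    def __call__(self, sample):
        sample['image'] = sktf.resize(sample['image'], self.output_size)
        return sample

class ToTensor:
    # convert the HWC numpy image to a CHW float tensor
    def __call__(self, sample):
        img = sample['image']
        if img.ndim == 2:
            img = img[:, :, None]  # grayscale: add a channel axis
        sample['image'] = torch.from_numpy(img.transpose(2, 0, 1).copy()).float()
        return sample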
When I use my dataset:
from torchvision import transforms

image_datasets = MyDataset(mc.dataset_dir, allfiles, transform=transforms.Compose([
    Rescale((28, 28)),
    ToTensor(),
]))
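Here allfiles is just the NumPy array of [filename, label] rows that MyDataset indexes. For completeness, it is built along these lines (a sketch; the labels.csv name and its filename,label columns are hypothetical):

import pandas as pd

# hypothetical labels file: one "filename,label" row per image
allfiles = pd.read_csv('labels.csv', dtype=str).to_numpy()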
I still face the same problem.
Could anyone help me figure out what is using all the memory?