In my dataset, in the acquisition time, the name of images is sorted based on their condition. For example the first three images belongs to the same patient with different heartbeat. and the next three images are belong to the another patient …
I’m not sure how can I randomised cases to the train:validation:test. so my test set will be truly independent?
At the moment is snippet for splitting which is not randomize the data.
folder_data = glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\imagesResized\\*.png")
folder_mask = glob.glob("D:\\Neda\\Pytorch\\U-net\\my_data\\labelsResized\\*.png") #labels_2nd_Resized_Binary
# split these path using a certain percentage
len_data = len(folder_data)
print("count of dataset: ", len_data)
# count of dataset: 992
split_1 = int(0.6 * len(folder_data))
split_2 = int(0.8 * len(folder_data))
#folder_data.sort()
train_image_paths = folder_data[:split_1]
print("count of train images is: ", len(train_image_paths))
valid_image_paths = folder_data[split_1:split_2]
print("count of validation image is: ", len(valid_image_paths))
test_image_paths = folder_data[split_2:]
print("count of test images is: ", len(test_image_paths))
train_mask_paths = folder_mask[:split_1]
valid_mask_paths = folder_mask[split_1:split_2]
test_mask_paths = folder_mask[split_2:]
train_dataset = CustomDataset(train_image_paths, train_mask_paths)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)
valid_dataset = CustomDataset(valid_image_paths, valid_mask_paths)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=True, num_workers=2)
test_dataset = CustomDataset(test_image_paths, test_mask_paths)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)
dataLoaders = {
'train': train_loader,
'valid': valid_loader,
'test': test_loader,
}
if I add random.shuffle()
the files will sort randomly but how can I do the same for mask folder as train dataset should have correspond mask in there?
import os, random
I = os.listdir('D:/Neda/Pytorch/U-net/my_data/imagesResized')
random.shuffle(I)
print(I)