I am quite new to PyTorch and am working on a multiclass classification problem. The images of all the classes are stored in a single folder. I wrote a custom dataset class that builds samples, each holding an image and its respective label, as follows:
class DFU_Dataset(Dataset):
    """Image dataset whose file names and labels come from a CSV file.

    The first CSV column is joined onto ``root_dir`` to locate each image.
    The label is the position of the largest value among the columns from
    ``'none'`` through ``'both'`` (presumably a one-hot encoding -- confirm
    against the CSV).
    """

    def __init__(self, root_dir, csv, transform=None, loader=pil_loader):
        """Read the label table and store the loading configuration.

        Args:
            root_dir: Directory that image file names are joined onto.
            csv: Path or buffer handed to ``pandas.read_csv``.
            transform: Optional callable applied to every loaded image;
                ``None`` leaves the image as returned by ``loader``.
            loader: Callable that takes an image path and returns the image.
        """
        self.root_dir = root_dir
        self.labels = pd.read_csv(csv)
        self.transform = transform
        self.loader = loader

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the sample for row ``idx``.

        Returns:
            A dict with keys ``'image'`` (the loaded, optionally transformed
            image) and ``'label'`` (a 0-d int64 tensor with the class index).
        """
        # An index may arrive as a tensor (the PyTorch custom-dataset
        # tutorial guards against this); pandas indexing needs a plain value.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.labels.iloc[idx, 0])
        image = self.loader(img_name)

        class_scores = self.labels.loc[idx, 'none':'both'].values
        # Build the label with an explicit int64 dtype instead of relying on
        # the platform-dependent integer type returned by np.argmax.
        label = torch.tensor(int(np.argmax(class_scores)), dtype=torch.long)

        if self.transform is not None:
            image = self.transform(image)

        return {'image': image, 'label': label}
# Minimal preprocessing pipeline for now; further transforms to be added later.
transform = transforms.Compose([transforms.ToTensor()])

# NOTE(review): this rebinds the name DFU_Dataset from the class to an
# instance of it, so the class is no longer reachable under that name.
# The code further down uses DFU_Dataset as the instance.
DFU_Dataset = DFU_Dataset(
    root_dir='/Users/sidraaleem/Documents/code/DFU/Labelled_test_images',
    csv='/Users/sidraaleem/Documents/code/DFU/Labelled_data_ground_truth.csv',
    transform=transform,
)
Now I am trying to split the dataset into train and test sets as follows:
# Split the dataset indices into a train part and a test part (80% / 20%)
# and build one DataLoader per part on top of the same dataset object.
dataset_size = len(DFU_Dataset)
indices = list(range(dataset_size))
split = int(np.floor(0.2 * dataset_size))

# Seed NumPy's global RNG immediately before the single shuffle so the same
# indices land in each part on every run.
np.random.seed(42)
np.random.shuffle(indices)
train_idx, test_idx = indices[split:], indices[:split]

# Each sampler draws only from its own index list.
train_sampler = SubsetRandomSampler(train_idx)
test_sampler = SubsetRandomSampler(test_idx)

train_loader = DataLoader(dataset=DFU_Dataset, shuffle=False, sampler=train_sampler)
test_loader = DataLoader(dataset=DFU_Dataset, shuffle=False, sampler=test_sampler)

# loader.dataset is the whole dataset object handed to DataLoader, so both
# lines print the total number of images regardless of the sampler. The size
# of each part is len(train_idx) / len(test_idx).
print("train loader:", len(train_loader.dataset))
print("test loader", len(test_loader.dataset))
train loader: 5955
test loader 5955
However, because my custom dataset reads the whole data set, using DataLoader with a sampler does not seem to make any difference: both calls return 5955, which is the total number of images in my data set.
Do I need to create separate loaders for the train and test sets, even though I need exactly the same logic for both (i.e. pairing each image name with its respective label)?
Given that my splitting method only returns indices into the original Dataset, how should I proceed with the new loader?