Hi,
I'm trying to load my custom dataset correctly using Dataset and DataLoader.
Splitting the custom dataset into train/test data and creating train/test DataLoaders does not carry over the attributes of my dataset.
With an instance of the class CUSTOM_Dataset (defined below),
I can access the attributes of the DATASET object that was created.
This works fine:
DATASET.values
DATASET.label_1
DATASET.label_2
Splitting with torch.utils.data.random_split creates two sets that don't inherit the attributes of CUSTOM_Dataset.
I want to be able to work with the test_set created on the last line of the code below. Having access to:
test_set.values
test_set.label_1
test_set.label_2
is important, but I get the error:
> AttributeError: 'Subset' object has no attribute 'values'
I can create empty lists, iterate over test_set and extract the features
(for i, _, _ in test_set: ...), but this is very annoying.
I also have the same problem with train_loader and TEST_loader.
import torch
from torch.utils.data import Dataset, DataLoader
class CUSTOM_Dataset(Dataset):
    """Map-style dataset pairing each entry of ``values`` with two labels.

    The three input arrays are wrapped as tensors and kept as the public
    attributes ``values``, ``label_1`` and ``label_2``.
    """

    def __init__(self, values, label_1, label_2, transform=None):
        """Wrap the given NumPy arrays as tensors.

        Args:
            values: NumPy array of samples; stored as a float tensor.
            label_1: NumPy array with the first label; stored as an int tensor.
            label_2: NumPy array with the second label; stored as an int tensor.
            transform: Optional callable applied to a sample in ``__getitem__``.
        """
        # torch.from_numpy() wraps an existing ndarray, whereas torch.tensor()
        # always copies the data. The dtype casts below may still allocate a
        # new tensor when a conversion is actually needed.
        to_tensor = torch.from_numpy
        self.values = to_tensor(values).float()
        self.label_1 = to_tensor(label_1).int()
        self.label_2 = to_tensor(label_2).int()
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(sample, label_1, label_2)`` for ``index``.

        The sample is passed through ``transform`` when one is set; the labels
        are returned as stored.
        """
        sample, first, second = (
            self.values[index],
            self.label_1[index],
            self.label_2[index],
        )
        transform = self.transform
        if not transform:
            return sample, first, second
        return transform(sample), first, second

    def __len__(self):
        """Return the number of entries along the first axis of ``values``."""
        return len(self.values)
# Setup Pytorch dataloaders
def setup_data_loaders(dataset, batchsize, train_fraction=0.8):
    """Randomly split ``dataset`` into train/test parts and build loaders.

    Args:
        dataset: Dataset to split; it must support ``len()``.
        batchsize: Batch size used for both loaders.
        train_fraction: Share of the samples assigned to the training split.
            Defaults to 0.8, i.e. an 80/20 split.

    Returns:
        Tuple ``(train_loader, TEST_loader, test_set)``. ``test_set`` is the
        ``Subset`` produced by ``random_split``: it does not expose the
        attributes of ``dataset`` directly, but they stay reachable through
        ``test_set.dataset`` together with ``test_set.indices`` (for example
        ``test_set.dataset.values[test_set.indices]``).

    Raises:
        ValueError: If ``train_fraction`` is not within ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            f"train_fraction must be within [0, 1], got {train_fraction!r}"
        )

    # The original code read train_size without ever assigning it; derive it
    # from the requested fraction and give the remainder to the test split so
    # the two sizes always add up to len(dataset).
    total = len(dataset)
    train_size = int(train_fraction * total)
    test_size = total - train_size

    train_set, test_set = torch.utils.data.random_split(
        dataset, [train_size, test_size]
    )

    train_loader = DataLoader(
        train_set,
        batch_size=batchsize,
        shuffle=True,
        num_workers=0,
        pin_memory=False,  # Running on CPU
    )
    TEST_loader = DataLoader(
        test_set,
        batch_size=batchsize,
        shuffle=True,
        num_workers=0,
        pin_memory=False,  # Running on CPU
    )
    return train_loader, TEST_loader, test_set
# Build the dataset. values, label_1 and label_2 are not defined in this
# snippet -- presumably NumPy arrays prepared earlier; verify against caller.
DATASET = CUSTOM_Dataset(values=values, label_1=label_1, label_2=label_2)

# Train/test DataLoaders plus the held-out test split, with a batch size of 500.
train_loader, TEST_loader, test_set = setup_data_loaders(
    dataset=DATASET, batchsize=500
)