Training using this DataLoader and Dataset is too slow

I am working on a dataset that I stored as pickle files. The training data is laid out as one subfolder per class, each subfolder containing .pickle files (this is the layout the glob calls below assume):

train data:


I think the big delay during training comes from this Dataset class:

    class CustomDataset_train(Dataset):
        def __init__(self):
            self.data_path = dataset_dir_train
            self.data = []
            file_list = glob.glob(self.data_path + "*")
            for class_path in file_list:
                class_name = class_path.split("\\")[-1]
                for data_path in glob.glob(class_path + "/*.pickle"):
                    self.data.append([data_path, class_name])

            self.class_map = {'class1 ': 0,
                              # ... remaining class-name -> id entries
                              }

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            data_path, class_name = self.data[idx]
            # each file holds one (250, 164, 3) NumPy array
            with open(data_path, "rb") as f:
                data = pickle.load(f)
            class_id = self.class_map[class_name]
            fused_tensor_data = torch.from_numpy(data)
            return (fused_tensor_data, class_id)
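Since every `__getitem__` call opens and unpickles one file, much of that per-sample I/O can be overlapped with training by letting the DataLoader fetch samples in background worker processes. A minimal sketch (using a stand-in `TensorDataset` with the same sample shape, since the real `dataset_dir_train` is not available here):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

# Stand-in for CustomDataset_train: 8 samples with the same
# (250, 164, 3) shape, so the snippet runs on its own.
train_set = TensorDataset(torch.randn(8, 250, 164, 3),
                          torch.zeros(8, dtype=torch.long))

# num_workers > 0 moves the per-sample loading/unpickling into
# background worker processes; pin_memory speeds up CPU-to-GPU
# copies; persistent_workers avoids respawning workers each epoch.
loader = DataLoader(train_set, batch_size=4, shuffle=True,
                    num_workers=2, pin_memory=True,
                    persistent_workers=True)

for batch, labels in loader:
    pass  # each batch has shape [4, 250, 164, 3]
```

The batch size and worker count here are placeholders; they need tuning per machine.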

Each pickle file stores a NumPy array of shape (250, 164, 3). Is there a way to make this faster, something like torchvision's DatasetFolder?