Hi, this is my code for the data loader:
class PileupDataset(Dataset):
    """Dataset that serves images stored in HDF5 files listed by a CSV index.

    The CSV file is read without a header row. Its columns are used as:

    * column 0 -- path to an HDF5 file containing ``images`` and ``labels``
      datasets
    * column 1 -- index of the sample inside those datasets
    * column 2 -- label for the sample, returned to the caller
    * column 3 -- ``rec`` value returned alongside each sample (presumably a
      record identifier; confirm against the code that writes the CSV)

    Arguments:
        csv_path: path of the CSV index file.
        transform: optional callable applied to each image after it has been
            cast to ``uint8``.
    """

    def __init__(self, csv_path, transform=None):
        """Load the CSV index and check that every referenced file exists.

        Raises:
            AssertionError: if any path in column 0 is not an existing file.
        """
        tmp_df = pd.read_csv(csv_path, header=None)
        # NOTE: kept as an assert so the raised exception type is unchanged;
        # be aware the check is skipped when Python runs with -O.
        assert tmp_df[0].apply(os.path.isfile).all(), \
            "Some images referenced in the CSV file were not found"
        self.transform = transform
        self.X_train = tmp_df[0]
        self.X_train_index = tmp_df[1]
        self.y_train = tmp_df[2]
        self.rec = tmp_df[3]

    def __getitem__(self, index):
        """Return ``(img, label_csv, rec)`` for row ``index`` of the CSV.

        The HDF5 file is opened for this call only and closed before
        returning, so no file handle is kept on the dataset object.
        """
        hdf5_file_path = self.X_train[index]
        indx = self.X_train_index[index]
        label_csv = self.y_train[index]
        rec = self.rec[index]

        # Read everything needed while the file is open; the context manager
        # closes the handle even if one of the reads raises.
        with h5py.File(hdf5_file_path, 'r') as hdf5_file:
            img = hdf5_file['images'][indx]
            label = hdf5_file['labels'][indx]

        img = img.astype(dtype=np.uint8)
        if self.transform is not None:
            img = self.transform(img)

        # A mismatch is only reported on stderr; the CSV label is what gets
        # returned either way.
        if label != label_csv:
            sys.stderr.write("CSV LABEL DID NOT MATCH HDF5 LABEL" + str(rec) + "\n")
        return img, label_csv, rec

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.X_train.index)
It works well on the big machine (253 GB RAM / 8 GPUs) with 40 workers, but when I test locally on my Mac (8 cores), even num_workers=2 gives a SIGSEGV. Am I missing something here?