I’d like to see how for batch_idx, (inputs, targets) in enumerate(trainloader):
acts so I typed it on main program. When I debug the line the error h5py objects cannot be pickled
is thrown though it works normally inside def(train):
block. Anyways to iterate through dataloader and get the current batch? Thanks.
To elaborate main program is like below. There is a FER2013
class to instantiate Dataset
class.
#creating datasets with this class
class FER2013(data.Dataset):
def __init__(self, split='Training', transform=None):
self.transform = transform
self.split = split # training set or test set
self.data = h5py.File('./data/data.h5', 'r', driver='core')
# now load the picked numpy arrays
if self.split == 'Training':
self.train_data = self.data['Training_pixel']
self.train_labels = self.data['Training_label']
self.train_data = np.asarray(self.train_data)
self.train_data = self.train_data.reshape((28709, 48, 48))
elif self.split == 'PublicTest':
self.PublicTest_data = self.data['PublicTest_pixel']
self.PublicTest_labels = self.data['PublicTest_label']
self.PublicTest_data = np.asarray(self.PublicTest_data)
self.PublicTest_data = self.PublicTest_data.reshape((3589, 48, 48))
else:
self.PrivateTest_data = self.data['PrivateTest_pixel']
self.PrivateTest_labels = self.data['PrivateTest_label']
self.PrivateTest_data = np.asarray(self.PrivateTest_data)
self.PrivateTest_data = self.PrivateTest_data.reshape((3589, 48, 48))
def __getitem__(self, index):
if self.split == 'Training':
img, target = self.train_data[index], self.train_labels[index]
elif self.split == 'PublicTest':
img, target = self.PublicTest_data[index], self.PublicTest_labels[index]
else:
img, target = self.PrivateTest_data[index], self.PrivateTest_labels[index]
# doing this so that it is consistent with all other datasets
# to return a PIL Image
img = img[:, :, np.newaxis]
img = np.concatenate((img, img, img), axis=2)
img = Image.fromarray(img)
if self.transform is not None:
img = self.transform(img)
return img, target
def __len__(self):
if self.split == 'Training':
return len(self.train_data)
elif self.split == 'PublicTest':
return len(self.PublicTest_data)
else:
return len(self.PrivateTest_data)
transform_train = transforms.Compose([transforms.ToTensor()])
trainset = FER2013(split = 'Training', transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=1)
When using next(iter(trainloader))
as you suggested this is the traceback:
a = next(iter(trainloader))
Traceback (most recent call last):
File "<ipython-input-74-117bf0cfa61e>", line 1, in <module>
a = next(iter(trainloader))
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\site-packages\torch\utils\data\dataloader.py", line 279, in __iter__
return _MultiProcessingDataLoaderIter(self)
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\site-packages\torch\utils\data\dataloader.py", line 719, in __init__
w.start()
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\multiprocessing\popen_spawn_win32.py", line 89, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Users\Tulpar\Anaconda3\envs\torch\lib\site-packages\h5py\_hl\base.py", line 308, in __getnewargs__
raise TypeError("h5py objects cannot be pickled")
TypeError: h5py objects cannot be pickled
I instantiate datasets from a h5 file so should that be causing problems? I’m on Win10 py3.8 btw.