Hello. I have recently been trying to load TFRecords using PyTorch. However, it seems that if I wrap a tf.data.TFRecordDataset
in a PyTorch Dataset and use a DataLoader with num_workers > 0, the program does not work properly.
I am wondering whether there is a better way to load TFRecords, or a better way to store large-scale datasets.
Here is the example code:
class TestDataset(Dataset):
    """Map-style PyTorch dataset that reads samples sequentially from TFRecords.

    The TensorFlow dataset and its iterator are created lazily, on the first
    ``__getitem__`` call, instead of in ``__init__``. With
    ``DataLoader(num_workers > 0)`` the dataset object is copied into each
    worker process; building the TensorFlow objects only after that copy
    means no TensorFlow state is created in the parent and then shared
    with (or pickled for) the workers.

    NOTE(review): records are consumed in file order and the index passed to
    ``__getitem__`` is ignored, so shuffling/sampling has no effect, and with
    several workers each worker presumably reads the file from the start
    (duplicated samples) -- confirm whether that is acceptable for the caller.

    Args:
        record_path: TFRecord file path(s), passed unchanged to
            ``tf.data.TFRecordDataset``.
        length: Value reported by ``len(dataset)``. Defaults to 100, the
            value that was previously hard-coded.
    """

    def __init__(self, record_path, *, length=100):
        self.record_path = record_path
        self._length = length
        # Both are populated on first access by ``_ensure_iterator``.
        self.reader = None
        self._records_iter = None

    def __len__(self):
        """Return the configured number of samples."""
        return self._length

    def _ensure_iterator(self):
        """Create the TFRecord reader and iterator on first use and return it."""
        if self._records_iter is None:
            self.reader = tf.data.TFRecordDataset(self.record_path).map(decoder)
            # Older TensorFlow releases expose ``make_one_shot_iterator`` on
            # the dataset; where it is missing, fall back to plain ``iter()``.
            make_one_shot = getattr(self.reader, "make_one_shot_iterator", None)
            if make_one_shot is not None:
                self._records_iter = make_one_shot()
            else:
                self._records_iter = iter(self.reader)
        return self._records_iter

    def _parser(self, img):
        """Interpret a bytes-like buffer as uint8 values and return a tensor."""
        image_arr = np.frombuffer(img, dtype=np.uint8)
        # torch.tensor copies, so the result does not alias the input buffer.
        return torch.tensor(image_arr)

    def __getitem__(self, item):
        """Return the next record as a uint8 tensor; ``item`` is not used."""
        sample = next(self._ensure_iterator()).numpy()
        return self._parser(sample)
# TestDataset takes the record path as its only positional argument; the
# previous call passed an extra leading 0, which raises TypeError.
# NOTE(review): `path` is not defined in this snippet -- presumably set earlier.
dataset = TestDataset(path)
loader = DataLoader(dataset, batch_size=1, num_workers=1)
# Print every batch produced by the loader.
for i in loader:
    print(i)