I have created a Pytorch Geometric InMemoryDataset and am running into some problems when loading this dataset. When the data has not been processed yet the Processing does not seem to finish correctly as I am not getting the usual “Done!” message in the terminal, and also my code does not continue as expected but just quits without an error message.
If I then try to rerun the code, pytorch tries to load the processed dataset but throws the following error:
PytorchStreamReader failed reading zip archive: failed finding central directory.
Here is the code for my dataset:
class HurricaneDataset(InMemoryDataset):
def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
super().__init__(root, transform, pre_transform, pre_filter)
self.data, self.slices = torch.load(self.processed_paths[0])
@property
def raw_file_names(self):
return os.listdir(self.root + "/raw")
@property
def processed_file_names(self):
return ["data.pt"]
def download(self):
pass
def len(self):
"""
Reads length of .npz archive
"""
return len(self.raw_file_names)
def process(self):
data_list = [torch.load(self.root + "/raw/" + f) for f in self.raw_file_names]
if self.pre_filter is not None:
data_list = [data for data in data_list if self.pre_filter(data)]
if self.pre_transform is not None:
data_list = [self.pre_transform(data.item()) for data in data_list]
data, slices = self.collate(data_list)
torch.save((data, slices), self.processed_paths[0])