I’ve been creating weighted sampling for a CNN by loading in the data and iterating through each item’s labels as follows:
train_loader = DataLoader(Dataset(data_path, cube_length, transforms, img_size), batch_size=batch_size, shuffle=False)
dataiter = iter(train_loader)
images, labels = dataiter.next()
weights = weighting(labels)
sampler = WeightedRandomSampler(weights, len(weights))
However when I try this on my real dataset which has 1,032,960 images (and therefore labels) I get the following error:
OSError: [WinError 1455] The paging file is too small for this operation to complete
associated with the line:
images, labels = dataiter.next()
Reading the error, I’m assuming I’ve run out of memory somewhere in either the reading of the data or in appending the labels (the latter seeming unlikely).
Is there a way around this error — perhaps a more efficient way of reading the dataset?
Perhaps using something like:
labels = []
for idx, (batch, target) in enumerate(train_loader):
    labels.append(target)
Many thanks in advance. Full error message below:
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\astropy\utils\decorators.py in __get__(self, obj, owner)
733 try:
--> 734 return obj.__dict__[self._key]
735 except KeyError:
KeyError: 'data'
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-9-e20f44391918> in <module>()
13
14 start = time.time()
---> 15 train(model, transform, num_epochs=150, batch_size=64, lr_schedule=schedule, loss=loss_fn, suffix=suffix)
16 end = time.time()
17 print('TRAIN TIME:')
<ipython-input-8-d1389ed15555> in train(model, transforms, data_path, num_epochs, batch_size, verbose, cube_length, img_size, loss, lr_schedule, initial_lr, suffix)
11 train_loader = DataLoader(FITSCubeDataset(data_path, cube_length, transforms, img_size), batch_size=640*1614, shuffle=False)
12 dataiter = iter(train_loader)
---> 13 images, labels = dataiter.next()
14 weights = weighting(labels)
15 sampler = WeightedRandomSampler(weights, len(weights))
~\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
262 if self.num_workers == 0: # same-process loading
263 indices = next(self.sample_iter) # may raise StopIteration
--> 264 batch = self.collate_fn([self.dataset[i] for i in indices])
265 if self.pin_memory:
266 batch = pin_memory_batch(batch)
~\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py in <listcomp>(.0)
262 if self.num_workers == 0: # same-process loading
263 indices = next(self.sample_iter) # may raise StopIteration
--> 264 batch = self.collate_fn([self.dataset[i] for i in indices])
265 if self.pin_memory:
266 batch = pin_memory_batch(batch)
<ipython-input-5-f4f61f06aa37> in __getitem__(self, index)
43 cube_index = index // self.cube_length
44 slice_index = index % self.cube_length
---> 45 _img, _label = default_fits_loader(self.img_files[cube_index], self.img_size, slice_index)
46 _img[_img != _img] = 0
47 if self.transforms is not None:
<ipython-input-5-f4f61f06aa37> in default_fits_loader(file_name, img_size, slice_index)
22 def default_fits_loader(file_name: str, img_size: tuple, slice_index):
23 file = fits.open(file_name)
---> 24 _data = file[1].data
25 _data = resize(_data[slice_index], img_size)
26 _label = file[0].header['LABEL']
~\Anaconda3\lib\site-packages\astropy\utils\decorators.py in __get__(self, obj, owner)
734 return obj.__dict__[self._key]
735 except KeyError:
--> 736 val = self.fget(obj)
737 obj.__dict__[self._key] = val
738 return val
~\Anaconda3\lib\site-packages\astropy\io\fits\hdu\image.py in data(self)
241 return
242
--> 243 data = self._get_scaled_image_data(self._data_offset, self.shape)
244 self._update_header_scale_info(data.dtype)
245
~\Anaconda3\lib\site-packages\astropy\io\fits\hdu\image.py in _get_scaled_image_data(self, offset, shape)
707 code = BITPIX2DTYPE[self._orig_bitpix]
708
--> 709 raw_data = self._get_raw_data(shape, code, offset)
710 raw_data.dtype = raw_data.dtype.newbyteorder('>')
711
~\Anaconda3\lib\site-packages\astropy\io\fits\hdu\base.py in _get_raw_data(self, shape, code, offset)
478 offset=offset)
479 elif self._file:
--> 480 return self._file.readarray(offset=offset, dtype=code, shape=shape)
481 else:
482 return None
~\Anaconda3\lib\site-packages\astropy\io\fits\file.py in readarray(self, size, offset, dtype, shape)
268 # file)
269 memmap = Memmap(self._file, mode=MEMMAP_MODES[self.mode],
--> 270 dtype=np.uint8)
271
272 # Now we immediately discard the memmap array; we are
~\Anaconda3\lib\site-packages\numpy\core\memmap.py in __new__(subtype, filename, dtype, mode, offset, shape, order)
262 bytes -= start
263 array_offset = offset - start
--> 264 mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start)
265
266 self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
OSError: [WinError 1455] The paging file is too small for this operation to complete