DataLoader crashes when shuffling

I’m using DataLoader to read from a custom Dataset object based on numpy memmap.
As long as I read the data without shuffling, everything works fine, but as soon as I set shuffle=True, the runtime crashes.

I tried implementing the shuffling mechanism in the Dataset class by using a permutation vector and setting shuffle=False in the DataLoader but the issue persists.
I also noticed that, when shuffling, the __getitem__() function of the Dataset object is called n times, where n is the batch_size.

Here’s the Dataset code:

class CustomDataset(Dataset):
  """Map-style dataset backed by four read-only ``numpy.memmap`` files.

  The files ``output``, ``output-lengths``, ``input`` and ``input-lengths``
  are looked up directly under ``root`` (after ``~`` expansion).

  Class attributes (override in a subclass to describe a different layout):
    num_pattern:  number of samples stored in every file.
    input_shape:  shape of one ``float32`` input sample.
    output_shape: shape of one ``int64`` output sample.
  """
  num_pattern = 60112
  base_folder = 'dataset'
  input_shape = (512, 1024)
  output_shape = (62,)

  def __init__(self, root):
    """Open the four backing files in read-only mode.

    Args:
      root: directory that contains the memmap files; a leading ``~`` is
        expanded to the user's home directory.
    """
    self.root = os.path.expanduser(root)
    count = self.num_pattern

    self.output_ = self._open('output', 'int64', (count,) + tuple(self.output_shape))
    self.out_len = self._open('output-lengths', 'int32', (count,))
    self.input_ = self._open('input', 'float32', (count,) + tuple(self.input_shape))
    self.in_len = self._open('input-lengths', 'int32', (count,))

  def _open(self, name, dtype, shape):
    """Return a read-only memmap over ``<self.root>/<name>``."""
    # Build the path from the expanded root so that '~' paths resolve.
    path = os.path.join(self.root, name)
    return np.memmap(path, dtype=dtype, mode='r', shape=shape)

  def __len__(self):
    """Return the number of samples (``num_pattern``)."""
    return self.num_pattern

  def __getitem__(self, index):
    """Return ``((in_len, input), (out_len, output))`` for sample ``index``.

    The two lengths are the values stored in the length files; ``input`` and
    ``output`` are tensors holding a copy of the sample data.
    """
    # The mappings are opened read-only, so copy each sample into an ordinary
    # writable array before wrapping it: torch.from_numpy shares memory with
    # the array it is given.
    sample_in = torch.from_numpy(np.array(self.input_[index]))
    sample_out = torch.from_numpy(np.array(self.output_[index]))
    return (self.in_len[index], sample_in), (self.out_len[index], sample_out)

if __name__ == '__main__':
  dataset = CustomDataset(root='/content/')
  data_loader = data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1)
  # The loop variable is called `batch` rather than `data` so that it does not
  # rebind the `data` name that provides `DataLoader` on the line above.
  for i, batch in enumerate(data_loader, 0):
    pass  # training

The error stack is the following:

RuntimeError                              Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/ in _try_get_batch(self, timeout)
    510         try:
--> 511             data = self.data_queue.get(timeout=timeout)
    512             return (True, data)

9 frames
/usr/lib/python3.6/multiprocessing/ in get(self, block, timeout)
    103                     timeout = deadline - time.monotonic()
--> 104                     if not self._poll(timeout):
    105                         raise Empty

/usr/lib/python3.6/multiprocessing/ in poll(self, timeout)
    256         self._check_readable()
--> 257         return self._poll(timeout)

/usr/lib/python3.6/multiprocessing/ in _poll(self, timeout)
    413     def _poll(self, timeout):
--> 414         r = wait([self], timeout)
    415         return bool(r)

/usr/lib/python3.6/multiprocessing/ in wait(object_list, timeout)
    910             while True:
--> 911                 ready = selector.select(timeout)
    912                 if ready:

/usr/lib/python3.6/ in select(self, timeout)
    375             try:
--> 376                 fd_event_list = self._poll.poll(timeout)
    377             except InterruptedError:

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/ in handler(signum, frame)
     62         # Python can still get and update the process status successfully.
---> 63         _error_if_any_worker_fails()
     64         if previous_handler is not None:

RuntimeError: DataLoader worker (pid 3978) is killed by signal: Bus error. 

During handling of the above exception, another exception occurred:

RuntimeError                              Traceback (most recent call last)
<ipython-input-8-b407a8532808> in <module>()
      5   data_loader = data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=1)
----> 7   for i, data in enumerate(data_loader, 0):
      8     print(i)

/usr/local/lib/python3.6/dist-packages/torch/utils/data/ in __next__(self)
    574         while True:
    575             assert (not self.shutdown and self.batches_outstanding > 0)
--> 576             idx, batch = self._get_batch()
    577             self.batches_outstanding -= 1
    578             if idx != self.rcvd_idx:

/usr/local/lib/python3.6/dist-packages/torch/utils/data/ in _get_batch(self)
    551         else:
    552             while True:
--> 553                 success, data = self._try_get_batch()
    554                 if success:
    555                     return data

/usr/local/lib/python3.6/dist-packages/torch/utils/data/ in _try_get_batch(self, timeout)
    517             if not all(w.is_alive() for w in self.workers):
    518                 pids_str = ', '.join(str(w.pid) for w in self.workers if not w.is_alive())
--> 519                 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str))
    520             if isinstance(e, queue.Empty):
    521                 return (False, None)

RuntimeError: DataLoader worker (pid(s) 3978) exited unexpectedly

I’m running it on Colab, though I don’t think that is the cause of the problem.