How to make DataLoader avoid collating the batch

Is there a way to prevent the DataLoader from trying to collate the batch? The images have different sizes because they are resized in the first layer of the model.

~/anaconda3/envs/seg/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

~/anaconda3/envs/seg/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    951             if len(self._task_info[self._rcvd_idx]) == 2:
    952                 data = self._task_info.pop(self._rcvd_idx)[1]
--> 953                 return self._process_data(data)
    954 
    955             assert not self._shutdown and self._tasks_outstanding > 0

~/anaconda3/envs/seg/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
    994         self._try_put_index()
    995         if isinstance(data, ExceptionWrapper):
--> 996             data.reraise()
    997         return data
    998 

~/anaconda3/envs/seg/lib/python3.7/site-packages/torch/_utils.py in reraise(self)
    393             # (https://bugs.python.org/issue2651), so we work around it.
    394             msg = KeyErrorMessage(msg)
--> 395         raise self.exc_type(msg)

RuntimeError: Caught RuntimeError in DataLoader worker process 10.
Original Traceback (most recent call last):
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 34, in fetch
    data = next(self.dataset_iter)
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/fastai2/data/load.py", line 107, in create_batches
    yield from map(self.do_batch, self.chunkify(res))
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/fastai2/data/load.py", line 128, in do_batch
    def do_batch(self, b): return self.retain(self.create_batch(self.before_batch(b)), b)
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/fastai2/data/load.py", line 127, in create_batch
    def create_batch(self, b): return (fa_collate,fa_convert)[self.prebatched](b)
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/fastai2/data/load.py", line 46, in fa_collate
    else type(t[0])([fa_collate(s) for s in zip(*t)]) if isinstance(b, Sequence)
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/fastai2/data/load.py", line 46, in <listcomp>
    else type(t[0])([fa_collate(s) for s in zip(*t)]) if isinstance(b, Sequence)
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/fastai2/data/load.py", line 45, in fa_collate
    return (default_collate(t) if isinstance(b, _collate_types)
  File "/home/david/anaconda3/envs/seg/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 55, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [3, 966, 1296] at entry 0 and [3, 1004, 1002] at entry 1

For variable shapes, you could implement a custom collate function as described here.
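As a minimal sketch with plain PyTorch (names like `my_dataset` and `no_stack_collate` are placeholders, not from the original post), a custom collate function can simply return lists instead of calling `torch.stack`, so differently sized images never need to share a shape:

```python
from torch.utils.data import DataLoader

def no_stack_collate(batch):
    """Keep variable-size images as a list instead of stacking them.

    `batch` is a list of (image, target) pairs from the Dataset;
    this transposes it into (list_of_images, list_of_targets)
    without ever calling torch.stack.
    """
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]
    return images, targets

# Hypothetical usage with a dataset that yields differently sized images:
# dl = DataLoader(my_dataset, batch_size=2, collate_fn=no_stack_collate)
```

Since the traceback goes through fastai2's `DataLoader.create_batch`, the analogous hook there would presumably be overriding `create_batch` (or adjusting `before_batch`) so it skips `fa_collate`; the exact approach depends on how your `DataLoaders` are built.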


Thank you, I will take a look at it.