I have the following dataset that implements `__len__()` and `__getitem__()`, where each item is a dict. The DataLoader constructor (with `batch_size=1`) also runs without error:
from torch.utils.data import DataLoader
print(f"dataset.__len__(): {dataset.__len__()}\n")
print(f"dataset.__getitem__(1): {dataset.__getitem__(1).keys()}\n")
loader = DataLoader(
dataset,
batch_size=1,
)
print(f"type(loader): {type(loader)}\n")
Output:
dataset.__len__(): 11909
dataset.__getitem__(1): dict_keys(['rna', 'atac'])
type(loader): <class 'torch.utils.data.dataloader.DataLoader'>
However, simply iterating over batches gives an error that seems to indicate that `__getitem__()` is not returning a dict. The final line reads `TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found ...` (the type name that follows "found" was lost when the output was pasted):
for batch in loader:
print(batch.keys())
Output:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:128, in collate(batch, collate_fn_map)
127 try:
--> 128 return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
129 except TypeError:
130 # The mapping type may not support `__init__(iterable)`.
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:128, in <dictcomp>(.0)
127 try:
--> 128 return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
129 except TypeError:
130 # The mapping type may not support `__init__(iterable)`.
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:151, in collate(batch, collate_fn_map)
149 return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]
--> 151 raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Cell In[15], line 1
----> 1 for batch in loader:
2 print(batch.keys())
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/dataloader.py:628, in _BaseDataLoaderIter.__next__(self)
625 if self._sampler_iter is None:
626 # TODO(https://github.com/pytorch/pytorch/issues/76750)
627 self._reset() # type: ignore[call-arg]
--> 628 data = self._next_data()
629 self._num_yielded += 1
630 if self._dataset_kind == _DatasetKind.Iterable and \
631 self._IterableDataset_len_called is not None and \
632 self._num_yielded > self._IterableDataset_len_called:
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/dataloader.py:671, in _SingleProcessDataLoaderIter._next_data(self)
669 def _next_data(self):
670 index = self._next_index() # may raise StopIteration
--> 671 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
672 if self._pin_memory:
673 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py:61, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
59 else:
60 data = self.dataset[possibly_batched_index]
---> 61 return self.collate_fn(data)
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:265, in default_collate(batch)
204 def default_collate(batch):
205 r"""
206 Function that takes in a batch of data and puts the elements within the batch
207 into a tensor with an additional outer dimension - batch size. The exact output type can be
(...)
263 >>> default_collate(batch) # Handle `CustomType` automatically
264 """
--> 265 return collate(batch, collate_fn_map=default_collate_fn_map)
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:131, in collate(batch, collate_fn_map)
128 return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
129 except TypeError:
130 # The mapping type may not support `__init__(iterable)`.
--> 131 return {key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}
132 elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple
133 return elem_type(*(collate(samples, collate_fn_map=collate_fn_map) for samples in zip(*batch)))
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:131, in <dictcomp>(.0)
128 return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
129 except TypeError:
130 # The mapping type may not support `__init__(iterable)`.
--> 131 return {key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}
132 elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple
133 return elem_type(*(collate(samples, collate_fn_map=collate_fn_map) for samples in zip(*batch)))
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:151, in collate(batch, collate_fn_map)
147 except TypeError:
148 # The sequence type may not support `__init__(iterable)` (e.g., `range`).
149 return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]
--> 151 raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found