Batch error with loader for dataset that returns dicts

I have the following dataset that implements __len__() and __getitem__(), where each item is a dict. The DataLoader constructor (with batch_size=1) also runs without error:

from torch.utils.data import DataLoader

print(f"dataset.__len__(): {dataset.__len__()}\n")
print(f"dataset.__getitem__(1): {dataset.__getitem__(1).keys()}\n")
loader = DataLoader(
    dataset,
    batch_size=1,
)
print(f"type(loader): {type(loader)}\n")

Output:

dataset.__len__(): 11909

dataset.__getitem__(1): dict_keys(['rna', 'atac'])

type(loader): <class 'torch.utils.data.dataloader.DataLoader'>

However, simply iterating over the batches raises an error that seems to indicate that __getitem__() is not returning a dict. The final line of the traceback reads TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found : (note: the offending class name after "found" appears to have been stripped by the issue formatter, presumably because it was wrapped in angle brackets, e.g. <class '...'>).

for batch in loader:
    print(batch.keys())

Output:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:128, in collate(batch, collate_fn_map)
    127 try:
--> 128     return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
    129 except TypeError:
    130     # The mapping type may not support `__init__(iterable)`.

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:128, in (.0)
    127 try:
--> 128     return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
    129 except TypeError:
    130     # The mapping type may not support `__init__(iterable)`.

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:151, in collate(batch, collate_fn_map)
    149             return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]
--> 151 raise TypeError(default_collate_err_msg_format.format(elem_type))

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found 

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
Cell In[15], line 1
----> 1 for batch in loader:
      2     print(batch.keys())

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/dataloader.py:628, in _BaseDataLoaderIter.__next__(self)
    625 if self._sampler_iter is None:
    626     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    627     self._reset()  # type: ignore[call-arg]
--> 628 data = self._next_data()
    629 self._num_yielded += 1
    630 if self._dataset_kind == _DatasetKind.Iterable and \
    631         self._IterableDataset_len_called is not None and \
    632         self._num_yielded > self._IterableDataset_len_called:

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/dataloader.py:671, in _SingleProcessDataLoaderIter._next_data(self)
    669 def _next_data(self):
    670     index = self._next_index()  # may raise StopIteration
--> 671     data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    672     if self._pin_memory:
    673         data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py:61, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
     59 else:
     60     data = self.dataset[possibly_batched_index]
---> 61 return self.collate_fn(data)

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:265, in default_collate(batch)
    204 def default_collate(batch):
    205     r"""
    206         Function that takes in a batch of data and puts the elements within the batch
    207         into a tensor with an additional outer dimension - batch size. The exact output type can be
   (...)
    263             >>> default_collate(batch)  # Handle `CustomType` automatically
    264     """
--> 265     return collate(batch, collate_fn_map=default_collate_fn_map)

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:131, in collate(batch, collate_fn_map)
    128         return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
    129     except TypeError:
    130         # The mapping type may not support `__init__(iterable)`.
--> 131         return {key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}
    132 elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
    133     return elem_type(*(collate(samples, collate_fn_map=collate_fn_map) for samples in zip(*batch)))

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:131, in (.0)
    128         return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
    129     except TypeError:
    130         # The mapping type may not support `__init__(iterable)`.
--> 131         return {key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}
    132 elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
    133     return elem_type(*(collate(samples, collate_fn_map=collate_fn_map) for samples in zip(*batch)))

File /usr/local/python/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py:151, in collate(batch, collate_fn_map)
    147         except TypeError:
    148             # The sequence type may not support `__init__(iterable)` (e.g., `range`).
    149             return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]
--> 151 raise TypeError(default_collate_err_msg_format.format(elem_type))

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found 

I’m unsure if this was a known issue which was recently fixed in the nightly builds, but could you try to update to the most recent nightly binary and check if you are still seeing the error?
If so, could you post a minimal and executable code snippet reproducing the issue, please?