DataLoader creating data that is partially on CPU and partially on GPU

I am running the code below to create data loaders for graph data:
“”"
batch_size = 128
train_list =[]
for idx, batch in enumerate(zip(X_train[train_idx],
class_v[train_idx],
adj_train[train_idx])):
edge_index, _ = dense_to_sparse(torch.from_numpy(batch[2]).float())
train_list.append(Data(x = torch.from_numpy(batch[0]).float(),
y = torch.from_numpy(batch[1]).float(),
edge_index = edge_index
)
)

batch_train_loader = DataLoader(train_list, batch_size=batch_size)

val_list = []
for idx, batch in enumerate(zip(X_train_val[val_idx],
class_v_val[val_idx],
adj_train_val[val_idx])):
edge_index, _ = dense_to_sparse(torch.from_numpy(batch[2]).float())
val_list.append(Data(x = torch.from_numpy(batch[0]).float(),
y = torch.from_numpy(batch[1]).float(),
edge_index = edge_index)
)
batch_val_loader = DataLoader(val_list, batch_size=batch_size)
“”"

When I try to read this data, I get an error that the tensors cannot be on two devices:
“”"
idx = 0
for data in batch_train_loader:
idx+=1
print(data.x.shape,data.y.shape,data.edge_index.shape)
if idx==3:
break
“”"
“”"
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_17752/355153217.py in
1 idx = 0
----> 2 for data in batch_train_loader:
3 idx+=1
4 print(data.x.shape,data.y.shape,data.edge_index.shape)
5 if idx==3:

~\miniconda3\lib\site-packages\torch\utils\data\dataloader.py in next(self)
519 if self._sampler_iter is None:
520 self._reset()
→ 521 data = self._next_data()
522 self._num_yielded += 1
523 if self._dataset_kind == _DatasetKind.Iterable and \

~\miniconda3\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
559 def _next_data(self):
560 index = self._next_index() # may raise StopIteration
→ 561 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
562 if self._pin_memory:
563 data = _utils.pin_memory.pin_memory(data)

~\miniconda3\lib\site-packages\torch\utils\data_utils\fetch.py in fetch(self, possibly_batched_index)
45 else:
46 data = self.dataset[possibly_batched_index]
—> 47 return self.collate_fn(data)

~\miniconda3\lib\site-packages\torch_geometric\loader\dataloader.py in call(self, batch)
37
38 def call(self, batch):
—> 39 return self.collate(batch)
40
41

~\miniconda3\lib\site-packages\torch_geometric\loader\dataloader.py in collate(self, batch)
17 elem = batch[0]
18 if isinstance(elem, Data) or isinstance(elem, HeteroData):
—> 19 return Batch.from_data_list(batch, self.follow_batch,
20 self.exclude_keys)
21 elif isinstance(elem, torch.Tensor):

~\miniconda3\lib\site-packages\torch_geometric\data\batch.py in from_data_list(cls, data_list, follow_batch, exclude_keys)
61 Will exclude any keys given in :obj:exclude_keys."""
62
—> 63 batch, slice_dict, inc_dict = collate(
64 cls,
65 data_list=data_list,

~\miniconda3\lib\site-packages\torch_geometric\data\collate.py in collate(cls, data_list, increment, add_batch, follow_batch, exclude_keys)
74
75 # Collate attributes into a unified representation:
—> 76 value, slices, incs = _collate(attr, values, data_list, stores,
77 increment)
78

~\miniconda3\lib\site-packages\torch_geometric\data\collate.py in _collate(key, values, data_list, stores, increment)
142 incs = get_incs(key, values, data_list, stores)
143 if incs.dim() > 1 or int(incs[-1]) != 0:
→ 144 values = [value + inc for value, inc in zip(values, incs)]
145 else:
146 incs = None

~\miniconda3\lib\site-packages\torch_geometric\data\collate.py in (.0)
142 incs = get_incs(key, values, data_list, stores)
143 if incs.dim() > 1 or int(incs[-1]) != 0:
→ 144 values = [value + inc for value, inc in zip(values, incs)]
145 else:
146 incs = None

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
“”"
I am not sure why this should happen. I am also seeing a considerable slowdown in my training loop.
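
For reference, the failing frame is PyG's collate step, which offsets each graph's `edge_index` by the running node count before concatenating the batch. If those increments are created on a different device than the edge indices, the addition fails; simplified, the line from the traceback is doing:

```
# inc holds the cumulative node-count offset for each graph in the batch.
# If incs is created on cuda:0 while the edge indices are CPU tensors,
# this elementwise add raises the "two devices" RuntimeError.
values = [value + inc for value, inc in zip(values, incs)]
```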

Are you setting the default tensor type to a CUDA tensor, or is any class pushing the data to the GPU (e.g. Data)?
Could you check the .device attribute of the tensors while iterating the train_list?
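
A quick way to check both (a minimal sketch, assuming the variables from the snippets above):

```
# Inspect the device of each tensor in the first few Data objects.
for data in train_list[:3]:
    print(data.x.device, data.y.device, data.edge_index.device)

# A freshly created tensor reveals the current default tensor type
# (it would print cuda:0 after e.g.
# torch.set_default_tensor_type('torch.cuda.FloatTensor')).
print(torch.empty(1).device)
```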