TypeError: batch must contain tensors, numbers, dicts or lists; found object

nicoliKim · March 9, 2018, 11:53am

Hello Everyone!

I am rather new to PyTorch and I am trying to re-implement in PyTorch a project I previously wrote in TensorFlow (TF).
While testing my code so far I get the following error message:

Traceback (most recent call last):
  File "data2test.py", line 122, in <module>
    train(epoch)
  File "data2test.py", line 82, in train
    for batch_idx, (data, target) in enumerate(train_set,0):
  File "/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 259, in __next__
    batch = self.collate_fn([self.dataset[i] for i in indices])
  File "/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 135, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 135, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 119, in default_collate
    raise TypeError(error_msg.format(elem.dtype))
TypeError: batch must contain tensors, numbers, dicts or lists; found object

This error comes when the following function I defined for training is called:

# Batches are drawn in random order, and only from the training indices.
train_set = DataLoader(
    dataset=molecules,
    batch_size=args.train_batch,
    sampler=SubsetRandomSampler(train_indices),
)


def train(epoch):
    """Run one training epoch over ``train_set``.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``args``
    and ``train_set`` objects and prints the loss every 10 batches.

    :param epoch: Epoch number; only used in the progress message.
    """
    model.train()

    # The loader only visits the indices handed to its sampler, so progress
    # is reported against the sampler size rather than the whole dataset.
    num_samples = len(train_set.sampler)
    num_batches = len(train_set)

    for batch_idx, (data, target) in enumerate(train_set):
        # ``batch_idx`` is a plain int from ``enumerate`` (it has no
        # ``.shape``); the earlier debug print/quit() pair has been removed
        # so that the optimisation step below actually runs.
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data, args.blocks)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            # ``loss.item()`` is available from PyTorch 0.4 on, where
            # indexing a 0-dim loss no longer works; older releases only
            # offer ``loss.data[0]``.
            loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, loss_value))

Can someone help me understand why this error occurs, and why the DataLoader applied to my data (molecules) seems to return an object and not a tensor?

I am sorry if the question seems obvious or dumb, but as I said I am new to PyTorch and I am trying to improve!

Thanks a lot in advance!

Kim

Shani_Gamrian · March 9, 2018, 12:02pm

What is molecules? It seems like it wasn’t converted to tensors.

nicoliKim · March 9, 2018, 12:08pm

molecules is an instance of a class I called MoleculesLoader. Here is the code:

class MoleculesLoader(Dataset):
    """PyTorch ``Dataset`` backed by a qm9 ``.npz`` archive.

    Indexing yields an ``(inputs, label)`` pair: ``inputs`` combines the
    'numbers' and 'distances' entries of one sample, ``label`` is its
    'energies' entry.
    """

    def __init__(self, destpath):
        """
        :param destpath: Path to the .npz database previously created
        """
        self.destpath = destpath
        with np.load(self.destpath) as data:
            # Each lookup on the archive reads the array from the file
            # again, so every entry is fetched exactly once.
            numbers = data['numbers']
            position = data['distances']
            energy = data['energies']

        self.len = len(numbers)

        # One row per sample: (numbers, distances, energy).
        # NOTE(review): when the three entries of a row differ in shape (see
        # the shapes listed below __getitem__), NumPy can only hold them as
        # dtype=object; DataLoader's default collate function rejects such
        # arrays ("found object"), so they need converting to numeric
        # arrays/tensors before batching.
        X = np.array(list(zip(numbers, position, energy)))

        #  input
        self.x_data = X[:, :2]  # (133885, 2)
        #  labels
        self.y_data = X[:, 2]  # (133885,)

    def __getitem__(self, index):
        """Return the ``(inputs, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    #  x_data[index] -> (2,) array
    #   x_data[index][0] = shape (29,)
    #   x_data[index][1] = shape (29,29)
    #  y_data[index] -> () int

    def __len__(self):
        """Return the number of samples found in the archive."""
        return self.len

Shani_Gamrian · March 9, 2018, 12:24pm

You need to wrap the data with transforms.Compose before you return it.
For example add to the __init__:

self.transform = transforms.Compose([transforms.ToTensor()])  # you can add to the list all the transformations you need.

and in __getitem__ do:

return self.transform(self.x_data[index]), self.transform(self.y_data[index])

Didn’t try it on your code but I think it should work.

nicoliKim · March 12, 2018, 2:44pm

Thanks a lot, it worked!
Sorry for the late reply, but I didn’t realize that my reply was not posted last week!

Shani_Gamrian · March 12, 2018, 4:09pm

Glad I could help. Please press solution in the relevant comment.

CalinTimbus · September 17, 2018, 7:47am

Hello. I would also like to point out that the position of the ToTensor() transform in the transform list is important. When using transforms, be careful to apply Resize() or other image transforms before ToTensor().
Maybe it’s a beginner mistake, but I spent 12 hours trying to figure out what was wrong with my code.

raymond715 · December 16, 2019, 8:44am

I ran into exactly the same error and found that the type of the data I create in __init__ was object. I specified it as float, which solved my problem.

Nishanth_Nair · March 12, 2022, 6:04pm

Hi, I tried the solution mentioned by @Shani_Gamrian but it gives me this error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-83-62e0a3e164ee> in <module>
      1 output_list = []
----> 2 for batch_idx, samples in enumerate(train_loader): #(20419, 21)
      3     y = model(tf.convert_to_tensor(samples)) #Converting to tensor as only a tensor can be passed to our model
      4     output_list.append(y)
      5 print(output_list)

c:\python\python39\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
    519             if self._sampler_iter is None:
    520                 self._reset()
--> 521             data = self._next_data()
    522             self._num_yielded += 1
    523             if self._dataset_kind == _DatasetKind.Iterable and \

c:\python\python39\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
    559     def _next_data(self):
    560         index = self._next_index()  # may raise StopIteration
--> 561         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    562         if self._pin_memory:
    563             data = _utils.pin_memory.pin_memory(data)

c:\python\python39\lib\site-packages\torch\utils\data\_utils\fetch.py in fetch(self, possibly_batched_index)
     47     def fetch(self, possibly_batched_index):
     48         if self.auto_collation:
---> 49             data = [self.dataset[idx] for idx in possibly_batched_index]
     50         else:
     51             data = self.dataset[possibly_batched_index]

c:\python\python39\lib\site-packages\torch\utils\data\_utils\fetch.py in <listcomp>(.0)
     47     def fetch(self, possibly_batched_index):
     48         if self.auto_collation:
---> 49             data = [self.dataset[idx] for idx in possibly_batched_index]
     50         else:
     51             data = self.dataset[possibly_batched_index]

<ipython-input-78-3cde72f064c5> in __getitem__(self, index)
     14     def __getitem__(self, index):
     15         #return self.dataframe.iloc[index]
---> 16         return self.transform(self.x_data[index]), self.transform(self.y_data[index])

c:\python\python39\lib\site-packages\torch\utils\data\dataset.py in __getattr__(self, attribute_name)
     81             return function
     82         else:
---> 83             raise AttributeError
     84 
     85     @classmethod

AttributeError:

I imported transforms from torchvision.
Here is the rest of the code:

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms

class PandasDataset(Dataset):
    """Dataset wrapper around ``dataframe``.

    The dataset length is ``len(dataframe)``.
    """
    def __init__(self, dataframe):
        """Store the data and build the transform pipeline.

        :param dataframe: Object holding the samples (presumably a pandas
            DataFrame, going by the class name); its ``len()`` defines the
            dataset length.
        """
        self.dataframe = dataframe
        self.transform = transforms.Compose([transforms.ToTensor()])  
        # Further transforms can be appended to the list above.

    def __len__(self):
        """Return the number of samples, i.e. ``len(self.dataframe)``."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return the transformed ``(x, y)`` pair stored at ``index``.

        NOTE(review): ``self.x_data`` and ``self.y_data`` are never assigned
        in ``__init__`` (only ``dataframe`` and ``transform`` are), so this
        lookup raises ``AttributeError`` until they are defined.
        """
        #return self.dataframe.iloc[index]
        return self.transform(self.x_data[index]), self.transform(self.y_data[index])

# Wrap the data and iterate it in its stored order, 200 samples per batch,
# loading in the main process (no worker subprocesses).
dataset = PandasDataset(data)
train_loader = DataLoader(
    dataset,
    batch_size=200,
    shuffle=False,
    num_workers=0,
)

I then try to iterate over it with the loop for batch_idx, samples in enumerate(train_loader):

Any help is appreciated.

ptrblck · March 13, 2022, 12:59am

In your code self.x_data and self.y_data are undefined, which will raise the AttributeError.

Nishanth_Nair · March 13, 2022, 4:35pm

Thank you so much. It works now.