PyTorch RuntimeError: stack expects each tensor to be equal size

Hello,
I am trying to run this project: https://github.com/fukka/pointfusion
When I train the model with the following code:


import os

import torch
import torch.nn as nn
from torch.autograd import Variable

# MLP_Global, nusc_dataloader, nusc_iters_per_epoch, root_path and logger
# are defined in earlier cells / come from the pointfusion repository.

num_epochs = 200

model = MLP_Global()
model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
regressor = nn.SmoothL1Loss()
classifier = nn.BCELoss()

im = torch.FloatTensor(1)
points = torch.FloatTensor(1)
offset = torch.FloatTensor(1)
m = torch.FloatTensor(1)
# rot_matrix = torch.FloatTensor(1)
gt_corners = torch.FloatTensor(1)

im = im.cuda()
points = points.cuda()
offset = offset.cuda()
m = m.cuda()
# rot_matrix = rot_matrix.cuda()
gt_corners = gt_corners.cuda()

im = Variable(im)
points = Variable(points)
offset = Variable(offset)
m = Variable(m)
# rot_matrix = Variable(rot_matrix)
gt_corners = Variable(gt_corners)

date = '2020_09_15__1'

output_dir = os.path.join(root_path, 'trained_model')
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

min_loss = 100

for epoch in range(1, num_epochs + 1):
    nusc_iter = iter(nusc_dataloader)
    loss_temp = 0
    loss_epoch = 0
    model = model.train()

    for step in range(nusc_iters_per_epoch):
        print("nusc_iter",nusc_iter)
        data = next(nusc_iter)
        print("data",data)
        with torch.no_grad():
            im.resize_(data[0].size()).copy_(data[0])
            points.resize_(data[1].size()).copy_(data[1])
            offset.resize_(data[2].size()).copy_(data[2])
            m.resize_(data[3].size()).copy_(data[3])
            # rot_matrix.resize_(data[4].size()).copy_(data[4])
            gt_corners.resize_(data[5].size()).copy_(data[5])
        boxes, classes = model(im, points)
        loss = 0
        n = 400

        loss = regressor(boxes, gt_corners)

        loss_temp += loss.item()
        loss_epoch += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 10 == 0 and step != 0:
            loss_temp /= 10
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch, num_epochs, step, nusc_iters_per_epoch, loss_temp))
            loss_temp = 0
    loss_epoch /= nusc_iters_per_epoch
    logger.scalar_summary('loss', loss_epoch, epoch)

    if loss_epoch < min_loss:
        min_loss = loss_epoch
        print("Saving model...")
        save_name = os.path.join(output_dir, 'pointfusion_{}_{}.pth'.format(epoch, date))
        torch.save({'session': date,
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                    }, save_name)

    print("Loss for Epoch {} is {}".format(epoch, loss_epoch))
    loss_epoch = 0

I get this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-21-babfe1d36a49> in <module>()
     46     for step in range(nusc_iters_per_epoch):
     47         print("nusc_iter",nusc_iter)
---> 48         data = next(nusc_iter)
     49         print("data",data)
     50         with torch.no_grad():

6 frames
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    361 
    362     def __next__(self):
--> 363         data = self._next_data()
    364         self._num_yielded += 1
    365         if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
    401     def _next_data(self):
    402         index = self._next_index()  # may raise StopIteration
--> 403         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    404         if self._pin_memory:
    405             data = _utils.pin_memory.pin_memory(data)

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     45         else:
     46             data = self.dataset[possibly_batched_index]
---> 47         return self.collate_fn(data)

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     82             raise RuntimeError('each element in list of batch should be of equal size')
     83         transposed = zip(*batch)
---> 84         return [default_collate(samples) for samples in transposed]
     85 
     86     raise TypeError(default_collate_err_msg_format.format(elem_type))

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in <listcomp>(.0)
     82             raise RuntimeError('each element in list of batch should be of equal size')
     83         transposed = zip(*batch)
---> 84         return [default_collate(samples) for samples in transposed]
     85 
     86     raise TypeError(default_collate_err_msg_format.format(elem_type))

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     62                 raise TypeError(default_collate_err_msg_format.format(elem.dtype))
     63 
---> 64             return default_collate([torch.as_tensor(b) for b in batch])
     65         elif elem.shape == ():  # scalars
     66             return torch.as_tensor(batch)

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
     53             storage = elem.storage()._new_shared(numel)
     54             out = elem.new(storage)
---> 55         return torch.stack(batch, 0, out=out)
     56     elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
     57             and elem_type.__name__ != 'string_':

RuntimeError: stack expects each tensor to be equal size, but got [3, 46] at entry 0 and [3, 33] at entry 2
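
The shapes in the message correspond to my point clouds, which contain a different number of points per sample, so the default collate function cannot stack them into a batch. The failure can be reproduced in isolation (the shapes below are chosen only to match the message):

import torch

# two point clouds with a different number of points per sample
a = torch.zeros(3, 46)
b = torch.zeros(3, 33)
# raises: RuntimeError: stack expects each tensor to be equal size, ...
torch.stack([a, b], 0)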

I have already tried to define a custom collate function, but it did not work:

# a simple custom collate function, just to show the idea
def my_collate(batch):
    data = [item[0] for item in batch]
    target = [item[1] for item in batch]
    target = torch.LongTensor(target)
    return [data, target]
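
For reference, I passed it to the DataLoader via the collate_fn argument, roughly like this (nusc_dataset and the batch size here are just placeholders for my actual setup):

from torch.utils.data import DataLoader

# nusc_dataset and batch_size are placeholders for my actual setup
nusc_dataloader = DataLoader(nusc_dataset, batch_size=4,
                             shuffle=True, collate_fn=my_collate)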

Any help, please?

I assume your custom collate function should work. What kind of error message are you seeing when using it?

Thanks for your reply. I removed the my_collate function and zero-padded the point clouds instead, and the error disappeared (a sketch of what I did is shown below). But I am not sure whether this is a good solution and whether it will affect the learning behavior.
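
The padding I mean is roughly the following (a minimal sketch; it assumes each sample is a tuple whose second element is the [3, N] point cloud, as in my training loop above, and that all the other fields already have a fixed size):

import torch
import torch.nn.functional as F
from torch.utils.data.dataloader import default_collate

def pad_points_collate(batch):
    # each sample is assumed to be a tuple whose second element is a
    # point cloud of shape [3, N], where N varies between samples
    max_pts = max(torch.as_tensor(sample[1]).shape[1] for sample in batch)
    padded = []
    for sample in batch:
        pts = torch.as_tensor(sample[1])
        # zero-pad the point dimension on the right up to the batch maximum
        pts = F.pad(pts, (0, max_pts - pts.shape[1]))
        padded.append((sample[0], pts) + tuple(sample[2:]))
    # once all point clouds have the same size, the default collate can stack them
    return default_collate(padded)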