Hello,
I am trying to run this project: https://github.com/fukka/pointfusion
When I train the model:
num_epochs = 200
model = MLP_Global()
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
regressor = nn.SmoothL1Loss()
classifier = nn.BCELoss()
im = torch.FloatTensor(1)
points = torch.FloatTensor(1)
offset = torch.FloatTensor(1)
m = torch.FloatTensor(1)
# rot_matrix = torch.FloatTensor(1)
gt_corners = torch.FloatTensor(1)
im = im.cuda()
points = points.cuda()
offset = offset.cuda()
m = m.cuda()
# rot_matrix = rot_matrix.cuda()
gt_corners = gt_corners.cuda()
im = Variable(im)
points = Variable(points)
offset = Variable(offset)
m = Variable(m)
# rot_matrix = Variable(rot_matrix)
gt_corners = Variable(gt_corners)
date = '2020_09_15__1'
output_dir = os.path.join(root_path,'/trained_model/')
if not os.path.exists(output_dir):
os.makedirs(output_dir)
min_loss = 100
for epoch in range(1, num_epochs + 1):
nusc_iter = iter(nusc_dataloader)
loss_temp = 0
loss_epoch = 0
model = model.train()
for step in range(nusc_iters_per_epoch):
print("nusc_iter",nusc_iter)
data = next(nusc_iter)
print("data",data)
with torch.no_grad():
im.resize_(data[0].size()).copy_(data[0])
points.resize_(data[1].size()).copy_(data[1])
offset.resize_(data[2].size()).copy_(data[2])
m.resize_(data[3].size()).copy_(data[3])
# rot_matrix.resize_(data[4].size()).copy_(data[4])
gt_corners.resize_(data[5].size()).copy_(data[5])
boxes, classes = model(im, points)
loss = 0
n = 400
loss = regressor(boxes, gt_corners)
loss_temp += loss.item()
loss_epoch += loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
if step % 10 == 0 and step != 0:
loss_temp /= 10
print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
.format(epoch, num_epochs + 1, step, nusc_iters_per_epoch, loss_temp))
loss_temp = 0
loss_epoch /= nusc_iters_per_epoch
logger.scalar_summary('loss', loss_epoch, epoch)
if loss_epoch < min_loss:
min_loss = loss_epoch
print("Saving model...")
save_name = os.path.join(output_dir, 'pointfusion_{}_{}.pth'.format(epoch, date))
torch.save({'session': date,
'epoch': epoch + 1,
'model': model.state_dict(),
'optimizer': optimizer.state_dict()
}, save_name)
print("Loss for Epoch {} is {}".format(epoch, loss_epoch))
loss_epoch = 0
I get this error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-21-babfe1d36a49> in <module>()
46 for step in range(nusc_iters_per_epoch):
47 print("nusc_iter",nusc_iter)
---> 48 data = next(nusc_iter)
49 print("data",data)
50 with torch.no_grad():
6 frames
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
361
362 def __next__(self):
--> 363 data = self._next_data()
364 self._num_yielded += 1
365 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
401 def _next_data(self):
402 index = self._next_index() # may raise StopIteration
--> 403 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
404 if self._pin_memory:
405 data = _utils.pin_memory.pin_memory(data)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
45 else:
46 data = self.dataset[possibly_batched_index]
---> 47 return self.collate_fn(data)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
82 raise RuntimeError('each element in list of batch should be of equal size')
83 transposed = zip(*batch)
---> 84 return [default_collate(samples) for samples in transposed]
85
86 raise TypeError(default_collate_err_msg_format.format(elem_type))
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in <listcomp>(.0)
82 raise RuntimeError('each element in list of batch should be of equal size')
83 transposed = zip(*batch)
---> 84 return [default_collate(samples) for samples in transposed]
85
86 raise TypeError(default_collate_err_msg_format.format(elem_type))
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
62 raise TypeError(default_collate_err_msg_format.format(elem.dtype))
63
---> 64 return default_collate([torch.as_tensor(b) for b in batch])
65 elif elem.shape == (): # scalars
66 return torch.as_tensor(batch)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/collate.py in default_collate(batch)
53 storage = elem.storage()._new_shared(numel)
54 out = elem.new(storage)
---> 55 return torch.stack(batch, 0, out=out)
56 elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
57 and elem_type.__name__ != 'string_':
RuntimeError: stack expects each tensor to be equal size, but got [3, 46] at entry 0 and [3, 33] at entry 2
I have already tried defining a custom collate function, but it did not work:
# a simple custom collate function, just to show the idea
def my_collate(batch):
data = [item[0] for item in batch]
target = [item[1] for item in batch]
target = torch.LongTensor(target)
return [data, target]
Any help would be appreciated.