I am implementing the Faster R-CNN paper, and for the region proposal network, each training sample should have an image as the input and, as the label, a list of all the ground-truth bounding boxes in that image.
However, passing a list of arrays (the bounding boxes) as the label does not actually work: each image has a different number of boxes, so the label arrays have different shapes and cannot be batched.
What is the solution for this?
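To illustrate the mismatch, here is a minimal sketch with made-up box counts; the per-image labels have different first dimensions, so they cannot be stacked into a single batch tensor:

import torch

# Two hypothetical label arrays: one image with 2 boxes, one with 3 (made-up values)
boxes_a = torch.LongTensor([[0, 0, 10, 10], [5, 5, 20, 20]])            # shape (2, 4)
boxes_b = torch.LongTensor([[1, 1, 8, 8], [2, 2, 9, 9], [3, 3, 7, 7]])  # shape (3, 4)

torch.stack([boxes_a, boxes_b], 0)  # RuntimeError: inconsistent tensor sizes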
I am currently using a custom Dataset class to create the dataset and a DataLoader for loading (the loader setup is sketched after the dataset code below).
# Creating the training dataset
import os

import numpy as np
import scipy.misc as m
import torch
from skimage import io
from torch.utils.data import Dataset

class DetectionDataset(Dataset):
    def __init__(self, data_frame, img_size=512, is_transform=False):
        self.data_frame = data_frame
        self.is_transform = is_transform
        self.mean = np.array([104.00699, 116.66877, 122.67892])
        self.n_classes = 21
        self.img_size = img_size if isinstance(img_size, tuple) else (img_size, img_size)

    def __len__(self):
        return self.data_frame.shape[0]

    def __getitem__(self, i):
        d = self.data_frame.iloc[i]
        # new_images_dir is a global path defined elsewhere
        filename = os.path.join(new_images_dir, d['fname']) + ".jpg"
        image = io.imread(filename)
        # This code only uses one array (bounding box) at a time
        label = np.array(d[['xmin', 'ymin', 'xmax', 'ymax']], np.int32)
        image = np.array(image, dtype=np.uint8)
        sample = {'image': image, 'lbl': label}
        if self.is_transform:
            sample = self.transform(sample)
        return sample

    def transform(self, sample):
        img = sample['image']
        lbl = sample['lbl']
        img = img[:, :, ::-1]  # RGB -> BGR
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # imresize rescales images to 0..255, thus we need
        # to divide by 255.0
        img = img.astype(float) / 255.0
        # HWC -> CHW
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).float()
        lbl = np.array(lbl)
        lbl = torch.from_numpy(lbl).long()
        sample = {'image': img, 'lbl': lbl}
        return sample
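The DataLoader is set up roughly like this (a sketch; the variable names and batch size are illustrative, and any batch_size greater than 1 triggers the failure):

from torch.utils.data import DataLoader

# df is the annotations DataFrame loaded elsewhere (name is illustrative)
train_dataset = DetectionDataset(df, img_size=512, is_transform=True)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4)

for batch in train_loader:  # fails inside default_collate
    images, labels = batch['image'], batch['lbl']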
The error:
RuntimeError: Traceback (most recent call last):
  File "/Users/navneetmkumar/anaconda/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 40, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/Users/navneetmkumar/anaconda/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 106, in default_collate
    return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
  File "/Users/navneetmkumar/anaconda/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 106, in <dictcomp>
    return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
  File "/Users/navneetmkumar/anaconda/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 91, in default_collate
    return torch.stack(batch, 0, out=out)
  File "/Users/navneetmkumar/anaconda/lib/python3.5/site-packages/torch/functional.py", line 66, in stack
    return torch.cat(inputs, dim, out=out)
RuntimeError: inconsistent tensor sizes at /Users/soumith/miniconda2/conda-bld/pytorch_1502000696751/work/torch/lib/TH/generic/THTensorMath.c:2709
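From the traceback, default_collate ends up calling torch.stack on the per-sample labels, which cannot work when the number of boxes varies per image. One workaround I have been considering is a custom collate_fn that stacks the images (all the same shape after the resize in transform) but keeps the labels as a plain Python list of tensors; is this the right approach?

import torch
from torch.utils.data import DataLoader

def detection_collate(batch):
    # Stack the images into a single (N, C, H, W) tensor, but keep the
    # variable-length box labels as a list of per-image tensors.
    images = torch.stack([sample['image'] for sample in batch], 0)
    labels = [sample['lbl'] for sample in batch]
    return {'image': images, 'lbl': labels}

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True,
                          collate_fn=detection_collate)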