I am working on object detection. In my dataset class I create the boxes as float32, but when I get them back through the dataloader they come out as float64.
I also get the following error during loss.backward():
Variable._execution_engine.run_backward(
    tensors, grad_tensors_, retain_graph, create_graph, inputs,
    allow_unreachable=True, accumulate_grad=True)  # allow_unreachable flag

RuntimeError: Found dtype Double but expected Float
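For context, here is a minimal standalone sketch (not from my notebook, just to illustrate the failure mode) where a float64 target meeting float32 predictions raises the same message during backward:

import torch
import torch.nn.functional as F

pred = torch.zeros(3, requires_grad=True)        # float32, like the model's outputs/parameters
target = torch.zeros(3, dtype=torch.float64)     # float64, like the boxes shown below
loss = F.mse_loss(pred, target)                  # forward runs, the loss is promoted to float64
loss.backward()                                  # RuntimeError: Found dtype Double but expected Float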
I think it's because of this:
Dataset class (taken from 🐠 Reef- Starter Torch FasterRCNN Train [LB=0.416] | Kaggle):
import cv2
import numpy as np
import pandas as pd
import torch
from albumentations.pytorch import ToTensorV2


class ReefDataset:

    def __init__(self, df, transforms=None):
        self.df = df
        self.transforms = transforms

    def can_augment(self, boxes):
        """Check if the bounding boxes are OK to augment.

        For example: image_id 1-490 has a bounding box that is partially outside of the image,
        which breaks albumentations. Here we check that the margins are within the image to make
        sure the augmentation can be applied.
        """
        box_outside_image = ((boxes[:, 0] < 0).any() or (boxes[:, 1] < 0).any()
                             or (boxes[:, 2] > 1280).any() or (boxes[:, 3] > 720).any())
        return not box_outside_image

    def get_boxes(self, row):
        """Returns the bboxes for a given row as an (N, 4) matrix with format [x_min, y_min, x_max, y_max]"""
        boxes = pd.DataFrame(row['annotations'], columns=['x', 'y', 'width', 'height']).astype(float).values
        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        return boxes

    def get_image(self, row):
        """Gets the image for a given row"""
        image = cv2.imread(f'{BASE_DIR}/{row["image_path"]}', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, i):
        row = self.df.iloc[i]
        image = self.get_image(row)
        boxes = self.get_boxes(row)
        n_boxes = boxes.shape[0]

        # Calculate the area
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            'area': torch.as_tensor(area, dtype=torch.float32),
            'image_id': torch.tensor([i]),
            # There is only one class
            'labels': torch.ones((n_boxes,), dtype=torch.int64),
            # Suppose all instances are not crowd
            'iscrowd': torch.zeros((n_boxes,), dtype=torch.int64)
        }

        if self.transforms and self.can_augment(boxes):
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': target['labels']
            }
            sample = self.transforms(**sample)
            image = sample['image']
            if n_boxes > 0:
                target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
        else:
            image = ToTensorV2(p=1.0)(image=image)['image']

        return image, target

    def __len__(self):
        return len(self.df)
image, targets = ds_train[2200]
targets['boxes']
# output
tensor([[510.0000, 597.7778, 532.5000, 626.6667],
[560.0000, 173.3333, 575.6250, 201.1111],
[386.2500, 497.7778, 410.6250, 546.6667],
[348.1250, 403.3333, 373.7500, 447.7778],
[610.6250, 531.1111, 629.3750, 558.8889],
[515.0000, 568.8889, 537.5000, 608.8889],
[300.0000, 307.7778, 339.3750, 382.2222],
[613.1250, 430.0000, 648.1250, 474.4444],
[290.0000, 346.6667, 314.3750, 412.2222],
[581.2500, 488.8889, 596.2500, 515.5555],
[693.7500, 304.4445, 712.5000, 345.5555],
[675.6250, 262.2222, 693.7500, 300.0000],
[378.7500, 152.2222, 398.7500, 192.2222]], dtype=torch.float64)
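To show where the dtype seems to flip, here is a quick check (a sketch; ds_train and its transforms are the ones defined above):

# Trace the dtype through __getitem__ for the same index
row = ds_train.df.iloc[2200]
raw_boxes = ds_train.get_boxes(row)
print(raw_boxes.dtype)                                        # float64: .astype(float) means numpy float64
print(torch.as_tensor(raw_boxes, dtype=torch.float32).dtype)  # torch.float32 before the augmentation branch

image, target = ds_train[2200]
print(target['boxes'].dtype)                                  # torch.float64, as in the output above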
Help needed!