I am trying to fine tune a fasterrcnn_resnet50_fpn
for object detection. I am following this tutorial https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
The model is training but at the
evaluation step
i get the following error:
TypeError Traceback (most recent call last)
<ipython-input-88-b072184830f7> in <module>()
1 model = get_model()
2 model.to(device)
----> 3 evaluate(model, val_dl, device=device)
3 frames
/usr/local/lib/python3.6/dist-packages/torch/autograd/grad_mode.py in decorate_context(*args, **kwargs)
13 def decorate_context(*args, **kwargs):
14 with self:
---> 15 return func(*args, **kwargs)
16 return decorate_context
17
/content/vision/references/detection/engine.py in evaluate(model, data_loader, device)
78 header = 'Test:'
79
---> 80 coco = get_coco_api_from_dataset(data_loader.dataset)
81 iou_types = _get_iou_types(model)
82 coco_evaluator = CocoEvaluator(coco, iou_types)
/content/vision/references/detection/coco_utils.py in get_coco_api_from_dataset(dataset)
204 if isinstance(dataset, torchvision.datasets.CocoDetection):
205 return dataset.coco
--> 206 return convert_to_coco_api(dataset)
207
208
/content/vision/references/detection/coco_utils.py in convert_to_coco_api(ds)
180 ann['category_id'] = labels[i]
181 categories.add(labels[i])
--> 182 ann['area'] = areas[i]
183 ann['iscrowd'] = iscrowd[i]
184 ann['id'] = ann_id
TypeError: 'float' object is not subscriptable
By Dataset and Dataloaders are as follows:
import albumentations as A
from albumentations.pytorch import ToTensorV2
trn_tfms = A.Compose([
A.Rotate(limit=50, p=0.5),
A.Resize(height, width, always_apply=True),
A.CLAHE(),
A.ToFloat(max_value=255.0),
ToTensorV2(p=1.0),
], p=1.0, bbox_params = A.BboxParams(format="pascal_voc"))
val_tfms = A.Compose([
A.Resize(height, width, always_apply=True),
A.ToFloat(max_value=255.0),
ToTensorV2(p=1.0),
], p=1.0, bbox_params = A.BboxParams(format="pascal_voc"))
class RacoonDataset(Dataset):
def __init__(self, dataframe, transforms=None):
self.df = dataframe
self.transforms = transforms
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
# 1. image id
image_id = torch.tensor([idx])
# 2. Grab the image path and read in the image
image_pth = self.df["filename"][idx]
image = cv2.cvtColor(cv2.imread(image_pth), cv2.COLOR_BGR2RGB)
# 3. Since only 1 label we will label it as one
label = torch.ones((1, ), dtype=torch.int64)
# 4. Grab the xmin, ymin, height and width co-ordinates and create the box and calculate area
xmin = self.df["xmin"][idx]
ymin = self.df["ymin"][idx]
xmax = self.df["xmax"][idx]
ymax = self.df["ymax"][idx]
box = [xmin, ymin, xmax, ymax]
area = self.df["width"][idx] * self.df["height"][idx]
# apply transforms
if self.transforms is not None:
box.append("raccoon")
transformed = self.transforms(image=image, bboxes=[box])
image = transformed["image"]
box = transformed["bboxes"][0][:-1]
# Convert box and area to tensor
box = np.array(box).reshape(1, -1)
box = torch.tensor(box, dtype=torch.float32)
area = torch.tensor(area, dtype=torch.float32)
# 5. Suppose all instances are not crowd
iscrowd = torch.zeros((1,), dtype=torch.int64)
# Create the target
target = {}
target["boxes"] = box
target["labels"] = label
target["area"] = area
target["image_id"] = image_id
target["iscrowd"] = iscrowd
return image, target
train_ds = RacoonDataset(dataframe=train_annots, transforms=trn_tfms)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
val_ds = RacoonDataset(dataframe=test_annots, transforms=val_tfms)
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
After encountering the error I tried just training the model without the evaluate step and it works perfectly fine:
Epoch: [0] [ 0/22] eta: 0:01:24 lr: 0.000243 loss: 0.8521 (0.8521) loss_classifier: 0.7601 (0.7601) loss_box_reg: 0.0779 (0.0779) loss_objectness: 0.0047 (0.0047) loss_rpn_box_reg: 0.0094 (0.0094) time: 3.8298 data: 0.0685 max mem: 8647
Epoch: [0] [10/22] eta: 0:00:44 lr: 0.002621 loss: 0.3628 (0.4534) loss_classifier: 0.2488 (0.3548) loss_box_reg: 0.0781 (0.0838) loss_objectness: 0.0043 (0.0054) loss_rpn_box_reg: 0.0092 (0.0094) time: 3.7276 data: 0.0596 max mem: 8647
Epoch: [0] [20/22] eta: 0:00:07 lr: 0.005000 loss: 0.2463 (0.3354) loss_classifier: 0.1465 (0.2421) loss_box_reg: 0.0757 (0.0767) loss_objectness: 0.0058 (0.0071) loss_rpn_box_reg: 0.0093 (0.0094) time: 3.7297 data: 0.0613 max mem: 8647
Epoch: [0] [21/22] eta: 0:00:03 lr: 0.005000 loss: 0.2328 (0.3259) loss_classifier: 0.1438 (0.2332) loss_box_reg: 0.0757 (0.0759) loss_objectness: 0.0058 (0.0075) loss_rpn_box_reg: 0.0092 (0.0092) time: 3.6679 data: 0.0593 max mem: 8647
Epoch: [0] Total time: 0:01:20 (3.6764 s / it)
Epoch: [1] [ 0/22] eta: 0:01:22 lr: 0.005000 loss: 0.1399 (0.1399) loss_classifier: 0.0387 (0.0387) loss_box_reg: 0.0741 (0.0741) loss_objectness: 0.0169 (0.0169) loss_rpn_box_reg: 0.0101 (0.0101) time: 3.7427 data: 0.0616 max mem: 8647
Epoch: [1] [10/22] eta: 0:00:44 lr: 0.005000 loss: 0.1230 (0.1278) loss_classifier: 0.0387 (0.0442) loss_box_reg: 0.0672 (0.0639) loss_objectness: 0.0111 (0.0105) loss_rpn_box_reg: 0.0092 (0.0093) time: 3.7330 data: 0.0562 max mem: 8647
Epoch: [1] [20/22] eta: 0:00:07 lr: 0.005000 loss: 0.1084 (0.1150) loss_classifier: 0.0352 (0.0394) loss_box_reg: 0.0550 (0.0580) loss_objectness: 0.0062 (0.0080) loss_rpn_box_reg: 0.0090 (0.0095) time: 3.7347 data: 0.0603 max mem: 8647
Epoch: [1] [21/22] eta: 0:00:03 lr: 0.005000 loss: 0.1050 (0.1139) loss_classifier: 0.0344 (0.0388) loss_box_reg: 0.0546 (0.0574) loss_objectness: 0.0062 (0.0078) loss_rpn_box_reg: 0.0090 (0.0100) time: 3.6725 data: 0.0602 max mem: 8647
Epoch: [1] Total time: 0:01:20 (3.6786 s / it)
Epoch: [2] [ 0/22] eta: 0:01:22 lr: 0.005000 loss: 0.0959 (0.0959) loss_classifier: 0.0291 (0.0291) loss_box_reg: 0.0566 (0.0566) loss_objectness: 0.0014 (0.0014) loss_rpn_box_reg: 0.0086 (0.0086) time: 3.7363 data: 0.0614 max mem: 8647
Epoch: [2] [10/22] eta: 0:00:44 lr: 0.005000 loss: 0.0803 (0.0815) loss_classifier: 0.0291 (0.0279) loss_box_reg: 0.0395 (0.0412) loss_objectness: 0.0032 (0.0027) loss_rpn_box_reg: 0.0094 (0.0096) time: 3.7409 data: 0.0704 max mem: 8647
Epoch: [2] [20/22] eta: 0:00:07 lr: 0.005000 loss: 0.0796 (0.0814) loss_classifier: 0.0286 (0.0293) loss_box_reg: 0.0372 (0.0394) loss_objectness: 0.0029 (0.0031) loss_rpn_box_reg: 0.0094 (0.0096) time: 3.7351 data: 0.0634 max mem: 8647
Epoch: [2] [21/22] eta: 0:00:03 lr: 0.005000 loss: 0.0796 (0.0818) loss_classifier: 0.0286 (0.0295) loss_box_reg: 0.0372 (0.0394) loss_objectness: 0.0029 (0.0031) loss_rpn_box_reg: 0.0098 (0.0097) time: 3.6728 data: 0.0620 max mem: 8647
Epoch: [2] Total time: 0:01:20 (3.6781 s / it)
What am i missing here ?