I am building a custom COCO dataset, and attempting to run it through the object detection tutorial found under TorchVision Object Detection Finetuning Tutorial — PyTorch Tutorials 1.9.0+cu102 documentation
I’ve gotten the tutorial’s PennFudanPed dataset trained and evaluated… it all seems to work reasonably and in line with the expectations of the tutorial.
However, when I add the CocoDataset … I notice my mask predictions come out looking empty
# Print every predicted mask tensor using torch's "full" print profile.
for prediction in predictions:
    torch.set_printoptions(profile="full")
    masks = prediction['masks']
    print(masks)
...
> tensor([], size=(0, 1, 912, 1631))
I’ve double-checked the masks and they’re correctly aligned, and I am getting good metrics on the output evaluation step, like so:
creating index...
index created!
[W pthreadpool-cpp.cc:99] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)
Test: [ 0/50] eta: 0:00:19 model_time: 0.0885 (0.0885) evaluator_time: 0.0033 (0.0033) time: 0.3849 data: 0.2859 max mem: 5575
Test: [49/50] eta: 0:00:00 model_time: 0.0392 (0.0413) evaluator_time: 0.0024 (0.0032) time: 0.0459 data: 0.0021 max mem: 5575
Test: Total time: 0:00:03 (0.0683 s / it)
Averaged stats: model_time: 0.0392 (0.0413) evaluator_time: 0.0024 (0.0032)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.901
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.847
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.908
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.667
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.923
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.923
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.850
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.930
IoU metric: segm
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.909
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.883
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.912
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.672
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.929
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.929
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.900
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.932
That's it!
I attach the dataset here…
import numpy as np
import os
import json
import torch
from PIL import Image
from skimage.draw import polygon2mask
def segmentation_to_mask_boolean(segmentation_list, img):
    """Rasterize the first polygon of a segmentation into a 2-D mask.

    Only ``segmentation_list[0]`` is used; any further entries are ignored.
    That entry is a flat coordinate list which is cut into consecutive
    pairs, one pair per polygon vertex.

    Args:
        segmentation_list: Sequence whose first element is a flat list of
            polygon coordinates.
        img: Object exposing ``.size``, which is passed to ``polygon2mask``
            as the mask shape (presumably a PIL image, whose size is
            (width, height) -- confirm against the caller).

    Returns:
        The array produced by ``polygon2mask`` with its two axes swapped.
    """
    flat_coords = segmentation_list[0]
    pair_len = 2
    # Consecutive, non-overlapping slices of length two: one per vertex.
    vertices = np.array(
        [
            flat_coords[start:start + pair_len]
            for start in range(0, len(flat_coords), pair_len)
        ]
    )
    raster = polygon2mask(img.size, vertices)
    # Three quarter-turns followed by a left/right flip is exactly a
    # transpose; presumably needed because the mask was drawn with the axes
    # in (width, height) order -- confirm.
    return raster.T
class CocoDataset(object):
    """Single-class instance-segmentation dataset backed by one JSON file.

    ``<root>/annotations.json`` must contain an ``"images"`` list (entries
    with ``"id"`` and ``"file_name"``) and an ``"annotations"`` list (entries
    with ``"image_id"``, ``"bbox"`` and ``"segmentation"``). Image files are
    opened relative to ``root`` using each entry's ``"file_name"``.

    Args:
        root: Directory holding ``annotations.json`` and the image files.
        transforms: Optional callable ``(img, target) -> (img, target)``
            applied at the end of ``__getitem__``.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        anno_path = os.path.join(root, "annotations.json")
        with open(anno_path) as annotation_file:
            self.annotations_data = json.load(annotation_file)
        self.imgs = self.annotations_data["images"]

        # Group annotations by the image id they reference, once, so that
        # __getitem__ does not have to rescan the whole annotation list.
        self._annotations_by_image = {}
        for annotation in self.annotations_data["annotations"]:
            self._annotations_by_image.setdefault(
                annotation["image_id"], []
            ).append(annotation)

    def _objects_for(self, idx):
        """Return the annotations belonging to the image at position ``idx``."""
        # Annotations point at an image through its "id" field, which is not
        # necessarily the image's position in the "images" list.
        return self._annotations_by_image.get(self.imgs[idx]["id"], [])

    def __getitem__(self, idx):
        """Load image ``idx`` and build its detection/segmentation target.

        Returns:
            ``(img, target)`` where ``target`` has the keys ``boxes``
            (float32, one ``[xmin, ymin, xmax, ymax]`` row per object),
            ``labels`` (int64, all ones), ``masks`` (uint8), ``image_id``,
            ``area`` (computed from the boxes) and ``iscrowd`` (all zeros).

        Raises:
            Exception: If the image has no annotations.
        """
        img_path = os.path.join(self.root, self.imgs[idx]['file_name'])
        img = Image.open(img_path).convert("RGB")

        objects_list = self._objects_for(idx)
        num_objs = len(objects_list)
        if num_objs == 0:
            raise Exception("Image_ID has NO OBJECTS " + str(idx))

        # "bbox" is read as [x, y, width, height]; convert to corner form.
        boxes = [
            [x, y, x + width, y + height]
            for x, y, width, height in (obj['bbox'][:4] for obj in objects_list)
        ]
        # Stack into one array first: converting a Python list of arrays
        # element by element is far slower than converting a single ndarray.
        masks = np.stack(
            [
                segmentation_to_mask_boolean(obj['segmentation'], img)
                for obj in objects_list
            ]
        )

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Return the number of entries in the ``"images"`` list."""
        return len(self.imgs)
I have not really changed the tutorial’s engine or training function, except that I save the model after training:
# Training loop; its body is elided in the post.
for epoch in range(num_epochs):
    ...
print("That's it!")
# Persist only the weights (state dict), not the whole model object.
trained_weights = model.state_dict()
torch.save(trained_weights, "/saving_the_model.pth")
Scoring is then done in a separate scoring.py
def score(paths_to_images):
    """Run the saved model over the given images and collect all predictions.

    Args:
        paths_to_images: Passed straight to ``ImageDataset``.

    Returns:
        A list containing every prediction the model produced, in loader
        order (one entry per element of each batch's model output).
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    num_classes = 2
    # NOTE(review): has_mask=False presumably builds the model without a mask
    # head -- confirm it matches the architecture that was trained, since the
    # caller is interested in the 'masks' output.
    model = get_model_instance_segmentation(num_classes, has_mask=False, use_pretrained=False)
    path_to_model = "/path_to_my_model.pth"
    # map_location works for both CPU and CUDA, so no branching is needed.
    state_dict = torch.load(path_to_model, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    # NOTE(review): torchvision detection models normalize their inputs
    # internally; if get_model_instance_segmentation returns one of those and
    # training used only ToTensor, this Normalize feeds the model a different
    # input distribution than it was trained on -- confirm against the
    # training transforms.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    images = ImageDataset(paths_to_images, transform=transform)
    loader = torch.utils.data.DataLoader(images, batch_size=1, num_workers=1)
    all_predictions = []
    with torch.no_grad():
        for batch in loader:
            all_predictions.extend(model(batch.to(device)))
    # Return the accumulated list; returning the per-batch variable would
    # yield only the last batch and fail outright on an empty loader.
    return all_predictions