Object detection tutorial with COCODataset giving empty mask predictions

I am building a custom COCO dataset and attempting to run it through the TorchVision Object Detection Finetuning Tutorial (PyTorch Tutorials 1.9.0+cu102 documentation).

I've gotten the tutorial's PennFudanPed dataset trained and evaluated; it all works reasonably and in line with the tutorial's expectations.

However, when I swap in my CocoDataset, the mask predictions come out empty:

torch.set_printoptions(profile="full")

for prediction in predictions:
    masks = prediction['masks']
    print(masks)
...


> tensor([], size=(0, 1, 912, 1631))
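
If I'm reading the torchvision output format right, a leading dimension of 0 means zero detections survived for this image, rather than masks that exist but are blank. A minimal check (these are the standard keys of the torchvision detection output dict):

for prediction in predictions:
    print(prediction['boxes'].shape)   # (N, 4); N == 0 here means no detections
    print(prediction['scores'])        # tensor([]) when nothing is detected
    print(prediction['masks'].shape)   # (N, 1, H, W)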

I've double-checked the masks and they're correctly aligned, and I am getting good metrics in the evaluation step, like so:

creating index...
index created!
[W pthreadpool-cpp.cc:99] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)
Test:  [ 0/50]  eta: 0:00:19  model_time: 0.0885 (0.0885)  evaluator_time: 0.0033 (0.0033)  time: 0.3849  data: 0.2859  max mem: 5575
Test:  [49/50]  eta: 0:00:00  model_time: 0.0392 (0.0413)  evaluator_time: 0.0024 (0.0032)  time: 0.0459  data: 0.0021  max mem: 5575
Test: Total time: 0:00:03 (0.0683 s / it)
Averaged stats: model_time: 0.0392 (0.0413)  evaluator_time: 0.0024 (0.0032)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.901
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.847
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.908
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.667
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.923
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.923
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.850
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.930
IoU metric: segm
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.909
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.883
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.912
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.672
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.929
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.929
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.900
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.932
That's it!

I attach the dataset class here:

import numpy as np
import os
import json
import torch
from PIL import Image
from skimage.draw import polygon2mask

def segmentation_to_mask_boolean(segmentation_list, img):
    # COCO segmentations may hold several polygons per object;
    # this only uses the first one
    segmentation_list = segmentation_list[0]

    # PIL's img.size is (width, height)
    image_shape = img.size

    # split the flat [x1, y1, x2, y2, ...] list into (x, y) pairs
    n = 2
    final_cords_list = [segmentation_list[i * n:(i + 1) * n]
                        for i in range((len(segmentation_list) + n - 1) // n)]
    polygon = np.array(final_cords_list)

    # polygon2mask expects (row, col) points, so with a (width, height)
    # shape and (x, y) points the mask comes out as (width, height);
    # transposing gives the usual (height, width) layout
    mask = polygon2mask(image_shape, polygon)
    mask = mask.T
    return mask
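
(As an aside: with pycocotools installed, the polygon-to-mask conversion can skip the axis juggling entirely; a sketch, using the real pycocotools mask API and handling all of an object's polygons rather than just the first:)

from pycocotools import mask as mask_utils

def segmentation_to_mask_coco(segmentation_list, img):
    # pycocotools wants (height, width); PIL's img.size is (width, height)
    width, height = img.size
    rles = mask_utils.frPyObjects(segmentation_list, height, width)
    rle = mask_utils.merge(rles)                 # union of all polygons
    return mask_utils.decode(rle).astype(bool)   # (height, width) bool mask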


class CocoDataset(object):

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        anno_path = os.path.join(root, "annotations.json")
        with open(anno_path) as annotation_file:
            self.annotations_data = json.load(annotation_file)
        self.imgs = self.annotations_data["images"]

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, self.imgs[idx]['file_name'])
        img = Image.open(img_path).convert("RGB")

        # collect every annotation for this image
        # (assumes each annotation's image_id equals the list index)
        objects_list = []
        for ann in self.annotations_data['annotations']:
            if ann['image_id'] == idx:
                objects_list.append(ann)

        num_objs = len(objects_list)
        if num_objs == 0:
            raise Exception("Image_ID has NO OBJECTS " + str(idx))

        boxes = []
        masks = []
        for obj in objects_list:
            # COCO bbox format is [x, y, width, height]
            xmin = obj['bbox'][0]
            ymin = obj['bbox'][1]
            xmax = xmin + obj['bbox'][2]
            ymax = ymin + obj['bbox'][3]
            boxes.append([xmin, ymin, xmax, ymax])
            masks.append(segmentation_to_mask_boolean(obj['segmentation'], img))

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)
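
(One efficiency note to self: rather than scanning every annotation on each __getitem__ call, the annotations could be bucketed by image_id once in __init__; a sketch, under the same image_id == index assumption as above:)

from collections import defaultdict

# in __init__, after loading annotations_data:
self.anns_by_image = defaultdict(list)
for ann in self.annotations_data['annotations']:
    self.anns_by_image[ann['image_id']].append(ann)

# __getitem__ then reduces to a dictionary lookup:
objects_list = self.anns_by_image[idx]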

I have not really changed the tutorial's engine or training function, except that I save the model at the end:

    for epoch in range(num_epochs):
       ...

    print("That's it!")
    torch.save(model.state_dict(), "/saving_the_model.pth")

Scoring is then done in a separate scoring.py

import torch
from torchvision import transforms

def score(paths_to_images):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    num_classes = 2
    model = get_model_instance_segmentation(num_classes, has_mask=False, use_pretrained=False)

    path_to_model = "/path_to_my_model.pth"

    # map_location is harmless when CUDA is available, so the conditional isn't needed
    state_dict = torch.load(path_to_model, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)

    # (note to self: torchvision detection models already normalize inputs
    # internally, so this Normalize may be applied twice)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    images = ImageDataset(paths_to_images, transform=transform)
    loader = torch.utils.data.DataLoader(images, batch_size=1, num_workers=1)

    all_predictions = []
    with torch.no_grad():
        for batch in loader:
            predictions = list(model(batch.to(device)))
            for prediction in predictions:
                all_predictions.append(prediction)

    return all_predictions
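
One thing I still want to rule out: torchvision's detection models drop low-scoring detections at inference time (the box_score_thresh constructor argument, default 0.05 for maskrcnn_resnet50_fpn). Lowering it would at least show whether the network proposes anything at all; a purely diagnostic sketch:

import torchvision

# keep even very low-confidence detections, just to see whether the
# model produces any raw proposals for these images
model = torchvision.models.detection.maskrcnn_resnet50_fpn(
    pretrained=False, num_classes=2, box_score_thresh=0.01)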

I'm starting to look over this post, which might point to a way to fix it, but can someone confirm these tensors really are empty, as the printout suggests? If so, I probably need to debug either how I save the model or how I score with it.

I checked the annotations, and looking inside the boxes prediction it's likewise empty (screenshot below), even though everything looks fine in the tutorial's evaluation step (around 0.9 AP on the IoU metrics). Have I saved and loaded the model incorrectly, or have I not resized the new data correctly?

[screenshot: the boxes prediction output]

I whittled the original example down to something simpler and wrote what I think is a proper score function, only to realize the tutorial's T.ToTensor transform does something unexpected for inference: it takes and returns a target alongside the image. So I need to rewrite it for inference, correct? I still need to convert every image to a tensor, don't I?

So far I have this:

@torch.no_grad()
def score(paths_to_images):
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    num_classes = 2

    dataset_inference = UnannotatedImageDataset(paths_to_images, transforms=get_transform_Inference())
    data_loader_inference_set = torch.utils.data.DataLoader(dataset_inference, batch_size=1, shuffle=False, num_workers=1, collate_fn=utils.collate_fn)

    linux_path_to_model = "/path_to_model.pth"
    path_to_model = linux_path_to_model
    model = get_model_instance_scoring(num_classes)

    state_dict = torch.load(path_to_model, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)

    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()

    # (this is line 62 of score.py in the traceback below)
    images = list(img.to(device) for img in data_loader_inference_set)

    if torch.cuda.is_available():
        torch.cuda.synchronize()

    outputs = model(images)

    outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
    res = {target["image_id"].item(): output for output in outputs}  # 'target' is left over from engine.evaluate and is undefined here
    torch.set_num_threads(n_threads)

And the transforms look like this. Why, when I use ToTensorInference, am I getting this error?
TypeError: __call__() missing 1 required positional argument: 'target'

from typing import Dict, Optional, Tuple

import torch.nn as nn
from torch import Tensor
from torchvision.transforms import functional as F


class ToTensor(nn.Module):
    def forward(self, image: Tensor, target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        image = F.to_tensor(image)
        return image, target


class ToTensorInference(nn.Module):
    def forward(self, image: Tensor) -> Tensor:
        image = F.to_tensor(image)
        return image
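
(My current suspicion: the tutorial's Compose calls each transform as t(image, target), which would explain the missing-'target' error once the pipeline reaches ToTensorInference. If so, inference probably needs its own image-only Compose; a sketch, where get_transform_Inference is my helper from the score function above:)

class ComposeInference:
    """Chains image-only transforms for unannotated inference data."""
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image):
        for t in self.transforms:
            image = t(image)
        return image


def get_transform_Inference():
    return ComposeInference([ToTensorInference()])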

I thought I was on the right track, but something is crashing in the scoring:

/anaconda3/envs/pytorch_base/bin/python /torchvision-objectdetection-coco/score.py
UnannotatedDataSet found 6 items of interest
/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/cuda/__init__.py:104: UserWarning: 
NVIDIA GeForce RTX 3090 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the NVIDIA GeForce RTX 3090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/

  warnings.warn(incompatible_device_warn.format(device_name, capability, " ".join(arch_list), device_name))
Traceback (most recent call last):
  File "/torchvision-objectdetection-coco/score.py", line 75, in <module>
    score("/test_inference_set")
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/torchvision-objectdetection-coco/score.py", line 62, in score
    images = list(img.to(device) for img in data_loader_inference_set)
  File "/torchvision-objectdetection-coco/score.py", line 62, in <genexpr>
    images = list(img.to(device) for img in data_loader_inference_set)
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
    data = self._next_data()
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
    return self._process_data(data)
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
    data.reraise()
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/_utils.py", line 429, in reraise
    raise self.exc_type(msg)
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
    data = fetcher.fetch(index)
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/anaconda3/envs/pytorch_base/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/emcp/Dev/git/EMCP/pytorch-tutorials/intermediate/torchvision-objectdetection-coco/datasets/UnannotatedImageDataset.py", line 49, in __getitem__
    img = self.transforms(img)
TypeError: __call__() missing 1 required positional argument: 'target'


Process finished with exit code 1