PyTorch image segmentation mask polygons

Does anyone know how to get polygon masks out of the inference results so I can send some simple JSON across the wire to callers? I'm not familiar with the tensor output for the masks produced during segmentation inference.

import torch
import torchvision

model = torch.load(model_file)
model.to(device)

n_threads = torch.get_num_threads()
torch.set_num_threads(1)
model.eval()

def load_dataset():
    train_dataset = torchvision.datasets.ImageFolder(
        root="images",
        transform=torchvision.transforms.ToTensor()
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=1,
        num_workers=1,
        shuffle=True
    )
    return train_loader

# run inference without tracking gradients
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(load_dataset()):

        data = data.to(device)

        output = model(data)

        boxes = output[0]["boxes"]
        labels = output[0]["labels"]
        scores = output[0]["scores"]
        masks = output[0]["masks"]

        # how can I find the polygons for the masks in the tensors
        print(masks)

torch.cuda.empty_cache()

I assume your predictions might be a tensor containing logits in the shape [batch_size, nb_classes, height, width].
If that’s the case, you could first create the binary predictions for each class using:

preds = torch.argmax(output, dim=1)
preds_class0 = preds == 0
preds_class1 = preds == 1
...

Once you have these binary maps, you could use cv2.findContours from OpenCV, as in the sketch below.
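For example, a minimal sketch of that approach (assuming OpenCV 4's two-value findContours return; preds_class1 is the illustrative boolean map from above):

import cv2
import numpy as np

# Boolean class map -> uint8 image (0 or 255), which is what OpenCV expects.
binary = preds_class1.cpu().numpy().astype(np.uint8) * 255

# RETR_EXTERNAL keeps only the outermost contour of each blob;
# CHAIN_APPROX_SIMPLE compresses straight runs of points to their endpoints.
contours, hierarchy = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Each contour is an (N, 1, 2) array of (x, y) points, i.e. a polygon.
polygons = [c.squeeze(1).tolist() for c in contours]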

Thanks for the help, my solution is below.

import random
import cv2
import matplotlib.pyplot as plt
import numpy as np

def random_colour_masks(image):
    colours = [[0, 255, 0], [0, 0, 255], [255, 0, 0], [0, 255, 255], [255, 255, 0], [255, 0, 255], [80, 70, 180], [250, 80, 190], [245, 145, 50], [70, 150, 250], [50, 190, 190]]
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    # paint the pixels where the mask is set with a randomly chosen colour
    r[image == 1], g[image == 1], b[image == 1] = colours[random.randrange(len(colours))]
    coloured_mask = np.stack([r, g, b], axis=2)
    return coloured_mask

with torch.no_grad():
    for batch_idx, (data, target) in enumerate(load_dataset()):

        data = data.to(device)

        output = model(data)

        boxes = output[0]["boxes"]
        labels = output[0]["labels"]
        scores = output[0]["scores"]
        # threshold the soft masks ([N, 1, H, W], values in [0, 1]) into binary numpy arrays
        masks = (output[0]["masks"] > 0.5).squeeze().detach().cpu().numpy()

        img_path = "images/my-img.jpg"

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        for i in range(len(masks)):
            rgb_mask = random_colour_masks(masks[i])

            # convert the mask to greyscale (could be optimised by skipping the
            # colour conversion and thresholding the mask to uint8 directly)
            gray_mask = cv2.cvtColor(rgb_mask, cv2.COLOR_RGB2GRAY)

            # find contours (OpenCV 4 returns two values)
            contours, hierarchy = cv2.findContours(gray_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

            # draw contours
            cv2.drawContours(img, contours, -1, (0, 255, 0), 20)

        plt.imshow(img)
        plt.show()

I don't really need to convert the masks to colour first; the greyscale mask alone should be enough to find the contours, but I could not get that working.
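For anyone attempting that direct route, here is a minimal sketch that also serialises the result to the JSON I was originally after (the payload layout is just illustrative; masks, labels and scores are the thresholded outputs from the loop above):

import json
import cv2
import numpy as np

results = []
for i in range(len(masks)):
    # boolean mask -> uint8 image (0/255); no colour round trip needed
    binary = masks[i].astype(np.uint8) * 255

    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for c in contours:
        results.append({
            "label": int(labels[i]),
            "score": float(scores[i]),
            # (N, 1, 2) contour -> list of [x, y] pairs
            "polygon": c.squeeze(1).tolist(),
        })

payload = json.dumps(results)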