Change Mask RCNN Config

Zret3ck · December 2, 2021, 2:42pm

Hi, I am currently following this guide to create a Mask R-CNN that is able to detect ears. I have trained it on my training dataset for 50 epochs, and the final result is either really accurate or has a lot of noise:

0019

Would it be possible to change the config for the Mask R-CNN to specify the maximum number of ears in an image, or to specify a higher minimum confidence? Are there any other parameters that might help increase the accuracy too?

Get Model Function:

def get_model_instance_segmentation(num_classes, box_score_thresh=0.05,
                                    box_detections_per_img=100):
    """Build a COCO-pre-trained Mask R-CNN whose heads predict ``num_classes``.

    The pre-trained box and mask predictors are replaced with freshly
    initialised ones sized for ``num_classes``; the rest of the network keeps
    its pre-trained weights.

    Args:
        num_classes: Number of classes the new box and mask heads predict
            (torchvision's detection models count the background as a class).
        box_score_thresh: Minimum classification score a detection must reach
            to be returned at inference time. Raise it to discard
            low-confidence detections. Defaults to torchvision's own default.
        box_detections_per_img: Maximum number of detections returned per
            image at inference time. Lower it when the number of objects per
            image is known to be small. Defaults to torchvision's own default.

    Returns:
        The configured ``MaskRCNN`` model.
    """
    # Load an instance segmentation model pre-trained on COCO. The extra
    # keyword arguments are forwarded to the MaskRCNN constructor and only
    # affect post-processing at inference time.
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True,
        box_score_thresh=box_score_thresh,
        box_detections_per_img=box_detections_per_img,
    )

    # Replace the pre-trained box classifier with one sized for num_classes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Replace the pre-trained mask predictor likewise.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes,
    )

    # FIXME: loading previously trained weights does not work yet; check how
    # to save/load the trained model.
    # if os.path.exists(model_PATH):
    #     model.load_state_dict(torch.load(model_PATH))

    return model

Evaluate Test Function (Probably Unrelated):

@torch.inference_mode()
def evaluate(model, data_loader, device, score_threshold=0.0,
             mask_threshold=0.5, output_dir="segmentation/results/"):
    """Run the model over ``data_loader`` and save one contour image per input.

    For every image, the predicted instance masks are binarised, their
    contours are drawn onto a blank single-channel canvas of the image's size,
    and the canvas is written to ``output_dir`` as ``0001.jpeg``,
    ``0002.jpeg``, ... in the order the images are yielded by the loader.

    Args:
        model: Detection model returning, per image, a dict with ``'masks'``
            and ``'scores'`` entries (as torchvision's Mask R-CNN does).
        data_loader: Iterable yielding ``(images, targets)`` batches; the
            targets are not used.
        device: Device the images are moved to before inference.
        score_threshold: Detections scoring below this value are skipped.
            The default of 0.0 keeps every detection the model returns.
        mask_threshold: Probability at or above which a soft-mask pixel counts
            as foreground. Without binarisation any non-zero pixel would be
            treated as foreground by ``cv2.findContours``, so faint
            low-probability regions would be outlined as well.
        output_dir: Directory the result images are written to; created if it
            does not exist.
    """
    import os

    # cv2.imwrite fails silently when the target directory is missing.
    os.makedirs(output_dir, exist_ok=True)
    model.eval()

    # Running count across batches so that every image in a batch gets its
    # own output file, not just the first one.
    image_number = 0
    for images, _targets in data_loader:
        images = [img.to(device) for img in images]
        predictions = model(images)

        for image, prediction in zip(images, predictions):
            image_number += 1

            # Size the canvas from the input image (last two dims are H, W)
            # so contours line up for any resolution.
            height, width = image.shape[-2:]
            image_mask = np.zeros((height, width, 1), dtype="uint8")

            for mask, score in zip(prediction['masks'], prediction['scores']):
                if score < score_threshold:
                    continue

                # mask has a leading channel dimension of size 1; binarise the
                # soft mask to {0, 255} before extracting contours.
                binary_mask = (mask[0] >= mask_threshold).byte().mul(255).cpu().numpy()
                contours, _ = cv2.findContours(
                    binary_mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
                cv2.drawContours(image_mask, contours, -1, (255, 0, 0), 2, cv2.LINE_AA)

            file_name = format(image_number, "04") + '.jpeg'
            cv2.imwrite(os.path.join(output_dir, file_name), image_mask)