Hi, I am currently following this guide to create an RCNN that is able to detect ears. I have trained it on my training dataset for 50 epochs, and the final result is either really accurate or contains a lot of noise.
Would it be possible to change the config for the RCNN to specify the maximum number of ears in an image, or to specify a higher minimum confidence? Are there other parameters that might help increase the accuracy too?
Get Model Function:
def get_model_instance_segmentation(num_classes, *, box_score_thresh=None,
                                    box_detections_per_img=None):
    """Build a Mask R-CNN (ResNet-50 FPN) whose heads predict ``num_classes``.

    The COCO pre-trained model is loaded and its box and mask predictors are
    swapped for freshly initialised ones sized for ``num_classes``.

    Args:
        num_classes: Number of output classes given to both replacement
            heads (torchvision counts the background as one of them).
        box_score_thresh: Optional minimum classification score a detection
            needs in order to be returned at inference time. ``None`` leaves
            torchvision's default in place.
        box_detections_per_img: Optional upper bound on the number of
            detections returned per image. ``None`` leaves torchvision's
            default in place.

    Returns:
        The modified torchvision Mask R-CNN model.
    """
    # Forward only the options the caller actually set, so that omitting them
    # keeps the library defaults (and the previous behaviour of this function).
    detector_options = {}
    if box_score_thresh is not None:
        detector_options["box_score_thresh"] = box_score_thresh
    if box_detections_per_img is not None:
        detector_options["box_detections_per_img"] = box_detections_per_img

    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True, **detector_options)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, num_classes)

    # FIXME doesn't work, check how to save/load the trained model
    # if os.path.exists(model_PATH):
    #     model.load_state_dict(torch.load(model_PATH))
    return model
Evaluate Test Function (Probably Unrelated):
@torch.inference_mode()
def evaluate(model, data_loader, device, *, score_thresh=0.0, mask_thresh=0.5):
    """Run the model over ``data_loader`` and save contour images to disk.

    For every batch, the predicted instance masks of the batch's first image
    are binarized, their contours are drawn onto a blank single-channel
    canvas, and the canvas is written to
    ``segmentation/results/<batch number, 4 digits>.jpeg``.

    Args:
        model: Detection model returning, per image, a dict with ``'masks'``
            and ``'scores'`` entries.
        data_loader: Iterable yielding ``(images, targets)`` batches; the
            targets are not used here.
        device: Device the images are moved to before inference.
        score_thresh: Detections scoring below this value are skipped. The
            default of 0.0 keeps every detection the model returns.
        mask_thresh: Probability above which a mask pixel counts as
            foreground.
    """
    model.eval()
    for idx, (images, _targets) in enumerate(data_loader):
        images = [img.to(device) for img in images]
        # NOTE: only the first image of each batch is rendered, and the output
        # file is numbered by batch index.
        output = model(images)[0]

        # Drop low-confidence detections before drawing anything.
        confident = output['scores'] >= score_thresh
        masks = output['masks'][confident]

        # Size the canvas from the input rather than a fixed 360x480;
        # assumes each image tensor is laid out as (C, H, W).
        height, width = images[0].shape[-2:]
        image_mask = np.zeros((height, width, 1), dtype="uint8")

        for soft_mask in masks:
            # findContours treats every non-zero pixel as foreground, so the
            # per-pixel probabilities must be thresholded first; scaling them
            # to 0..255 directly would outline almost the whole detection box.
            binary = (soft_mask[0] > mask_thresh).byte().mul(255).cpu().numpy()
            contours, _ = cv2.findContours(
                binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
            cv2.drawContours(image_mask, contours, -1, (255, 0, 0), 2, cv2.LINE_AA)

        cv2.imwrite("segmentation/results/" + format(idx + 1, "04") + '.jpeg', image_mask)