I am trying to train the Mask R-CNN model on my dataset using torchvision.models.detection.maskrcnn_resnet50_fpn
I already prepared my annotations in a json file having the labels, bounding boxes in the shape of [x1, y1, x2, y2] and the corresponding binary mask. This is the Dataloader I am using:
import torch
import json
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from PIL import Image
import os
import numpy as np
from torchvision import transforms
import Config
class CustomDataset(Dataset):
def __init__(self, images_dir, masks_dir, json_file, transform=None):
self.images_dir = images_dir
self.masks_dir = masks_dir
with open(json_file) as f:
self.data = json.load(f)
self.transform = transform
self.image_ids = [img['id'] for img in self.data["images"]]
self.masks = {img['id']: np.array(Image.open(os.path.join(masks_dir, img['file_name'])).convert("L"), dtype = np.uint8) for img in self.data["images"]}
def __len__(self):
return len(self.image_ids)
def __getitem__(self, idx):
# Get image ID
img_id = self.image_ids[idx]
img = next(image for image in self.data["images"] if image["id"] == img_id)
# Load image
image = np.array(Image.open(os.path.join(self.images_dir, img['file_name'])).convert("L"), dtype = np.float)
mask = self.masks[img_id]
# apply the transform if any
if self.transform:
aug = self.transform(image = image, mask = mask)
image = aug['image']
mask = aug['mask']
annotations = [ann for ann in self.data["annotations"] if ann["image_id"] == img_id]
# extract boxes, labels and masks from annotations
boxes = [ann["bbox"] for ann in annotations]
labels = [ann["category_id"] for ann in annotations]
# convert the binary mask array to a torch tensor
mask = torch.tensor(mask)
# convert boxes to format [x1, y1, width, height]
boxes = [[bbox[0], bbox[1], bbox[2], bbox[3]] for bbox in boxes]
# convert labels to integers
labels = [int(label) for label in labels]
# convert image and mask to torch tensors
image = transforms.ToTensor()(image).to(Config.DEVICE)
# create target dictionary
target = {"boxes": torch.tensor(boxes).to(Config.DEVICE), "labels": torch.tensor(labels).to(Config.DEVICE), "masks": mask.to(Config.DEVICE)}
return image, target
and I am training the model using this code:
import torch
import torchvision
from torchvision import models
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import Config
from dataloader import CustomDataset
from imutils import paths
import numpy as np
from tqdm import tqdm
from torch.nn import CrossEntropyLoss
import torch.optim.lr_scheduler as lr_scheduler
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True).to(Config.DEVICE)
json_path = 'path_to_json_file'
transform = A.Compose([A.Resize(Config.Input_Height, Config.Input_Width),
A.Normalize(mean=(0.0), std=(1.0))])
# Create a dataloader for the custom dataset
dataset = CustomDataset(images_dir = Config.Image_dataset_dir, masks_dir = Config.Mask_dataset_dir,
json_file=json_path, transform = transform)
# Split the data into training, validation and testing sets
train_split = 0.8
val_split = 0.1
train_size = int(train_split * len(dataset))
val_size = int(val_split * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=Config.Batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=Config.Batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=Config.Batch_size, shuffle=False)
# Define the loss function, optimizer and scheduler
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)
# Train the model
for e in tqdm(range(Config.Num_epochs)):
totalTrainloss, totalValLoss = 0,0
model.train()
# loop over the training set
for (i, (x, y)) in enumerate(train_loader):
output = model(x)
totalTrainloss += loss_function(output, y)
optimizer.zero_grad()
totalTrainloss.backward()
optimizer.step()
# switch off autograd
with torch.no_grad():
# set the model in evaluation mode
model.eval()
# loop over the validation set
for (x, y) in val_loader:
# make the predictions and calculate the validation loss
output = model(x)
totalValLoss += loss_function(output, y)
print("EPOCH: {}/{}".format(e + 1, Config.Num_epochs))
print("Train loss: {:.4f}, Validation loss: {:.4f}".format(totalTrainloss, totalValLoss))
but the training of the model is returning the mentioned error in the title: AssertionError: targets should not be none when in training mode
I tried to loop over the train_loader
and print out image and target, and it returns the values from the dataloader as expected.
Any hint what’s going wrong here please?