Training torchvision faster rcnn on custom dataset

Hi, I want to train the torchvision.models.detection.fasterrcnn_resnet50_fpn model on the PASCAL-Part dataset for joint object and semantic part detection, similar to what is discussed in the paper (so I will have to add extra code for my task). Can anyone tell me how to train the Faster R-CNN model on this dataset? I cannot find any code for training this model in the PyTorch documentation.

Thanks.

Did you find any success?

You can refer to the code below:

import torch
import torch.nn as nn
from data import PalmDataset
from torch.utils.data import DataLoader
#from torchvision.models.detection import fasterrcnn_resnet50_fpn as faster_rcnn
from models.faster_rcnn import fasterrcnn_resnet50_fpn as faster_rcnn
import numpy as np
import argparse
import os
import imgaug.augmenters as iaa

from config import *
from tqdm import tqdm

def set_random_seed():
    """Seed the NumPy and PyTorch random generators with a fixed value of 0.

    When CUDA is available, the CUDA generator is seeded as well and cuDNN
    is switched to deterministic mode with auto-tuning (benchmark) disabled.
    """
    seed = 0
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def print_sys_info():
    """Print the PyTorch version and, when CUDA is available, GPU details.

    The CUDA version, cuDNN version and GPU device name are printed only if
    ``torch.cuda.is_available()`` is true. A blank line always ends the
    report.
    """
    print('Torch version: {}'.format(torch.__version__))

    if torch.cuda.is_available():
        # Each probe is evaluated lazily, right before its line is printed.
        gpu_facts = (
            ('cuda version: {}', lambda: torch.version.cuda),
            ('cudnn version: {}', lambda: torch.backends.cudnn.version()),
            ('GPU name: {}', lambda: torch.cuda.get_device_name()),
        )
        for template, probe in gpu_facts:
            print(template.format(probe()))

    print()

def parameters_parser(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the process command line (``sys.argv[1:]``) is
            parsed, which is the original behavior. Passing an explicit list
            allows the parser to be used programmatically.

    Returns:
        An ``argparse.Namespace`` with the attributes ``epochs``, ``gpu``,
        ``batch_size``, ``lr``, ``output``, ``save_epoch``, ``logfile`` and
        ``finetune``.
    """
    parser = argparse.ArgumentParser(description='This app is used to train a faster-rcnn model to detect palmprint')

    parser.add_argument('--epochs', type=int, default=300, metavar='N', help="To specify the total epochs, default: 300")
    parser.add_argument('--gpu', type=int, default=0, metavar='GPU', help="To determine which GPU device is used to train the model, default: 0")
    parser.add_argument('--batch-size', type=int, default=4, metavar='N', help="To choose the batch size, default: 4")
    parser.add_argument('--lr', type=float, default=1e-5, metavar='LR', help="To specify learning rate, default: 1e-5")
    parser.add_argument('--output', type=str, default='default', help="To specify dir to save model, default: 'default'")
    parser.add_argument('--save-epoch', type=int, default=5, metavar='N', help='To determine how many epochs each model is saved, default: 5')
    parser.add_argument('--logfile', type=str, default='log.txt', metavar='LOG', help="To specify where log will be saved, default: 'log.txt'")
    parser.add_argument('--finetune', type=str, default=None, metavar='MODEL', help="To specify a checkpoints where to start to fine tune a model, default: None")

    # argparse falls back to sys.argv[1:] when argv is None.
    return parser.parse_args(argv)

def get_targets(bxes, lbs, device):
    """Pair up boxes and labels into per-image target dicts on ``device``.

    Args:
        bxes: Sequence of per-image box tensors.
        lbs: Sequence of per-image label tensors; must have the same length
            as ``bxes``.
        device: Device each tensor is moved to via ``.to(device)``.

    Returns:
        A list with one ``{'boxes': ..., 'labels': ...}`` dict per image.
    """
    assert len(bxes) == len(lbs)
    return [
        {'boxes' : box.to(device), 'labels' : label.to(device)}
        for box, label in zip(bxes, lbs)
    ]

def main():
    """Train a two-class Faster R-CNN detector and save periodic checkpoints.

    The function seeds the RNGs, prints system information, parses the
    command line, builds the augmented ``PalmDataset`` loader, optionally
    restores weights from ``--finetune``, and then trains with Adam for
    ``--epochs`` epochs. A checkpoint is written every ``--save-epoch``
    epochs under ``CHECKPOINTS_DIR/<output>/`` and one log line per epoch
    (the mean training loss of that epoch) is written to
    ``LOG_DIR/<logfile>`` once training finishes.
    """
    set_random_seed()
    print_sys_info()

    args = parameters_parser()

    # Photometric augmentation plus a fixed resize. The geometric augmenters
    # are left disabled, as in the original script.
    seq = iaa.Sequential([
          #iaa.Fliplr(0.5),
          #iaa.Affine(translate_percent=(-0.1, 0.1)),
          iaa.GammaContrast((0.5, 2.0)),
          iaa.GaussianBlur((0, 3.0)),
          iaa.Resize({'height': 640, 'width': 360})  #just for fine tuning
    ])

    dataset = PalmDataset(IMGS_DIR, ANNOS_FILE, seq)
    data = DataLoader(dataset, batch_size=args.batch_size, num_workers=4, shuffle=True, drop_last=False, pin_memory=True)
    device = torch.device('cuda:%d'%args.gpu if torch.cuda.is_available() else 'cpu')

    # os.path.join works whether or not the configured directories end with a
    # separator; makedirs creates missing parents and tolerates an existing
    # directory without a separate (racy) existence check.
    model_path = os.path.join(CHECKPOINTS_DIR, args.output)
    os.makedirs(model_path, exist_ok=True)

    # Make sure the log destination exists before training starts, so a
    # missing directory cannot discard the log after the last epoch.
    log_path = os.path.join(LOG_DIR, args.logfile)
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    model = faster_rcnn(pretrained=False, num_classes=2, pretrained_backbone=True, trainable_backbone_layers=5)
    if args.finetune is not None:
        model.load_state_dict(torch.load(args.finetune, map_location=device))
    model.to(device)
    model.train()

    optimizer = torch.optim.Adam(model.parameters(), args.lr)

    logs = []
    for epoch in range(1, args.epochs + 1):
        # Accumulate the detached loss so the logged value is the epoch mean
        # rather than whatever the last (shuffled) batch happened to produce.
        running_loss = 0.0
        num_batches = 0
        for imgs, boxes, labels in tqdm(data):
            targets = get_targets(boxes, labels, device)
            images = [image.to(device) for image in imgs]

            # In train mode the model is expected to return a dict of loss
            # tensors (as torchvision's detection models do); their sum is
            # the training objective.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            running_loss = running_loss + losses.detach()
            num_batches += 1

        # An empty loader yields no batches; report NaN instead of crashing.
        mean_loss = float(running_loss) / num_batches if num_batches else float('nan')
        msg = 'Train epoch: %3d, loss: %0.4f' % (epoch, mean_loss)
        print(msg)
        logs.append(msg)

        # A non-positive --save-epoch disables periodic checkpoints instead
        # of raising ZeroDivisionError after a full epoch of training.
        if args.save_epoch > 0 and epoch % args.save_epoch == 0:
            save_path = os.path.join(model_path, "epoch-%03d.pt"%epoch)
            torch.save(model.state_dict(), save_path)

    np.savetxt(log_path, np.array(logs), fmt="%s")

# Start training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Good luck!