KeyError: 'keypoints' in PyTorch - how to resolve this?

Hi everyone,

I'm currently working on a PyTorch project, training a keypoint detection model from scratch on a custom COCO dataset. Whenever I try to run the train.py file, I get KeyError: 'keypoints'.

import datetime
import os
import time

import torch
import torch.utils.data
from torch import nn
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn

from torchvision import transforms

from coco_utils import get_coco, get_coco_kp

from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate

import utils
import transforms as T
from PIL import Image

def get_dataset(name, image_set, transform):
    paths = {
        "coco": ('/home/hzj/data/COCO2017/', get_coco, 91),
        "coco_kp": ('/home/hzj/data/COCO2017/', get_coco_kp, 2)
    }
    p, ds_fn, num_classes = paths[name]

    ds = ds_fn(p, image_set=image_set, transforms=transform)
    return ds, num_classes

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

def main(args):
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Data loading code
    print("Loading data")

    dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True))
    dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False))

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    print("Creating model")
    model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
                                                              pretrained=args.pretrained)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        if args.output_dir:
            utils.save_on_master({
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args},
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='PyTorch Detection Training')

    parser.add_argument('--data-path', default='/home/hzj/data/COCO2017/', help='dataset')
    parser.add_argument('--dataset', default='coco_kp', help='dataset')
    parser.add_argument('--model', default='keypointrcnn_resnet50_fpn', help='model')
    parser.add_argument('--device', default='cuda:0', help='device')
    parser.add_argument('-b', '--batch-size', default=2, type=int)
    parser.add_argument('--epochs', default=13, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 16)')
    parser.add_argument('--lr', default=0.02, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    parser.add_argument('--lr-steps', default=[8, 11], nargs='+', type=int, help='decrease lr every step-size epochs')
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    parser.add_argument('--output-dir', default='.', help='path where to save')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=0, type=int)
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )

    parser.add_argument(
        "--pretrained",
        dest="pretrained",
        help="Use pre-trained models from the modelzoo",
        action="store_true",
    )

    # distributed training parameters
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')

    args = parser.parse_args()

    if args.output_dir:
        utils.mkdir(args.output_dir)
    main(args)

This is my entire training code. Can anyone please help me out?

Hey Mudith, it would be easier to help if you format your code in the question better, post the error and the exact line where it seems to occur, and share the script command used. Thanks!

import datetime
import os
import time

import torch
import torch.utils.data
from torch import nn
import torchvision
import torchvision.models.detection
import torchvision.models.detection.mask_rcnn

from torchvision import transforms

from coco_utils import get_coco,get_coco_kp

from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from engine import train_one_epoch, evaluate

import utils
import transforms as T
from PIL import Image

def get_dataset(name, image_set, transform):
    paths = {
        "coco_kp": ('/coco_dataset/', get_coco,2),
        "coco_kp": ('/coco_dataset/', get_coco,2),
        #"coco_kp": ('/coco_dataset/', get_coco_kp,91),
        #"coco_kp": ('/coco_dataset/', get_coco_kp,91)
    }
    p, ds_fn, num_classes = paths[name]          #path=/coco_dataset/
    						 # ds_fn  #which gives the path that process the method and also gives the hashcode  
    #print("ds_fn",ds_fn)
    #print("num_classes",num_classes)             #91 classes
    #exit()
    #exit()
    #print(p, ds_fn, num_classes)   #get coco_
    #exit()
    ds=ds_fn(p, image_set=image_set, transforms=transform)
    #print(ds)
    return ds, num_classes


def get_transform(train):
    transforms=[]
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


def main(args):
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Data loading code
    print("Loading data")

    dataset, num_classes=get_dataset(args.dataset, "train2017", get_transform(train=True))
    #print(type(dataset))
    #exit()
    dataset_test, _=get_dataset(args.dataset, "val2017", get_transform(train=False))
    #print("original_testdataset_length",len(dataset_test))

    print("Creating data loaders")
    if args.distributed:
        train_sampler=torch.utils.data.distributed.DistributedSampler(dataset)  #splits the data loader total indices to the parts then distribute evenly to the data loader at each process
        #print("train_sampler",len(train_sampler))
        #exit()
        test_sampler=torch.utils.data.distributed.DistributedSampler(dataset_test)
        #print("original_test_sampler",len(test_sampler))
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)          #A sampler that return random indices
        print("train_sampler",train_sampler)
        #exit()
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)
        #print("sequential_sampler_length",len(test_sampler))
        #exit(0)
    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        print("group_id_len",len(group_ids))
        #exit()
        train_batch_sampler=GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
        print("train_batch_sampler_len",len(train_batch_sampler))
        #exit()
        #print("-------------------------------------------------------")
    else:
        #print("0000000000000000000000000000000000000000000000000000000000")
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)
        #print(len(train_batch_sampler[0]))

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        #dataset, batch_size=2, num_workers=args.workers,
        collate_fn=utils.collate_fn)
    #print("Type_of_Data_Loader",type(data_loader))
    #exit(0)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    print("Creating model")
    model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
                                                              pretrained=args.pretrained)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model=torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
    
    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
        lr_scheduler.step()
        if args.output_dir:
            utils.save_on_master({
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args},
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='PyTorch Detection Training')

    parser.add_argument('--data-path', default='/coco_dataset/', help='dataset')
    parser.add_argument('--dataset', default='coco_kp', help='dataset')
    parser.add_argument('--model', default='keypointrcnn_resnet50_fpn', help='model')
    parser.add_argument('--device', default='cuda:0', help='device')
    parser.add_argument('-b', '--batch-size', default=2, type=int)
    parser.add_argument('--epochs', default=13, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 16)')
    parser.add_argument('--lr', default=0.02, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)',
                        dest='weight_decay')
    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')
    parser.add_argument('--lr-steps', default=[8, 11], nargs='+', type=int, help='decrease lr every step-size epochs')
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')
    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')
    parser.add_argument('--output-dir', default='.', help='path where to save')
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--aspect-ratio-group-factor', default=0, type=int)
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )
    
    parser.add_argument(
        "--pretrained",
        dest="pretrained",
        help="Use pre-trained models from the modelzoo",
        action="store_true",
    )

    # distributed training parameters
    parser.add_argument('--world-size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')

    args = parser.parse_args()

    if args.output_dir:
        utils.mkdir(args.output_dir)

    main(args)

Any update, @UMAR_MASUD, regarding the above error?

Hi, thanks for the updated post. While I haven't run this particular script directly, if I were to debug it I would check what the target data looks like. The file and line throwing the error is this: link. The targets there are supposed to be a list of dictionaries, and each dictionary should have a key called 'keypoints'. Check your targets and see whether that holds consistently.
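For example, a quick sanity check along these lines might help. This is only a minimal sketch, assuming your dataset returns (image, target) pairs where target is a dict, as the torchvision detection reference scripts produce; required_keys and the number of samples inspected are just illustrative choices:

# Hypothetical check: inspect a few samples from the training dataset and
# report any target dict that lacks the keys Keypoint R-CNN expects.
required_keys = {"boxes", "labels", "keypoints"}

for idx in range(10):                         # the first few samples are usually enough
    _, target = dataset[idx]                  # dataset comes from get_dataset(...)
    if not isinstance(target, dict):          # a non-dict target already signals a problem
        print(f"sample {idx}: target is {type(target)}, expected dict")
        continue
    missing = required_keys - set(target.keys())
    if missing:
        print(f"sample {idx}: missing keys {missing}")
    else:
        print(f"sample {idx}: OK, keypoints shape {tuple(target['keypoints'].shape)}")

If samples come back without 'keypoints', the problem is in how the dataset builds its targets rather than in the training loop itself.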