How can I train Faster R-CNN on a custom dataset?

hysskm · January 15, 2024, 10:36am

class TrashDataset(Dataset):
    """Detection dataset that pairs ``.jpg`` images with ``.json`` annotations.

    Images and annotation files are collected with the ``*/*/*`` glob pattern
    and sorted, so sample ``idx`` is the ``idx``-th image together with the
    ``idx``-th annotation file in sorted order.

    Args:
        classes: Sequence of class names; a label is the position of the
            object's ``class_name`` in this sequence.
        anno_path: Path-like object (must provide ``glob``) that contains the
            annotation files.
        img_dir: Path-like object (must provide ``glob``) that contains the
            image files.
        transform: Optional callable invoked as ``transform(img, boxes)`` and
            returning the transformed ``(img, boxes)`` pair. When ``None``
            the image and boxes are returned untouched.
    """

    def __init__(self, classes, anno_path, img_dir, transform=None):
        # Collect the annotation files and sort them.
        self.anno_list = sorted(anno_path.glob('*/*/*.json'))
        # Collect the image files and sort them the same way, so both lists
        # line up by index.
        self.img_list = sorted(img_dir.glob('*/*/*.jpg'))
        self.transform = transform
        self.classes = classes

    def _parse_annotation(self, data):
        """Convert one decoded annotation dict into boxes, labels and size.

        Args:
            data: Decoded JSON with ``data['Info']['RESOLUTION']`` formatted
                as ``"<width>/<height>"`` and a ``data['objects']`` list whose
                items carry ``class_name`` and ``annotation.coord`` with the
                keys ``x``, ``y``, ``width`` and ``height``.

        Returns:
            ``(boxes, labels, canvas_size)`` where ``boxes`` is a list of
            ``[xmin, ymin, xmax, ymax]`` integers, ``labels`` is the list of
            class indices and ``canvas_size`` is ``(height, width)``.

        Raises:
            KeyError: If an expected key is missing from ``data``.
            ValueError: If a ``class_name`` is not in ``self.classes``.
        """
        # Read the resolution once, outside the object loop, so it is also
        # available for annotations that contain no objects.
        width, height = data['Info']['RESOLUTION'].split('/')

        boxes = []
        labels = []
        for obj in data['objects']:
            coord = obj['annotation']['coord']
            # [x, y, width, height] -> [xmin, ymin, xmax, ymax]. The keys are
            # read by name so the result does not depend on the order in
            # which they appear in the JSON file.
            boxes.append([
                int(coord['x']),
                int(coord['y']),
                int(coord['x'] + coord['width']),
                int(coord['y'] + coord['height']),
            ])
            # Use the class list stored on the instance, not a global.
            labels.append(self.classes.index(obj['class_name']))
        return boxes, labels, (int(height), int(width))

    def __getitem__(self, idx):
        """Return ``(image, boxes, class_idx)`` for sample ``idx``.

        ``image`` is whatever ``read_image`` returns (after ``transform`` if
        one was given), ``boxes`` is a ``tv_tensors.BoundingBoxes`` in
        ``XYXY`` format and ``class_idx`` is a list of integer labels, one per
        box.
        """
        # Load the image.
        img = read_image(str(self.img_list[idx]))

        # Load the annotation that belongs to this image.
        annotation_path = self.anno_list[idx]
        with open(annotation_path, 'r') as f:
            data = load(f)

        coords, class_idx, canvas_size = self._parse_annotation(data)

        # reshape(-1, 4) keeps a well-formed (0, 4) tensor when the image has
        # no annotated objects.
        boxes = tv_tensors.BoundingBoxes(
            torch.tensor(coords, dtype=torch.int64).reshape(-1, 4),
            format="XYXY",
            canvas_size=canvas_size,
        )

        # `transform` is optional; `img` is passed as-is instead of being
        # re-wrapped with torch.tensor(), which would only copy it.
        if self.transform is not None:
            img, boxes = self.transform(img, boxes)
        return img, boxes, class_idx

    def __len__(self):
        """Return the number of annotation files found."""
        return len(self.anno_list)

I made a CustomDataset like this.

So I can get an image tensor of shape [3, 224, 224],
boundingboxes ex) BoundingBoxes([[ 40, 116, 74, 224],
[ 74, 196, 97, 224],
[ 68, 112, 104, 212],
[110, 167, 160, 224],
[115, 123, 137, 171],
[152, 137, 179, 174],
[113, 75, 148, 131],
[ 78, 76, 114, 158],
[ 45, 15, 76, 82],
[ 95, 30, 124, 84],
[113, 0, 149, 38]], format=BoundingBoxFormat.XYXY, canvas_size=(224, 224)),

class_idx ex) [18, 18, 24, 16, 18, 18, 24, 18, 16, 18, 24]

I also visualized the samples to check whether my custom dataset works.

I want to know how I can train Faster R-CNN on my custom dataset.