Why doesn't my v2.Transform work?

hysskm · May 20, 2024, 6:56am

Hello, I want to ask about a problem where the bounding box coordinates are not transformed in my custom Dataset class.

Figure_1

Look at this. The coordinates are not transformed when using a v2 transform.

`class transformed_TrashDataset(Dataset):
def __init__(self, classes, transformed_classes, anno_path, img_dir, transforms=None):
    """Collect the annotation/image file lists and store the label setup.

    Args:
        classes: Raw class names; ``__getitem__`` looks annotation
            ``class_name`` values up in it with ``.index``.
        transformed_classes: Class names after merging; ``__getitem__``
            returns indices into this sequence.
        anno_path: Directory object exposing ``.glob`` (e.g. a
            ``pathlib.Path``) that holds the ``*.json`` annotation files.
        img_dir: Directory object exposing ``.glob`` that holds the
            ``*.jpg`` image files.
        transforms: Optional callable applied as ``transforms(img, boxes)``.
    """
    # Gather the annotation files and sort them so the order is deterministic.
    # The "*" wildcard is required: glob('.json') would only match a file
    # literally named ".json".
    self.anno_list = sorted(anno_path.glob('*.json'))
    # Gather the image files and sort them the same way.
    # NOTE(review): images and annotations are paired purely by their position
    # in these two sorted lists -- confirm the file names line up one-to-one.
    self.img_list = sorted(img_dir.glob('*.jpg'))
    self.transform = transforms
    self.transformed_classes = transformed_classes
    self.classes = classes

def __getitem__(self, idx):
    """Load one sample: image, bounding boxes and remapped class indices.

    Reads the image at ``self.img_list[idx]`` and the annotation JSON at
    ``self.anno_list[idx]``, converts every object's ``x``/``y``/``width``/
    ``height`` record into an ``[xmin, ymin, xmax, ymax]`` box, and applies
    ``self.transform`` to the image and the boxes together.

    Args:
        idx: Position of the sample in the sorted file lists.

    Returns:
        A 3-tuple ``(image, boxes, labels)``: the (transformed) image moved
        to ``'cuda'``, the (transformed) ``tv_tensors.BoundingBoxes`` in
        ``XYXY`` format, and a list of indices into
        ``self.transformed_classes`` (one per annotated object).

    Raises:
        ValueError: If an object's ``class_name`` is not in ``self.classes``
            or its remapped name is not in ``self.transformed_classes``.
        KeyError: If the annotation lacks one of the expected keys.
    """
    import torch  # local import: the file-level imports are not visible here

    # Read the image.
    img = read_image(str(self.img_list[idx]))

    # Annotation file that belongs to this image (same position in the lists).
    annotation_path = self.anno_list[idx]
    with open(annotation_path, 'r', encoding='utf-8') as f:
        data = load(f)

    # Parse the resolution once per file, outside the object loop, so it is
    # defined even when the annotation contains no objects.
    width, height = data['Info']['RESOLUTION'].split('/')

    coords = []       # one [xmin, ymin, xmax, ymax] row per object
    class_names = []  # raw class name per object, validated against self.classes
    for obj in data['objects']:
        box = obj['annotation']['coord']
        # [x, y, width, height] -> [xmin, ymin, xmax, ymax]. The values are
        # read by key so the result does not depend on the JSON key order.
        coords.append([
            int(box['x']),
            int(box['y']),
            int(box['x'] + box['width']),
            int(box['y'] + box['height']),
        ])
        # .index raises ValueError for a class name that is not configured.
        class_idx = self.classes.index(obj['class_name'])
        class_names.append(self.classes[class_idx])

    # reshape(-1, 4) keeps an [N, 4] layout even when there are no objects.
    box_tensor = torch.tensor(coords, dtype=torch.int64).reshape(-1, 4)
    boxes = tv_tensors.BoundingBoxes(
        box_tensor,
        format="XYXY",
        canvas_size=(int(height), int(width)),
    )

    # The constructor allows transforms=None; return the sample untouched then.
    if self.transform is not None:
        transformed_img, transformed_boxes = self.transform(img, boxes)
    else:
        transformed_img, transformed_boxes = img, boxes

    # Names starting with one of these prefixes collapse to their first
    # character; every other name is kept unchanged.
    merged_prefixes = ('c_0', 'c_2', 'c_9', 'c_4')
    # NOTE(review): labels are derived independently of the transform; a
    # transform that drops boxes would leave them misaligned -- confirm.
    labels = [
        self.transformed_classes.index(
            name[0] if name.startswith(merged_prefixes) else name
        )
        for name in class_names
    ]

    # NOTE(review): only the image is moved to CUDA here; the boxes stay on
    # their current device. Kept as-is because callers may rely on it.
    return transformed_img.to('cuda'), transformed_boxes, labels
def __len__(self):
    """Return the number of samples, i.e. the number of annotation files."""
    annotation_files = self.anno_list
    return len(annotation_files)

`
This is my code. I think everything looks fine, but I can't figure out what is wrong…

Does anyone know what causes this problem?

Please leave a comment and let me know how to solve this problem. Thank you.