AssertionError: Results do not correspond to current coco set

Hi All,

I am new to ML and PyTorch and am struggling to get the torchvision Mask R-CNN tutorial working with my own use case. Each of my images contains [0, N] instances of 3 classes, and I am attempting to identify and classify each segment. Training breaks at the evaluation step, in pycocotools/coco.py, here:

assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
               'Results do not correspond to current coco set'

When I debugged it, I found that the two sets are not only drastically different in size, they also hold different types:

>>> annsImgIds
[tensor([344])]
>>> self.getImgIds()
{0, 1, 2, 3, 4, 5, 6, ... }  # len: 1633, the length of my dataset
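To illustrate what I'm seeing: tensors hash by identity rather than by value, so a set of tensors never intersects a set of ints, even when the numbers match. A minimal repro:

    import torch

    anns_img_ids = [torch.tensor([344])]
    img_ids = set(range(1633))

    # the tensor never compares equal to the int 344,
    # so the intersection is empty and the assert fires
    print(set(anns_img_ids) & img_ids)  # set()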

I’m sure this is a simple error in the DataLoader, but for the life of me I can’t find it, and I could use help debugging the types and the sizes. Other posts about this error, here and on GitHub, often refer to saving JSON files to the same location, but I’m not doing that (or at least I’m not aware that I am).

Here is how I am loading the data:

    ds_train = MyDataset(
        data_dir="train/",
        annotations_path="annotations.json",
        transforms=get_transform(train=True),
    )
    ds_test = MyDataset(
        data_dir="train/",
        annotations_path="annotations.json",
        transforms=get_transform(train=False),
    )

    dataset_size = len(ds_train)
    indices = torch.randperm(dataset_size).tolist()
    # 80-20 split
    dataset = torch.utils.data.Subset(ds_train, indices[: -int(dataset_size * 0.2)])
    dataset_test = torch.utils.data.Subset(
        ds_test, indices[-int(dataset_size * 0.2) :]
    )

    data_loader = DataLoader(
        dataset,
        batch_size=2,
        shuffle=True,
        num_workers=1,
        collate_fn=my_collate,
        pin_memory=True,
    )
    data_loader_test = DataLoader(
        dataset_test,
        batch_size=1,
        shuffle=True,
        num_workers=1,
        collate_fn=my_collate,
        pin_memory=True,
    )

Here is my collate function:

def my_collate(batch):
    data = [item[0] for item in batch]    # images
    target = [item[1] for item in batch]  # target dicts
    data = torch.stack(data)              # works only because all images are the same size
    # target = torch.LongTensor(target)  # threw errors until I removed it
    # (the targets are dicts, so they can't be converted to a tensor)

    return [data, target]
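For reference, the collate function in the torchvision detection example (references/detection/utils.py) doesn't stack at all; the detection models take a list of images, so it just regroups the batch into tuples. Stacking only works here because every image is 512x512:

    def collate_fn(batch):
        return tuple(zip(*batch))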

and the dataset class (called MyDataset above, defined as HubmapDataset below):

class HubmapDataset(Dataset):
    def __init__(self, data_dir: str, annotations_path: str, transforms=None) -> None:
        super().__init__()
        self.data_dir = data_dir
        self.annotations = self._extract_annotations(annotations_path)
        self.transforms = transforms
        self._labels = {
            "blood_vessel": 0,
            "glomerulus": 1,
            "unsure": 2,
        }
        self.image_list = [
            f for f in os.listdir(data_dir) if f[:-4] in self.annotations
        ]  # might need to filter this down

    def _calc_area(self, box):
        return (box[2] - box[0]) * (box[3] - box[1])

    def _extract_annotations(self, fp) -> Dict[str, Any]:
        # the annotations file is JSON-lines: one JSON object per line
        with open(fp) as polygon:
            lines = polygon.read().split("\n")
        annotations = {}
        for i, row in enumerate(lines):
            try:
                r = json.loads(row)
                annotations[r["id"]] = r["annotations"]
            except json.JSONDecodeError as jde:
                print(i, jde, row)
        return annotations
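
    # Each line of the annotations file is expected to be a standalone JSON
    # object (JSON-lines). A hypothetical example line, matching the keys
    # accessed above and in __getitem__:
    # {"id": "some_image_id",
    #  "annotations": [{"type": "blood_vessel",
    #                   "coordinates": [[[x0, y0], [x1, y1], ...]]}]}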

    def _get_bbox(self, coords: np.ndarray):
        xmin = int(np.min(coords[:, 1]))
        ymin = int(np.min(coords[:, 0]))
        xmax = int(np.max(coords[:, 1]))
        ymax = int(np.max(coords[:, 0]))
        return [xmin, ymin, xmax, ymax]

    def __len__(self):
        return len(self.image_list)

    def _valid_labels(self, labels):
        try:
            torch.where(labels > 0)[0]
            return True
        except Exception:
            return False

    def _convert_labels(self, labels: np.ndarray):
        """Convert labels to be from 0-2"""
        labels_ = labels.copy()

        if len(np.unique(labels)) == 1:
            labels_ = np.zeros(labels.shape)

        elif np.min(labels) == 2:
            labels_ = labels - 2

        elif np.min(labels) == 1:
            labels_ = labels - 1

        elif np.min(labels) == 0 and len(np.where(labels == 1)[0]) == 0:
            labels[np.where(labels == 2)[0]] = 1
            labels_ = labels

        assert len(np.unique(labels_)) - 1 == np.max(labels_)

        return labels_
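
    # Worked examples of the remapping above (assumed inputs):
    #   _convert_labels(np.array([1, 1, 2])) -> [0, 0, 1]  (min is 1, shift down by 1)
    #   _convert_labels(np.array([0, 2, 2])) -> [0, 1, 1]  (no 1s present, so 2 -> 1)
    #   _convert_labels(np.array([2, 2]))    -> [0, 0]     (a single class collapses to 0)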

    def __getitem__(self, index) -> Any:
        img_name = self.image_list[index]
        image_path = os.path.join(self.data_dir, img_name)
        image = Image.open(image_path)

        annotations = self.annotations[img_name[:-4]]
        num_objs = len(annotations)

        # create the masks, shape (num_objs, 512, 512)
        masks = np.zeros((num_objs, 512, 512), dtype=np.uint8)
        boxes = [None] * num_objs
        areas = [None] * num_objs
        labels = []

        # for each mask, add labels, boxes
        for i in range(num_objs):
            l_type = annotations[i]["type"]
            label_color = self._labels[l_type]
            coords = np.array(annotations[i]["coordinates"])[0]

            # set the mask coordinates equal to the label color
            m = np.zeros((512, 512))
            m[coords[:, 1], coords[:, 0]] = label_color
            cv2.fillPoly(m, pts=[coords], color=label_color)
            masks[i, :, :] = m

            # update the label
            labels.append(label_color)

            # create the bounding boxes
            bbox = self._get_bbox(coords)
            areas[i] = self._calc_area(bbox)
            boxes[i] = bbox

        labels = self._convert_labels(np.array(labels))

        # labels = np.array(list(np.unique(labels)))

        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
        target["area"] = torch.as_tensor(areas, dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64) - 1
        target["masks"] = torch.as_tensor(masks, dtype=torch.uint8)
        assert target["masks"].shape[0] == num_objs
        target["image_id"] = torch.tensor([index])

        if self.transforms is not None:
            image, target = self.transforms(image, target)
        else:
            image = PILToTensor()(image)

        return image, target
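
To help debug the types, here is a quick sanity check of one sample (assuming get_transform returns tensors, as in the tutorial):

    img, tgt = ds_train[0]
    print(type(tgt["image_id"]), tgt["image_id"])  # <class 'torch.Tensor'> tensor([0])
    print(img.shape, tgt["boxes"].shape, tgt["labels"])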

Training Loop:

    running_loss = 0
    num_epochs = 10

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    print("Complete!")

Any insight or help would be greatly appreciated!

Hi Duck,

I had the same issue with the fine-tuning example on Windows.
I will explain what worked for me, assuming you followed the steps in the example.

For me, it worked to change the type of image_id in the dataset class:

#image_id = torch.tensor([idx])
image_id = int(idx)

For your HubmapDataset class, that would be in this line:

target["image_id"] = torch.tensor([index])

Then you get a downstream error in coco_utils.py, because it calls .item() on image_id, which only works on a tensor, so drop the .item():

#image_id = targets["image_id"].item()      
image_id = targets["image_id"] 

After that, it worked. I hope this helps you.

Cheers

I have this for my dataset class:

class VOCDataset(Dataset):
    
    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        
        self.image_ids = dataframe['img_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
    
    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        records = self.df[self.df['img_id'] == image_id]
        
        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = func(image)  # func: preprocessing helper defined elsewhere
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        rows, cols = image.shape[:2]
        
        boxes = records[['x1', 'y1', 'x2', 'y2']].values
        
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        
        label = records['integer_label'].values
        labels = torch.as_tensor(label, dtype=torch.int64)
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        # target['masks'] = None
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd
        
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            
            # Find and correct invalid bounding boxes
            for i in range(len(sample['bboxes'])):
                x1, y1, x2, y2 = sample['bboxes'][i]
                if x1 >= x2:
                    x1, x2 = x2 - 1, x1 + 1
                if y1 >= y2:
                    y1, y2 = y2 - 1, y1 + 1
                sample['bboxes'][i] = [x1, y1, x2, y2]

            
            # rebuild the (N, 4) box tensor from the corrected box list
            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)

        return image, target
        
    def __len__(self) -> int:
        return self.image_ids.shape[0]

def get_transform_train():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        ToTensorV2(p=1.0)
    ], bbox_params={'format':'pascal_voc', 'label_fields': ['labels']})

def get_transform_valid():
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields':['labels']})

def collate_fn(batch):
    return tuple(zip(*batch))

train_dataset = VOCDataset(train_data, '/kaggle/input/crop-data/dataset/train' , get_transform_train())
valid_dataset = VOCDataset(val_data, '/kaggle/input/crop-data/dataset/validate', get_transform_valid())

# split the dataset in train and test set
# indices = torch.randperm(len(train_dataset)).tolist()

train_data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)

and I still get the same assertion error.

In that case, do I change the image id like so:

target['image_id'] = int(index)

in place of

target['image_id'] = torch.tensor([index])?