Hi All,
I am new to ML and PyTorch and am struggling to stand up the torchvision Mask R-CNN tutorial with my own use case. I have images containing [0, N] instances of each of 3 classes, and I am trying to detect and classify each instance. The run breaks at the evaluation step, on this assertion in pycocotools/coco.py:
assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
    'Results do not correspond to current coco set'
When I debugged it, I found that the two sets are not only drastically different sizes, but different types as well:
>>> annsImgIds
[tensor([344])]
>>> self.getImgIds()
{0, 1, 2, 3, 4, 5, 6, ... } # len: 1633, the length of my dataset
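For reference, here is how I inspected what the test loader actually yields (my_collate, defined below, returns the targets as a plain list of dicts):

data, targets = next(iter(data_loader_test))
print(type(targets[0]["image_id"]), targets[0]["image_id"])
# e.g. <class 'torch.Tensor'> tensor([344])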
I’m sure this is a simple error in the DataLoader, but for the life of me I can’t find it, and I could use help debugging the types and sizes. Other posts about this issue, here and on GitHub, usually point to saving JSON weights to the same location, but I’m not doing that (or at least I’m not aware that I am).
Here is how I am loading the data:
ds_train = MyDataset(
    data_dir="train/",
    annotations_path="annotations.json",
    transforms=get_transform(train=True),
)
ds_test = MyDataset(
    data_dir="train/",
    annotations_path="annotations.json",
    transforms=get_transform(train=False),
)

dataset_size = len(ds_train)
indices = torch.randperm(dataset_size).tolist()

# 80-20 split
dataset = torch.utils.data.Subset(ds_train, indices[: -int(dataset_size * 0.2)])
dataset_test = torch.utils.data.Subset(ds_test, indices[-int(dataset_size * 0.2) :])
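To rule out an overlapping split, here is a quick check on those same indices (n_test is just a local name for the test-set size):

n_test = int(dataset_size * 0.2)
assert set(indices[:-n_test]).isdisjoint(indices[-n_test:])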
data_loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=1,
    collate_fn=my_collate,
    pin_memory=True,
)
data_loader_test = DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=True,
    num_workers=1,
    collate_fn=my_collate,
    pin_memory=True,
)
Here is my collate function:
def my_collate(batch):
    data = [item[0] for item in batch]
    target = [item[1] for item in batch]
    data = torch.stack(data)
    # target = torch.LongTensor(target)  # threw errors until I removed it
    return [data, target]
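For comparison, the collate function in torchvision’s detection reference code (references/detection/utils.py) doesn’t stack the images at all; it just transposes the batch, since detection images may differ in size:

def collate_fn(batch):
    # torchvision reference version: returns (images, targets) as tuples
    return tuple(zip(*batch))

I don’t know whether that difference matters here, but I mention it since my version returns a stacked tensor and a list instead.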
and the MyDataset class:
class MyDataset(Dataset):
    def __init__(self, data_dir: str, annotations_path: str, transforms=None) -> None:
        super().__init__()
        self.data_dir = data_dir
        self.annotations = self._extract_annotations(annotations_path)
        self.transforms = transforms
        self._labels = {
            "blood_vessel": 0,
            "glomerulus": 1,
            "unsure": 2,
        }
        self.image_list = [
            f for f in os.listdir(data_dir) if f[:-4] in self.annotations
        ]  # might need to filter this down

    def _calc_area(self, box):
        return (box[2] - box[0]) * (box[3] - box[1])

    def _extract_annotations(self, fp) -> Dict[str, Any]:
        # annotations.json is JSON Lines: one JSON object per line
        with open(fp) as polygon:
            lines = polygon.read().split("\n")
        z = {}
        for i, row in enumerate(lines):
            try:
                r = json.loads(row)
                z[r["id"]] = r["annotations"]
            except json.JSONDecodeError as jde:
                print(i, jde, row)
        return z

    def _get_bbox(self, coords: np.ndarray):
        xmin = int(np.min(coords[:, 1]))
        ymin = int(np.min(coords[:, 0]))
        xmax = int(np.max(coords[:, 1]))
        ymax = int(np.max(coords[:, 0]))
        return [xmin, ymin, xmax, ymax]

    def __len__(self):
        return len(self.image_list)

    def _valid_labels(self, labels):
        # currently unused
        try:
            torch.where(labels > 0)[0]
            return True
        except Exception:
            return False

    def _convert_labels(self, labels: np.ndarray):
        """Convert labels to be from 0-2"""
        labels_ = labels.copy()
        if len(np.unique(labels)) == 1:
            labels_ = np.zeros(labels.shape)
        elif np.min(labels) == 2:
            labels_ = labels - 2
        elif np.min(labels) == 1:
            labels_ = labels - 1
        elif np.min(labels) == 0 and len(np.where(labels == 1)[0]) == 0:
            labels[np.where(labels == 2)[0]] = 1
            labels_ = labels
        assert len(np.unique(labels_)) - 1 == np.max(labels_)
        return labels_

    def __getitem__(self, index) -> Any:
        img_name = self.image_list[index]
        image_path = os.path.join(self.data_dir, img_name)
        image = Image.open(image_path)
        annotations = self.annotations[img_name[:-4]]
        num_objs = len(annotations)
        # create the masks of size (num_objs, 512, 512)
        masks = np.zeros((num_objs, 512, 512), dtype=np.uint8)
        boxes = [None] * num_objs
        areas = [None] * num_objs
        labels = []
        # for each mask, add labels and boxes
        for i in range(num_objs):
            l_type = annotations[i]["type"]
            label_color = self._labels[l_type]
            coords = np.array(annotations[i]["coordinates"])[0]
            # set the mask coordinates equal to the label color
            m = np.zeros((512, 512))
            m[coords[:, 1], coords[:, 0]] = label_color
            cv2.fillPoly(m, pts=[coords], color=label_color)
            masks[i, :, :] = m
            # update the label
            labels.append(label_color)
            # create the bounding boxes
            bbox = self._get_bbox(coords)
            areas[i] = self._calc_area(bbox)
            boxes[i] = bbox
        labels = self._convert_labels(np.array(labels))
        # labels = np.array(list(np.unique(labels)))
        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
        target["area"] = torch.as_tensor(areas, dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64) - 1
        target["masks"] = torch.as_tensor(masks, dtype=torch.uint8)
        assert target["masks"].shape[0] == num_objs
        target["image_id"] = torch.tensor([index])
        if self.transforms is not None:
            image, target = self.transforms(image, target)
        else:
            image = PILToTensor()(image)
        return image, target
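In case it’s useful, here is a minimal smoke test on a raw sample, before any Subset or DataLoader is involved:

img, tgt = ds_train[0]
print(img.shape)                       # expecting torch.Size([3, 512, 512])
print(tgt["image_id"], tgt["labels"])  # image_id comes back as a 1-element tensor
print(tgt["boxes"].shape, tgt["masks"].shape)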
Training Loop:
running_loss = 0
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

print("Complete!")
Any insight or help would be greatly appreciated!