Hello, I am having trouble using a custom Dataset in PyTorch, mainly with how the labels should be formatted for the fasterrcnn_resnet model. Am I supposed to list all the labels that exist in the data, or just the labels that are specific to that image? For example, I have the classes ["Truck","Car","Jeep"]
and labeled them 1, 2, 3. Let's say I have an image of a truck: should the label in the 'target' that is returned be tensor([1]), or should it be tensor([1,2,3])?
The documentation says the following:
During training, the model expects both the input tensors, as well as a targets (list of dictionary), containing:
- boxes (`FloatTensor[N, 4]`): the ground-truth boxes in `[x1, y1, x2, y2]` format, with `0 <= x1 < x2 <= W` and `0 <= y1 < y2 <= H`.
- **labels (`Int64Tensor[N]`): the class label for each ground-truth box**
Here is a single batch of targets that is being passed to the model:
[{'boxes': tensor([[214, 385, 847, 883]]), 'labels': tensor([1]), 'image_id': tensor(8), 'area': tensor(1345348), 'iscrowd': tensor([0])}, {'boxes': tensor([[ 82, 250, 919, 801]]), 'labels': tensor([1]), 'image_id': tensor(0), 'area': tensor(1052051), 'iscrowd': tensor([0])}, {'boxes': tensor([[192, 444, 922, 984]]), 'labels': tensor([3]), 'image_id': tensor(25), 'area': tensor(1590792), 'iscrowd': tensor([0])}]
This is my current code for the custom dataset:
# Create Custom DataSet
class ObjectDetectionDataset(torch.utils.data.Dataset):
    """Detection dataset that pairs images with XML bounding-box annotations.

    Each item is ``(image, target)``:

    * ``image`` is the picture resized to ``RESIZE_TO`` and converted to a
      float tensor by ``transforms.ToTensor()``.
    * ``target`` is a dict with one entry per annotated ``<object>`` in the
      XML file (N objects):

      - ``boxes``:    float32 tensor ``[N, 4]`` as ``[x1, y1, x2, y2]``,
        rescaled to the resized image.
      - ``labels``:   int64 tensor ``[N]``, the class id of each box. Only
        the classes present in *this* image appear here, one per box.
      - ``image_id``: scalar tensor holding the dataset index.
      - ``area``:     float32 tensor ``[N]``, ``(x2 - x1) * (y2 - y1)``.
      - ``iscrowd``:  int64 zeros ``[N]``.

    Images and annotations are matched by position after sorting both
    directories, so this assumes the two folders contain matching,
    identically ordered files -- TODO confirm for your data.
    """

    # Position in this tuple + 1 is the integer class id. Id 0 is what an
    # unrecognised class name falls back to; torchvision detection models
    # treat 0 as background.
    CLASS_NAMES = ("Truck", "Car", "Jeep")

    # Output size as (height, width), the order ``transforms.Resize`` expects.
    RESIZE_TO = (1024, 1024)

    def __init__(self, phase: str, transforms=None):
        """Index the image and annotation files for one dataset split.

        Args:
            phase: Split name; files are read from
                ``/content/drive/My Drive/{phase} set/images`` and
                ``/content/drive/My Drive/{phase} set/labels``.
            transforms: Stored on the instance for callers that want it;
                ``__getitem__`` does not apply it.
        """
        self.image_dir = Path(f"/content/drive/My Drive/{phase} set/images")
        self.annotations_dir = Path(f"/content/drive/My Drive/{phase} set/labels")
        self.transforms = transforms
        # iterdir() already yields full paths, so no re-joining is needed.
        self.imgs_names = sorted(self.image_dir.iterdir())
        self.annotation_names = sorted(self.annotations_dir.iterdir())

    def __getitem__(self, index):
        """Return the resized image tensor and its target dict for ``index``."""
        # ``transforms`` below is the module-level torchvision import, not
        # the constructor argument of the same name.
        # convert("RGB") keeps grayscale/RGBA files at three channels.
        image_pic = Image.open(self.imgs_names[index]).convert("RGB")
        orig_width, orig_height = image_pic.size  # PIL reports (width, height)
        target_height, target_width = self.RESIZE_TO
        image = transforms.ToTensor()(transforms.Resize(self.RESIZE_TO)(image_pic))

        # Parse the annotation once and reuse it for both boxes and labels.
        annotation_root = ET.parse(self.annotation_names[index]).getroot()
        target = self._build_target(
            annotation_root,
            index,
            target_width / orig_width,
            target_height / orig_height,
        )
        return image, target

    @classmethod
    def _build_target(cls, annotation_root, index, scale_x, scale_y):
        """Build the target dict from a parsed annotation.

        Args:
            annotation_root: Root XML element whose ``<object>`` children each
                hold a ``<name>`` and a ``<bndbox>`` with
                ``xmin``/``ymin``/``xmax``/``ymax``.
            index: Dataset index, stored as ``image_id``.
            scale_x: Resized width divided by original width.
            scale_y: Resized height divided by original height.

        Returns:
            Dict with ``boxes``, ``labels``, ``image_id``, ``area`` and
            ``iscrowd``; an annotation without objects yields empty tensors
            (``boxes`` has shape ``[0, 4]``).
        """
        boxes = []
        labels = []
        for obj in annotation_root.findall("object"):
            bndbox = obj.find("bndbox")
            boxes.append(
                [
                    round(int(bndbox.find("xmin").text) * scale_x),
                    round(int(bndbox.find("ymin").text) * scale_y),
                    round(int(bndbox.find("xmax").text) * scale_x),
                    round(int(bndbox.find("ymax").text) * scale_y),
                ]
            )
            name = obj.find("name").text
            # NOTE(review): unknown class names map to 0 (background), as in
            # the original code; consider skipping or rejecting such boxes.
            if name in cls.CLASS_NAMES:
                labels.append(cls.CLASS_NAMES.index(name) + 1)
            else:
                labels.append(0)

        # reshape keeps the [N, 4] layout even when there are no objects.
        boxes = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        widths = boxes[:, 2] - boxes[:, 0]
        heights = boxes[:, 3] - boxes[:, 1]
        return {
            "boxes": boxes,
            "labels": torch.tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor(index),
            "area": widths * heights,
            "iscrowd": torch.zeros((len(labels),), dtype=torch.int64),
        }

    def __len__(self):
        """Return the number of images found in the image directory."""
        return len(self.imgs_names)