I am running the script below but getting an error. Can anyone fix it?
import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.vgg import vgg16
import json
# Define the dataset location.
# With download=True the VOC 2007 archive is fetched into `root` if it is not
# already present there. Each sample is an (image, annotation) pair; the
# annotation is the parsed VOC XML as nested dicts.
train_dataset = torchvision.datasets.VOCDetection(
    root='./VOC2007',
    year='2007',
    image_set='train',
    download=True,
)
# Map each Pascal VOC class name to its integer label.
# Index 0 is the "background" entry, which gives 21 labels in total for the
# 20 VOC object categories.
label_map = {
    "background": 0,
    "aeroplane": 1,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20,
}
# Define the transform pipeline with random erasing.
# Order matters: Resize / flip / ColorJitter run on the incoming image,
# ToTensor() (an instance, not the class) converts it to a float tensor, and
# Normalize / RandomErasing then operate on that tensor. RandomErasing only
# accepts tensors, so it must come after the conversion.
# NOTE(review): these are image-only transforms. Resize and
# RandomHorizontalFlip do not adjust bounding boxes, so confirm box handling
# before attaching this pipeline to a detection dataset.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value='random'),
])
# Create a data loader for the training dataset.

def collate_fn(batch):
    """Regroup a batch of samples into one tuple per sample field.

    Args:
        batch: Sequence of equally structured samples, e.g. ``(image, target)``
            pairs.

    Returns:
        A tuple holding one inner tuple per field, e.g.
        ``((img0, img1, ...), (tgt0, tgt1, ...))``. An empty batch gives ``()``.
    """
    # Transpose rather than stack: every sample stays its own object, so the
    # items in a batch do not need to share a size.
    return tuple(zip(*batch))
# Single-sample batches, shuffled, loaded by 4 worker processes; batch
# assembly is delegated to collate_fn.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    collate_fn=collate_fn,
    num_workers=4,
    shuffle=True,
    batch_size=1,
)
# Define the Faster R-CNN model with VGG16 as the backbone network.
# Overwriting `.backbone` on fasterrcnn_resnet50_fpn does not work: its RPN and
# RoI heads are built for the multi-level, 256-channel FPN output, whereas
# vgg16().features produces a single 512-channel feature map. Build FasterRCNN
# directly around the VGG16 feature extractor instead.
backbone = vgg16(pretrained=True).features
# FasterRCNN reads this attribute to size the RPN head and the box head.
backbone.out_channels = 512
# One feature map -> exactly one tuple of sizes and one tuple of aspect ratios.
anchor_generator = torchvision.models.detection.rpn.AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)
# A backbone that returns a plain tensor is exposed to the heads under the
# feature-map name '0'.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)
model = torchvision.models.detection.FasterRCNN(
    backbone,
    num_classes=21,  # 20 VOC categories + background
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler,
    box_detections_per_img=1,
)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
print(model)
# Define the optimizer
# Plain SGD with momentum over every model parameter.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    momentum=0.9,
    lr=0.001,
)
# Pass the data to the model for training.

def _voc_target_to_tensors(target):
    """Convert one VOC annotation dict into a detection target on ``device``.

    Args:
        target: Parsed VOC annotation; the objects are read from
            ``target['annotation']['object']``.

    Returns:
        Dict with ``'boxes'`` (float32, one ``[xmin, ymin, xmax, ymax]`` row
        per object) and ``'labels'`` (int64 indices looked up in
        ``label_map``), both moved to ``device``.

    Raises:
        KeyError: If an object name is not present in ``label_map``.
    """
    objects = target['annotation']['object']
    # NOTE(review): some torchvision versions presumably return a bare dict for
    # a single-object image; normalise to a list so both shapes are handled.
    if isinstance(objects, dict):
        objects = [objects]
    boxes = [
        [float(obj['bndbox'][key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')]
        for obj in objects
    ]
    labels = [label_map[obj['name']] for obj in objects]
    return {
        # reshape keeps the (N, 4) layout even when the object list is empty
        'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4).to(device),
        'labels': torch.as_tensor(labels, dtype=torch.int64).to(device),
    }


to_tensor = transforms.ToTensor()
# The detection model only returns losses when it is in training mode.
model.train()
for images, targets in train_loader:
    # Move the images to the device (the loader yields untransformed images).
    images = [to_tensor(image).to(device) for image in images]
    # Exactly one target dict per image, holding all of that image's objects;
    # the model requires len(images) == len(targets).
    targets_list = [_voc_target_to_tensors(target) for target in targets]
    print(len(images), len(targets_list[0]['boxes']), len(targets_list[0]['labels']))
    print(images[0].size())
    # In training mode the model returns a dict of scalar loss tensors
    # (classifier, box regression, objectness, RPN box regression).
    loss_dict = model(images, targets_list)
    losses = sum(loss_dict.values())
    optimizer.zero_grad()
    losses.backward()
    optimizer.step()