Training a Faster R-CNN model on the COCO dataset has been consistently unsuccessful

I have downloaded the COCO dataset to my computer, and now I want to use PyTorch to load it and train a Faster R-CNN object detection model. However, there seems to be a problem with loading the data. Can you help me solve this issue? The following is my code.

import torchvision
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
import torch.optim as optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

## dataloader
transform = transforms.Compose([
    transforms.ToTensor(),
    # as far as I can tell, FasterRCNN also normalizes internally with these same
    # ImageNet statistics, so this Normalize may be applied twice; I left it in for now
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_data_dir = '../data/coco/train2017'
train_ann_file = '../data/coco/annotations/instances_train2017.json'
val_data_dir = '../data/coco/val2017'
val_ann_file = '../data/coco/annotations/instances_val2017.json'

train_dataset = CocoDetection(root=train_data_dir, annFile=train_ann_file, transform=transform)
test_dataset = CocoDetection(root=val_data_dir, annFile=val_ann_file, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)
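
I suspect the failure starts with these two loaders: every COCO image has a different size and a different number of annotations, so as far as I understand the default collate_fn cannot stack a batch into single tensors. This is the custom collate I tried as a workaround (collate_fn is just my own helper name, similar to what the torchvision detection reference scripts use); I am not sure whether it is the right fix on its own:

def collate_fn(batch):
    # keep the images and targets of each batch as plain tuples instead of stacking them
    return tuple(zip(*batch))

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)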

## model
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)  # feature map names must be strings in recent torchvision
model = FasterRCNN(backbone, num_classes=91, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler)  # 91 = COCO category ids (1-90) + background
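
To rule out the model definition itself, I am planning to run a quick forward pass on random inputs in eval mode, along the lines of the example in the torchvision FasterRCNN docstring (the dummy_* names are just mine); I have not confirmed yet whether this part is where things break:

# sanity check: in eval mode the model takes a list of 3xHxW tensors and
# returns one dict of boxes/labels/scores per image
model.eval()
with torch.no_grad():
    dummy_images = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    dummy_predictions = model(dummy_images)
print(dummy_predictions[0].keys())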

## optimizer, lr scheduler and loss
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
criterion = nn.SmoothL1Loss()  # note: never used below; FasterRCNN returns its own loss dict in training mode

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)  # the model has to be on the same device as the images and targets


## train model
model.train()
num_epochs = 10
for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if i % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Total Loss: {losses.item()}")

    lr_scheduler.step()  # step the schedule once per epoch, otherwise it never takes effect