I'm trying to perform transfer learning on my Mask R-CNN model, but I am receiving this error: "AssertionError: targets should not be none when in training mode"

This is my code:

import copy
import os
import time
from tempfile import TemporaryDirectory

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torchvision
from PIL import Image
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

# Define device: use the first CUDA GPU when one is available, else the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Mount Google Drive if needed (only works inside a Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

# License: BSD
# Author: Sasank Chilamkurthy

cudnn.benchmark = True
plt.ion()  # interactive mode

# Define your custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    """Image-classification dataset read from ``data_dir/<class_name>/<image>``.

    Every sub-directory of ``data_dir`` is treated as one class and every
    entry inside it as one image of that class.

    Attributes:
        images: image file paths.
        labels: class name of each image, parallel to ``images``.
        classes: sorted unique class names found under ``data_dir``.
        class_to_idx: mapping from class name to its index in ``classes``.

    NOTE: this dataset yields ``(image, class_index)`` pairs. It does not
    provide the per-image ``boxes`` / ``masks`` annotations that a detection
    model's ``targets`` argument is indexed for (see the ``target["boxes"]``
    access in the traceback at the end of this file).
    """

    def __init__(self, data_dir, transform=None):
        """Scan ``data_dir`` and record every image path with its class name.

        Args:
            data_dir: directory containing one sub-directory per class.
            transform: optional callable applied to each PIL image.
        """
        self.data_dir = data_dir
        self.transform = transform
        self.images = []  # List to store image paths
        self.labels = []  # List to store corresponding labels

        # Sort directory listings so the sample order does not depend on the
        # (arbitrary) order returned by os.listdir.
        for class_name in sorted(os.listdir(data_dir)):
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            for image_name in sorted(os.listdir(class_dir)):
                self.images.append(os.path.join(class_dir, image_name))
                self.labels.append(class_name)

        # Keep the class-name -> index mapping on the dataset itself so that
        # __getitem__ does not depend on a module-level global defined later.
        self.classes = sorted(set(self.labels))
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

    def __len__(self):
        """Return the number of images found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label_idx)`` for sample ``idx``.

        The image is opened with PIL, converted to RGB and passed through
        ``self.transform`` when one was given; ``label_idx`` is the position
        of the sample's class name in ``self.classes``.
        """
        image_path = self.images[idx]
        label = self.labels[idx]

        # Open image using PIL
        image = Image.open(image_path).convert('RGB')

        # Apply transformations if specified
        if self.transform is not None:
            image = self.transform(image)

        # Convert label to integer index
        label_idx = self.class_to_idx[label]

        return image, label_idx

# Define data transformations.
# 'train' uses random crop + horizontal flip as augmentation; 'val' uses a
# deterministic resize + center crop. Both normalize with the same per-channel
# mean/std that imshow() uses to undo the normalization.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
}

# Specify the path to your custom dataset directory
data_dir = '/content/drive/MyDrive/OBJDataset.v1i.tensorflow'

# Create custom dataset instances for training and validation.
# Each split gets its own transform pipeline (data_transforms[x]), not the
# whole dict of pipelines.
custom_datasets = {
    x: CustomDataset(os.path.join(data_dir, x), transform=data_transforms[x])
    for x in ['train', 'val']
}

# Create data loaders for training and validation (one per split).
dataloaders = {
    x: torch.utils.data.DataLoader(custom_datasets[x], batch_size=4, shuffle=True, num_workers=4)
    for x in ['train', 'val']
}

# Get the sizes of the datasets (number of samples per split; used to turn
# running sums into per-epoch averages).
dataset_sizes = {x: len(custom_datasets[x]) for x in ['train', 'val']}

# Get the class names: unique, sorted. `labels` holds one entry per image, so
# it must be de-duplicated before being used as an index -> name table.
class_names = sorted(set(custom_datasets['train'].labels))

# Define the function to display images
def imshow(inp, title=None):
    """Display image for Tensor.

    Args:
        inp: CPU tensor whose first axis is moved last before plotting
            (assumes a (C, H, W) image normalized with the mean/std below --
            confirm against the caller).
        title: optional title passed to ``plt.title``.
    """
    # Move the first axis to the end for matplotlib.
    inp = inp.numpy().transpose((1, 2, 0))
    # Undo the Normalize() step of data_transforms (same mean/std values).
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

# Get a batch of training data
inputs, labels = next(iter(dataloaders['train']))

# Make a grid from the batch
out = torchvision.utils.make_grid(inputs)

# Convert class indices to class names (index the table with each label;
# without the [x] every entry would be the entire class_names list).
class_names_list = [class_names[x] for x in labels]

# Display the images with their corresponding class names
imshow(out, title=class_names_list)

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``; per-phase loss and accuracy are printed and
    the weights with the highest validation accuracy are kept.

    This is a classification loop: it calls ``model(inputs)``, feeds the
    result to ``criterion(outputs, targets)`` and takes ``torch.max(outputs, 1)``
    as the prediction.
    NOTE(review): torchvision detection models such as Mask R-CNN are not
    called this way in training mode -- that is the "targets should not be
    none" assertion quoted at the top of this file.

    Args:
        model: module to train; moved between train/eval mode per phase.
        criterion: loss callable taking ``(outputs, targets)``.
        optimizer: optimizer over the model's parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: number of epochs to run.

    Returns:
        ``model`` with the best-validation-accuracy state dict loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, targets in dataloaders[phase]:
                inputs = inputs.to(device)
                targets = targets.to(device)

                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # loss.item() is multiplied by the batch size so that dividing
                # by the dataset size below gives a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                running_corrects += torch.sum(preds == targets.data)

            if phase == 'train':
                # Advance the LR schedule once per epoch, after the optimizer
                # updates; without this call the scheduler has no effect.
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Snapshot the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    model.load_state_dict(best_model_wts)
    return model

def visualize_model(model, num_images=6):
    """Plot up to ``num_images`` validation images with their predicted class.

    The model is switched to eval mode for the duration of the call and its
    previous train/eval mode is restored on every exit path. Predictions are
    taken as ``torch.max(outputs, 1)``, i.e. the model is expected to return
    one row of class scores per image.

    Args:
        model: module to run on batches from ``dataloaders['val']``.
        num_images: maximum number of images to draw (two per row).
    """
    was_training = model.training
    model.eval()
    images_so_far = 0
    plt.figure()
    # Round up so an odd num_images still has a grid cell for its last image.
    rows = (num_images + 1) // 2

    try:
        with torch.no_grad():
            for inputs, _labels in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[preds[j]]}')
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        # Restore the caller's mode whether we returned early, ran out of
        # data, or raised.
        model.train(mode=was_training)

# Initialize the Mask R-CNN model.
# The model is built exactly once, from the pretrained weights, and only then
# are its heads replaced and the result moved to the device. Re-creating the
# model afterwards would silently discard both the new heads and the move.
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights, MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

weights = MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights=weights)

# Number of output classes -- modify to match your task.
# NOTE(review): torchvision detection heads conventionally reserve index 0
# for background; confirm that 7 already includes it.
num_classes = 7

# Modify the classification and box regression heads.
# The ROI heads unpack the box predictor's output into class logits and box
# regression deltas, so the replacement must produce both; FastRCNNPredictor
# does, whereas a single Linear(+Softmax) stack returns only one tensor.
in_features = model_ft.roi_heads.box_predictor.cls_score.in_features
model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Optionally, modify the mask predictor for instance segmentation
in_features_mask = model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256  # Example
model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)

# Move the model to the device
model_ft = model_ft.to(device)

# Define loss function (criterion)
# NOTE(review): in training mode torchvision detection models compute their
# own losses from (images, targets); this criterion is only consumed by
# train_model's classification-style loop.
criterion = nn.CrossEntropyLoss()

# Define optimizer
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Define learning rate scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# Train the model
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25)

This will most likely fail, since the targets are not passed to the model's forward method, as the error message points out.

New code:

forward

            with torch.set_grad_enabled(phase == 'train'):
                # Pass both inputs and targets to the model.
                # The model indexes every element of `targets` like a dict
                # (target["boxes"] in the traceback below), so `targets` must
                # be a sequence with one dict per image -- "boxes", "labels"
                # and, for Mask R-CNN, "masks" -- not a tensor of class ids.
                outputs = model(inputs, targets)
                # In training mode Mask R-CNN returns a dict of individual
                # loss terms; there is no single 'loss' key, so sum them.
                # NOTE(review): in eval mode the model returns predictions
                # instead of losses, so this line only applies while the
                # model is in train mode.
                loss = sum(outputs.values())

                # Perform backward pass and optimization only during training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # statistic
            running_loss += loss.item() * inputs.size(0)

I changed the code, but I am not sure whether this is the correct approach. After the changes, I am receiving this new error:

IndexError Traceback (most recent call last)
in <cell line: 252>()
250 model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights=weights)
251
→ 252 model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
253 num_epochs=25)

3 frames
in train_model(model, criterion, optimizer, scheduler, num_epochs)
155 with torch.set_grad_enabled(phase == ‘train’):
156 # Pass both inputs and targets to the model
→ 157 outputs = model(inputs, targets)
158 # For Mask R-CNN, the model returns a dictionary containing the loss
159 loss = outputs[‘loss’]

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
→ 1511 return self._call_impl(*args, **kwargs)
1512
1513 def _call_impl(self, *args, **kwargs):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1520 return forward_call(*args, **kwargs)
1521
1522 try:

/usr/local/lib/python3.10/dist-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
63 else:
64 for target in targets:
—> 65 boxes = target[“boxes”]
66 if isinstance(boxes, torch.Tensor):
67 torch._assert(

IndexError: too many indices for tensor of dimension 0