# This is my code:
import copy
import os
import time
from tempfile import TemporaryDirectory

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torchvision
from PIL import Image
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

# Define device
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Mount Google Drive if needed
from google.colab import drive
drive.mount('/content/drive')

# License: BSD
# Author: Sasank Chilamkurthy
cudnn.benchmark = True
plt.ion()  # interactive mode

# Define your custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    """Image dataset read from a directory with one sub-directory per class.

    Every entry inside a class sub-directory is recorded as one sample whose
    label is the sub-directory name. Entries of ``data_dir`` that are not
    directories are ignored.

    Attributes:
        data_dir: Root directory that was scanned.
        transform: Optional callable applied to each PIL image.
        images: Sample file paths, ordered by class name then file name.
        labels: Class (folder) name of each entry in ``images``.
        classes: Sorted unique class names found under ``data_dir``.
        class_to_idx: Maps each class name to its position in ``classes``.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []  # image file paths
        self.labels = []  # class name for the image at the same position

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order reproducible between runs.
        for class_name in sorted(os.listdir(data_dir)):
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            for image_name in sorted(os.listdir(class_dir)):
                self.images.append(os.path.join(class_dir, image_name))
                self.labels.append(class_name)

        # Build the label -> index mapping here so __getitem__ does not rely
        # on a module-level list that may not exist yet.
        self.classes = sorted(set(self.labels))
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

    def __len__(self):
        """Return the number of samples found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label_idx)`` for the sample at position ``idx``.

        The image is opened with PIL, converted to RGB, and passed through
        ``transform`` when one was given. ``label_idx`` is the integer index
        of the sample's class in ``classes``.
        """
        image_path = self.images[idx]
        label = self.labels[idx]

        # Open image using PIL
        image = Image.open(image_path).convert('RGB')

        # Apply transformations if specified
        if self.transform:
            image = self.transform(image)

        # Convert label to integer index
        label_idx = self.class_to_idx[label]
        return image, label_idx
# Define data transformations
# Per-channel mean and std handed to Normalize; these are the values
# torchvision documents for its ImageNet-pretrained models.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

data_transforms = {
    # Training: random crop and horizontal flip as augmentation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
    # Validation: deterministic resize and centre crop, no augmentation.
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
}

# Specify the path to your custom dataset directory
data_dir = '/content/drive/MyDrive/OBJDataset.v1i.tensorflow'

# Create custom dataset instances for training and validation
# Each split gets its own transform pipeline (data_transforms[x]), not the
# whole dict of pipelines.
custom_datasets = {
    x: CustomDataset(os.path.join(data_dir, x), transform=data_transforms[x])
    for x in ['train', 'val']
}

# Create data loaders for training and validation
# Each loader wraps the dataset of its own split.
dataloaders = {
    x: torch.utils.data.DataLoader(
        custom_datasets[x], batch_size=4, shuffle=True, num_workers=4
    )
    for x in ['train', 'val']
}

# Get the sizes of the datasets
dataset_sizes = {x: len(custom_datasets[x]) for x in ['train', 'val']}

# Get the class names
# `labels` holds one entry per image, so de-duplicate before sorting to get
# exactly one name per class.
class_names = sorted(set(custom_datasets['train'].labels))
# Define the function to display images
def imshow(inp, title=None):
    """Display a normalized image tensor with matplotlib.

    Args:
        inp: Image tensor whose first axis is moved to the last position
            before plotting (channels-first in, channels-last out). It is
            expected to have been normalized with the mean/std used below.
        title: Optional title for the plot.
    """
    # detach()/cpu() make this work for tensors that live on the GPU or are
    # attached to the autograd graph; for plain CPU tensors they are no-ops.
    inp = inp.detach().cpu().numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Undo the normalization, then clamp to the [0, 1] range.
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Get a batch of training data
inputs, labels = next(iter(dataloaders['train']))

# Make a grid from the batch
out = torchvision.utils.make_grid(inputs)

# Convert class indices to class names
# Index class_names with each label; the list itself must not be repeated
# once per sample.
class_names_list = [class_names[x] for x in labels]

# Display the images with their corresponding class names
imshow(out, title=class_names_list)
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``. Batches are moved to the module-level
    ``device`` and per-epoch loss/accuracy are averaged with
    ``dataset_sizes``. The state dict with the highest validation accuracy
    is kept and restored before returning.

    Args:
        model: Module called as ``model(inputs)``; its output is passed to
            ``criterion`` and arg-maxed over dimension 1 for predictions.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: Number of epochs to run.

    Returns:
        ``model`` with the best validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, targets in dataloaders[phase]:
                inputs = inputs.to(device)
                targets = targets.to(device)

                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    _, preds = torch.max(outputs, 1)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by batch size so the epoch
                # average is per-sample.
                running_loss += loss.item() * inputs.size(0)
                # Keep the count as a Python int so an empty loader does not
                # break the accuracy computation below.
                running_corrects += torch.sum(preds == targets).item()

            # Advance the learning-rate schedule once per epoch, after the
            # optimizer updates of the training phase.
            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Remember the weights of the best validation epoch so far.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    model.load_state_dict(best_model_wts)
    return model
def visualize_model(model, num_images=6):
    """Plot predictions of ``model`` for images from the validation loader.

    Images are drawn from the module-level ``dataloaders['val']``, moved to
    ``device``, and shown in a two-column grid titled with the predicted
    entry of ``class_names``. The model is switched to eval mode for the
    duration of the call and its previous training mode is restored before
    returning, including when an exception is raised.

    Args:
        model: Module called as ``model(inputs)``; predictions are the
            arg-max of its output over dimension 1.
        num_images: Maximum number of images to display.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    # Round up so an odd num_images (or 1) still gets enough grid rows.
    rows = (num_images + 1) // 2
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[preds[j]]}')
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        model.train(mode=was_training)
# Initialize the Mask R-CNN model
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights, MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# NOTE(review): torchvision detection heads count the background as class 0;
# confirm that 7 already includes it.
num_classes = 7

# Build the model once, from the weights enum. Re-creating it after the heads
# are replaced would throw away both the new heads and the device placement.
weights = MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights=weights)

# Modify the classification and box regression heads
# The box predictor must return both class scores and box deltas, so use
# torchvision's FastRCNNPredictor rather than a single Linear + Softmax stack.
in_features = model_ft.roi_heads.box_predictor.cls_score.in_features
model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Optionally, modify the mask predictor for instance segmentation
in_features_mask = model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256  # Example
model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
    in_features_mask, hidden_layer, num_classes
)

# Move the model to the device
model_ft = model_ft.to(device)

# Define loss function (criterion)
criterion = nn.CrossEntropyLoss()

# Define optimizer
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Define learning rate scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# Train the model
# NOTE(review): torchvision detection models expect `model(images, targets)`
# in training mode (targets carrying boxes/labels/masks) and return a dict of
# losses, while train_model feeds `model(inputs)` into CrossEntropyLoss.
# Confirm the data format and adapt the training loop (or switch to a
# classification backbone) before relying on this call.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25)