I am using PyTorch version 2.3.0; the other files (engine.py, utils.py, …) were taken from GitHub.
I don't know whether the problem lies in the format of the dataset used to train Mask R-CNN or somewhere else,
but I think the structure of the dataset is correct.
This is my dataset.py, which generates the data:
# dataset.py
import torch
import torch.utils.data
import torchvision.transforms as transforms
from PIL import Image
import os
import numpy as np
class Dataset(torch.utils.data.Dataset):
    """Detection/segmentation dataset whose masks are precomputed with SAM.

    On construction every image listed in ``df["imageid"]`` is loaded from
    ``image_folder``, resized to ``IMAGE_SIZE`` and passed through the SAM
    ``processor``/``model`` pair; the resulting masks are cached in
    ``self.masks``. ``__getitem__`` then returns ``(image_tensor, target)``
    where ``target`` holds ``boxes``, ``labels``, ``masks``, ``area``,
    ``iscrowd`` and ``image_id``.

    NOTE(review): SAM is called here without any prompt, which appears to
    yield a single mask per image, whereas torchvision's Mask R-CNN expects
    ``target["masks"]`` to contain one mask per entry of ``target["boxes"]``.
    Confirm the mask count matches the box count for every sample.
    """

    # (width, height) every image is resized to before SAM and before training.
    IMAGE_SIZE = (350, 350)

    def __init__(self, grouped_df, df, image_folder, processor, model, device):
        """Store the inputs and precompute one SAM mask tensor per image.

        Args:
            grouped_df: Per-image annotations, indexed with
                ``grouped_df["classid"][idx]`` and
                ``grouped_df["resized_bbox"][idx]``.
            df: Table with an ``"imageid"`` column; its unique values define
                the images (and their order) in this dataset.
            image_folder: Directory containing ``<image_id>.jpg`` files.
            processor: SAM processor used to build model inputs and to
                post-process predicted masks.
            model: SAM model used to predict the masks.
            device: Device the SAM inputs are moved to.
        """
        super().__init__()
        self.df = df
        self.image_folder = image_folder
        self.grouped_df = grouped_df
        self.processor = processor
        self.model = model
        self.device = device
        self.image_ids = [
            self._format_image_id(num) for num in df["imageid"].unique()
        ]
        self.classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        self.masks = [
            self._predict_mask(self._load_resized_array(image_name))
            for image_name in self.image_ids
        ]

    @staticmethod
    def _format_image_id(num):
        """Return ``num`` as a string left-padded with zeros to 3 characters."""
        return str(num).zfill(3)

    def _load_resized_array(self, image_name):
        """Load ``<image_name>.jpg`` as a resized RGB ``uint8`` array."""
        image_path = os.path.join(self.image_folder, image_name + '.jpg')
        # The context manager closes the file handle; convert("RGB") guarantees
        # three channels even for grayscale or RGBA files.
        with Image.open(image_path) as image:
            resized_image = image.convert("RGB").resize(self.IMAGE_SIZE)
        return np.array(resized_image).astype(np.uint8)

    def _predict_mask(self, resized_array):
        """Run SAM on one image array and return its masks as a uint8 tensor."""
        inputs = self.processor(resized_array, return_tensors="pt").to(self.device)
        # Keep the whole inference (embedding + mask decoding) under no_grad so
        # no autograd state is retained while looping over the dataset.
        with torch.no_grad():
            image_embeddings = self.model.get_image_embeddings(inputs["pixel_values"])
            # Reuse the embeddings instead of re-encoding the pixels.
            inputs.pop("pixel_values", None)
            inputs.update({"image_embeddings": image_embeddings})
            outputs = self.model(**inputs, multimask_output=False)
        masks = self.processor.image_processor.post_process_masks(
            outputs.pred_masks.cpu(),
            inputs["original_sizes"].cpu(),
            inputs["reshaped_input_sizes"].cpu(),
        )
        masks = torch.as_tensor(np.array(list(map(np.array, masks)), dtype=np.uint8))
        # Drop the batch axis and the per-prompt mask axis (when of size 1).
        return masks.squeeze(dim=0).squeeze(dim=1)

    def __getitem__(self, idx):
        """Return ``(image_tensor, target)`` for the sample at position ``idx``."""
        resized_array = self._load_resized_array(self.image_ids[idx])

        labels = torch.as_tensor(self.grouped_df["classid"][idx], dtype=torch.int64)
        boxes = torch.as_tensor(self.grouped_df["resized_bbox"][idx], dtype=torch.float32)
        # Force an (N, 4) layout so an image without boxes yields shape (0, 4)
        # instead of failing on the column indexing below.
        boxes = boxes.reshape(-1, 4)
        # Area as (col3 - col1) * (col2 - col0); presumably boxes are
        # [x1, y1, x2, y2] in resized-image pixels — TODO confirm.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            # Suppose all instances are not crowd.
            "iscrowd": torch.zeros((boxes.shape[0],), dtype=torch.int64),
            "boxes": boxes,
            "labels": labels,
            "masks": self.masks[idx],
            "area": area,
            "image_id": torch.tensor([idx]),
        }
        return transforms.ToTensor()(resized_array), target

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_ids)
I saved the dataset as a .pkl file and then loaded it to split the data, as you can see here:
from torch.utils.data import Subset, DataLoader

# Fixed seed so the train/test partition is reproducible across runs.
torch.manual_seed(1)
indices = torch.randperm(len(loaded_dataset)).tolist()
test_split = 0.2
size = int(len(loaded_dataset) * test_split)
# Slice at a non-negative boundary: with size == 0 the negative-index form
# (indices[:-0] / indices[-0:]) would leave the training set empty and put
# every sample in the test set.
split_at = len(indices) - size
dataset_train = Subset(loaded_dataset, indices[:split_at])
dataset_test = Subset(loaded_dataset, indices[split_at:])

# Training batches are reshuffled on every pass; test order stays fixed.
data_loader_train = DataLoader(
    dataset_train, batch_size=4, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)
data_loader_test = DataLoader(
    dataset_test, batch_size=4, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)
Then I load the Mask R-CNN model and train it:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# NOTE(review): torchvision detection models treat label 0 as background, so
# 11 outputs leave 10 foreground classes — confirm the dataset's class ids
# follow that convention.
num_classes = 11

# `pretrained=True` is deprecated in torchvision; `weights="DEFAULT"` is the
# documented replacement and loads the default pretrained weights.
model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box classification head so it predicts `num_classes` classes.
in_features = model_ft.roi_heads.box_predictor.cls_score.in_features
model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Replace the mask head likewise.
in_features_mask = model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)

model_ft.to(device)

# Optimize only the parameters that require gradients.
params = [p for p in model_ft.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 5 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=5,
                                               gamma=0.1)
import torch.multiprocessing as mp

if __name__ == "__main__":
    # Use the 'spawn' start method for worker processes. force=True avoids a
    # RuntimeError when this code is executed again in the same interpreter
    # (e.g. a re-run notebook cell) after the start method was already set.
    mp.set_start_method('spawn', force=True)

    num_epochs = 10
    for epoch in range(num_epochs):
        # One pass over the training data, logging every 100 iterations.
        train_one_epoch(model_ft, optimizer, data_loader_train, device, epoch, print_freq=100)
        # Advance the learning-rate schedule once per epoch.
        lr_scheduler.step()
        # Evaluate the model that is being trained (`model_ft`); the original
        # passed `model`, which is not the network trained above.
        evaluate(model_ft, data_loader_test, device=device)