I get this same error. I am trying to train a Mask RCNN.
First I load my training data like this:
rescaled_images_folder = ‘…’
rescaled_json_folder = ‘…’
class ObjectDetectionDataset(Dataset):
def init(self, image_folder, json_folder, transform=None):
self.image_folder = image_folder
self.json_folder = json_folder
self.transform = transform
self.image_files = [file for file in os.listdir(self.image_folder) if file.endswith(‘.png’)] # Update file extension
self.json_files = [file for file in os.listdir(self.json_folder) if file.endswith(‘.json’)]
def __len__(self):
return len(self.image_files)
def bbox_to_mask(self, bboxes, width, height):
masks = []
for bbox_info in bboxes:
x_min, y_min, x_max, y_max = bbox_info # Use the values directly from bbox_info
mask = np.zeros((height, width), dtype=np.uint8)
mask[y_min:y_max, x_min:x_max] = 1
masks.append(mask)
return masks
def __getitem__(self, idx):
image_file = os.path.join(self.image_folder, self.image_files[idx])
json_file = os.path.join(self.json_folder, self.json_files[idx])
# Load the image as a PIL image
image = Image.open(image_file).convert('RGB')
# Load the JSON file and extract bounding boxes
with open(json_file, 'r') as file:
json_data = json.load(file)
bboxes = json_data['bounding_boxes']
# Extract labels from bounding boxes and convert them to integers
labels = [int(bbox['label']) for bbox in bboxes]
# Convert bounding boxes to the expected format (list of lists)
bboxes = [[bbox['bbox'][0], bbox['bbox'][1], bbox['bbox'][2], bbox['bbox'][3]] for bbox in bboxes]
# Convert bounding boxes to masks
image_width = json_data['imageWidth']
image_height = json_data['imageHeight']
masks = self.bbox_to_mask(bboxes, image_width, image_height)
targets = {
'image': image,
'labels': labels,
'boxes': bboxes, # Modify this to be a list of bounding boxes [x_min, y_min, x_max, y_max]
'masks': masks,
}
if self.transform:
image = self.transform(image)
return image.permute(2, 0, 1), targets
def collate_fn(batch):
images = [item[0] for item in batch] # Extract the image from the tuple
targets = [item[1] for item in batch] # Extract the target dictionary
all_labels = [target['labels'] for target in targets]
all_bboxes = [target['boxes'] for target in targets]
all_masks = [target['masks'] for target in targets]
# Convert the extracted bounding boxes to tensors
bbox_tensors = [torch.tensor(bbox, dtype=torch.float32) for bbox in all_bboxes]
# Pad bounding boxes
padded_bboxes = pad_sequence(bbox_tensors, batch_first=True)
# Convert list of masks to a single NumPy array and then to a tensor
#padded_masks = pad_sequence([torch.tensor(np.array(mask)) for mask in all_masks], batch_first=True)
mask_tensors = [torch.tensor(mask, dtype=torch.float32) for mask in all_masks]
# Pad masks
padded_masks = pad_sequence(mask_tensors, batch_first=True)
# Convert labels to a tensor with padding
max_num_labels = max(len(labels) for labels in all_labels)
common_padding_value = -1
padded_labels = [labels + [common_padding_value] * (max_num_labels - len(labels)) for labels in all_labels]
padded_labels = torch.tensor(padded_labels, dtype=torch.long)
targets = {
'labels': padded_labels,
'boxes': padded_bboxes,
'masks': padded_masks}
return images, targets
def calculate_mean_std(image_folder):
image_files = [file for file in os.listdir(image_folder) if file.endswith(‘.png’)]
num_images = len(image_files)
pixel_sum = np.zeros(3)
pixel_squared_sum = np.zeros(3)
for image_file in image_files:
image_path = os.path.join(image_folder, image_file)
image = Image.open(image_path).convert('RGB')
pixel_data = np.array(image, dtype=np.float32) / 255.0 # Normalize pixel values to [0, 1]
# Accumulate pixel values and squared pixel values for each channel (R, G, B)
pixel_sum += np.sum(pixel_data, axis=(0, 1))
pixel_squared_sum += np.sum(pixel_data ** 2, axis=(0, 1))
# Calculate mean and standard deviation for each channel
pixel_mean = pixel_sum / (num_images * image.size[0] * image.size[1])
pixel_std = np.sqrt((pixel_squared_sum / (num_images * image.size[0] * image.size[1])) - pixel_mean ** 2)
return pixel_mean, pixel_std
Calculate mean and std
mean, std = calculate_mean_std(rescaled_images_folder)
print(“Mean:”, mean)
print(“Standard Deviation:”, std)
Define transformations (modify as needed)
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean, std)
])
Create the dataset
dataset = ObjectDetectionDataset(image_folder=rescaled_images_folder, json_folder=rescaled_json_folder, transform=transform)
Create the DataLoader
batch_size = 5
train_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
Then I am trying the simplest training/validation:
Define devices
device = torch.device(‘cuda’) if torch.cuda.is_available() else torch.device(‘cpu’)
Define the model
model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT)
Move model to GPU
model.to(device)
Define optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
Define loss functions
classification_loss = nn.CrossEntropyLoss()
regression_loss = nn.SmoothL1Loss()
mask_loss = nn.BCEWithLogitsLoss() # Dice + CrossEntropy / IoU
num_epochs = 2
for epoch in range(num_epochs):
model.train()
total_loss = 0.0
for images, targets in train_dataloader:
images = [image.to(device) for image in images]
targets = {k: v.to(device) for k, v in targets.items()}
optimizer.zero_grad()
# Forward pass
loss_dict = model(images, targets)
# Compute the total loss (classification loss + regression loss + mask loss)
loss = sum(loss for loss in loss_dict.values())
# Backward pass and optimization
loss.backward()
optimizer.step()
total_loss += loss.item()
# Print training statistics
print(f"Epoch [{epoch + 1}/{num_epochs}] Loss: {total_loss / len(train_dataloader)}")
# Update the learning rate scheduler
lr_scheduler.step()
model.eval()
validation_loss = 0.0
with torch.no_grad():
for images, labels, bboxes, masks, num_teeth in validation_dataloader:
images = [image.to(device) for image in images]
targets = [{'labels': labels[idx], 'boxes': bboxes[idx], 'masks': masks[idx]} for idx in range(len(labels))]
loss_dict = model(images, targets)
loss = sum(loss for loss in loss_dict.values())
validation_loss += loss.item()
# Print validation loss
print(f"Validation Loss: {validation_loss / len(validation_dataloader)}")
However, I keep getting the error:
loss_dict = model(images, targets)
…\Anaconda\lib\site-packages\torchvision\models\detection\generalized_rcnn.py", line 65, in forward
boxes = target[“boxes”]
TypeError: string indices must be integers
Could someone help me with this?