I'm trying to train a model that predicts a bounding box around a hand. When I run the code I get the following error:

Expected target boxes to be a tensor of shape [N, 4], got torch.Size([8, 1, 4]).
This issue has already been discussed here, but I could not tell whether the author actually opened a new issue as stated. Here is the relevant part of my code:
import glob
import os
import xml.etree.ElementTree as ET

import PIL.Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms


def resize_image_and_bbox(img, bboxes, new_height, new_width):
    # Resize a PIL image to (new_height, new_width) and rescale the boxes
    # ([N, 4] in xmin, ymin, xmax, ymax format) to the new image size
    original_width, original_height = img.size
    height_factor = original_height / new_height
    width_factor = original_width / new_width
    transform = transforms.Compose([
        transforms.Resize((new_height, new_width)),
        transforms.ToTensor()
    ])
    img_as_tensor = transform(img)
    # Scale x coordinates
    bboxes[:, [0, 2]] = bboxes[:, [0, 2]] / width_factor
    # Scale y coordinates
    bboxes[:, [1, 3]] = bboxes[:, [1, 3]] / height_factor
    return img_as_tensor, bboxes
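For context, this is roughly how the helper behaves on a single sample outside the dataset; the file name and the 224x224 target size are just placeholders:

# Hypothetical standalone call, only to show the shapes involved
img = PIL.Image.open("hand_example.jpg")              # placeholder image path
bboxes = torch.tensor([[34.0, 50.0, 120.0, 180.0]])   # one box, shape [1, 4]
img_t, scaled = resize_image_and_bbox(img, bboxes, 224, 224)
print(img_t.shape)   # torch.Size([3, 224, 224]) for an RGB image
print(scaled.shape)  # torch.Size([1, 4]), still one box per image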
class CustomDataset(Dataset):
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.class_lbl = "hand"
        self.label_paths = []
        self.label_paths += glob.glob(os.path.join(root_dir, "labels", "VOC", "*.xml"))

    def __len__(self):
        return len(self.label_paths)

    def __getitem__(self, index):
        tree = ET.parse(self.label_paths[index])
        root = tree.getroot()
        img_path = os.path.join(self.root_dir, "images", root.find("path").text.split("\\")[-1])
        img = PIL.Image.open(img_path)
        # img = decode_image(img_path, ImageReadMode.RGB).numpy(force=True)
        xmin = float(root.find("object/bndbox/xmin").text)
        ymin = float(root.find("object/bndbox/ymin").text)
        xmax = float(root.find("object/bndbox/xmax").text)
        ymax = float(root.find("object/bndbox/ymax").text)
        labels = torch.tensor([1])
        bboxes = torch.tensor([[xmin, ymin, xmax, ymax]], dtype=torch.float32)
        img_as_tensor, bboxes = resize_image_and_bbox(img, bboxes, IMAGE_HEIGHT, IMAGE_WIDTH)
        print(bboxes)
        target = {}
        target["boxes"] = bboxes
        target["labels"] = labels
        return img_as_tensor, target
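For completeness: the snippets above leave out how device, model, EPOCHS, IMAGE_HEIGHT/IMAGE_WIDTH and dataloaders are defined. The setup looks roughly like the sketch below, a torchvision Faster R-CNN (which is where the error message originates) and a plain DataLoader with the default collate_fn; the data path, image size and batch size here are placeholders for my actual values:

import torch
from torch.utils.data import DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Placeholder constants (my real values differ)
IMAGE_HEIGHT, IMAGE_WIDTH = 224, 224
EPOCHS = 10
BATCH_SIZE = 8

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Torchvision Faster R-CNN with 2 classes (background + hand);
# weights="DEFAULT" requires a recent torchvision version
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=2)
model.to(device)

# DataLoader uses the default collate_fn (no custom collate_fn is set)
dataset = CustomDataset("data/hands")  # placeholder root directory
dataloaders = {
    "train": DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
}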
# Set up the optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
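Before the training loop, this is what a single batch from the dataloader looks like (shapes assume the placeholder setup above; the boxes shape is the one reported in the error):

# Quick shape check on one batch
images, targets = next(iter(dataloaders["train"]))
print(images.shape)             # e.g. torch.Size([8, 3, 224, 224])
print(targets["boxes"].shape)   # torch.Size([8, 1, 4]), the shape from the error
print(targets["labels"].shape)  # torch.Size([8, 1])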
# Train the model
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0

    # Training loop
    for images, targets in dataloaders["train"]:
        images = list(image.to(device) for image in images)
        targets = {k: v.to(device) for k, v in targets.items()}

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        loss_dict = model(images, [targets])
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass
        losses.backward()
        optimizer.step()

        train_loss += losses.item()

    # Update the learning rate
    lr_scheduler.step()
    print(f'Epoch: {epoch + 1}, Loss: {train_loss / len(dataloaders["train"])}')

print("Training complete!")