I am trying to train a network using multiple bounding boxes: a single image can have more than one bounding box associated with it. This is what the bounding-box tensor looks like when there are 3 objects in the image (one row of 4 values per box):
tensor([[[444., 220., 27., 65.],
[468., 220., 26., 66.],
[415., 224., 20., 33.]]])
I am using a simple training loop while trying to get it to work:
# Minimal two-epoch training loop for the bounding-box output of `net`.
for epoch in range(2):
    for step, (images, target) in enumerate(trainloader):
        # Collapse all boxes of the sample into a single row; three boxes of
        # four values each become a tensor of shape (1, 12).
        # NOTE(review): the width of `box` grows with the number of boxes in
        # the image, so it has to agree with whatever `net` emits for the
        # loss below to be well defined -- confirm the shapes match.
        box = target['nboxes'].view(1, -1)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()

        out_box = net(images)
        loss = criterion_bbox(out_box, box)

        loss.backward()
        optimizer.step()
The line `box = target['nboxes'].view(1, -1)` flattens the boxes into a single tensor of shape `torch.Size([1, 12])`. I am just wondering if this is the best/correct approach?
My network is:
class Net(nn.Module):
    """Small CNN that maps a batch of images to 4 output values per image.

    The first fully connected layer is hard-wired to 1,256,000 input
    features. That is exactly what the convolutional stack produces for a
    3 x 512 x 640 input (16 channels x 250 x 314 after two 5x5 convolutions
    and one 2x2 max-pool), so other input resolutions will fail in that layer.

    Args:
        num_classes: Accepted for interface compatibility. It is currently
            not used: every layer width below is hard-coded.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.base = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            # Built-in flatten: keeps the batch dimension and merges the
            # rest, which is what the following Linear layer requires.
            nn.Flatten(),
            # Redundant (ReLU of an already rectified tensor), but kept so
            # the Sequential indices -- and therefore the state_dict keys of
            # the Linear layers below -- stay unchanged.
            nn.ReLU(),
            nn.Linear(1256000, 157),
            nn.Linear(157, 84),
            nn.Linear(84, 5),
        )
        # Regression head: 5 features in, 4 values out.
        self.out_bbox = nn.Linear(5, 4)

    def forward(self, x):
        """Run the network on `x` and return the output of `out_bbox`.

        Args:
            x: Image batch; the layer sizes above require 3 x 512 x 640
                images.

        Returns:
            Tensor with one row of 4 values per image in the batch.
        """
        # `base` already ends in a Linear layer, so its output is 2-D
        # (batch, 5) and needs no further reshaping before the head.
        features = self.base(x)
        return self.out_bbox(features)
# Instantiate the network.
net = Net(num_classes=5)

# Smoke test: push one random 3 x 512 x 640 image through the model.
x = torch.randn((1, 3, 512, 640))
output = net(x)

# Mean-squared-error loss on the model output, optimised with SGD + momentum.
criterion_bbox = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9)