Will this sample code above work for multiple bounding-boxes per image?
I use exactly the same concept for pre-trained ResNet-50 architecture which looks like;
class ResNet50(nn.Module):
def __init__(self, num_classes=3):
super(ResNet50, self).__init__()
resnet = models.resnet50(pretrained=True)
layers = list(resnet.children())[:8]
self.features1 = nn.Sequential(*layers[:6])
self.features2 = nn.Sequential(*layers[6:])
self.classifier = nn.Sequential(nn.BatchNorm1d(2048),
nn.Linear(2048, num_classes))
self.bb = nn.Sequential(nn.BatchNorm1d(2048),
nn.Linear(2048, 4))
def forward(self, x):
x = self.features1(x)
x = self.features2(x)
x = F.relu(x)
x = nn.AdaptiveAvgPool2d((1,1))(x)
x = x.view(x.shape[0], -1)
return self.classifier(x), self.bb(x)
model = ResNet50().cuda()
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.006)
criterion = nn.CrossEntropyLoss()
And bounding boxes in this format:
inputs, targets = next(iter(trainloader)) # batch-size=2
print(targets)
tensor([[0.0000, 1.0000, 0.6492, 0.6117, 0.0203, 0.0219],
[0.0000, 1.0000, 0.7113, 0.4547, 0.0102, 0.0109],
[1.0000, 0.0000, 0.6271, 0.6268, 0.0073, 0.0068],
[1.0000, 0.0000, 0.6039, 0.6328, 0.0078, 0.0083],
[1.0000, 0.0000, 0.4901, 0.6349, 0.0063, 0.0073],
[1.0000, 1.0000, 0.6044, 0.6117, 0.0057, 0.0057],
[1.0000, 0.0000, 0.6096, 0.6057, 0.0057, 0.0062]])
Column-0 - bounding box image index
Column-1 - Class label {0,1,2}
Column-2:5 - Bounding boxes
for i in range(epochs):
model.train()
total,total_loss = 0.0, 0.0
for xs, ys in train_dl:
xs = xs.cuda().float()
ys_idx = ys[:,0].cuda()
ys_class = ys[:,1].cuda()
ys_bb = ys[:,2:].cuda().float()
print(f"Target Classes: {ys_class}\nTarget Classes Shape: {ys_class.shape}", )
pred_class, pred_bb = model(xs)
pred = torch.max(pred_class,1)[1]
print(f"Predicted Classes: {pred}\nPredicted Classes Shape: {pred.shape}")
loss_class = criterion(pred.float(), ys_class.long())
The at the loss function, I get an error;
Target Classes: tensor([1., 1., 0., 0., 0., 1., 0.], device='cuda:0')
Target Classes: torch.Size([7])
Passes classifier
Predicted Classes: tensor([3, 1], device='cuda:0')
Predicted Classes: torch.Size([2])
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)