Hey everyone, I’m trying to build a region proposal network with a small convolutional head and VGG16 as a backbone for feature extraction. I’m having an issue where the parameters are not being updated (I’m currently fine-tuning the backbone, but will freeze the extractor later), and when I check the gradients they are all None. I keep getting dummy predictions and the loss isn’t decreasing. I’ve scoured this site, the docs, and Google for answers, but I’m stuck. Any debugging steps I might have missed? Thanks!
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

class RegionProposalNetwork(nn.Module):
    def __init__(self, in_channels=512, mid_channels=512, ratios=[1, 2, 4],
                 anchor_scales=[8, 16, 32], stride=16):
        super(RegionProposalNetwork, self).__init__()
        n_anchor = len(anchor_scales) * len(ratios)
        self.ratios = ratios
        self.anchor_scales = anchor_scales
        self.extractor = self.fe_init()
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        self.reg_layer = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
        self.cls_layer = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
        # the paper initializes these layers with zero mean and 0.01 standard deviation
        # conv sliding layer
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv1.bias.data.zero_()
        # regression layer
        self.reg_layer.weight.data.normal_(0, 0.01)
        self.reg_layer.bias.data.zero_()
        # classification layer
        self.cls_layer.weight.data.normal_(0, 0.01)
        self.cls_layer.bias.data.zero_()
    def forward(self, image, bbox, img_size, scale=16):
        # anchor generation
        anchors = anchor_target_generator(ratios=self.ratios, scales=self.anchor_scales)
        ious = bbox_ious(bbox.cpu(), anchors)
        anchor_labels = assign_labels(ious, anchors, img_size)
        anchor_locations = bbox_to_relative(ious, anchors, bbox.cpu(), img_size)
        print('Anchor Generation Completed')
        # turn anchor targets and labels into tensors
        gt_rpn_loc = torch.from_numpy(anchor_locations)
        gt_rpn_score = torch.from_numpy(anchor_labels)
        print("Extracting... ", end='')
        x = self.extractor(image)
        x = self.conv1(x)
        # predict object locations relative to the anchors, plus objectness scores
        pred_anchor_locs = self.reg_layer(x)
        pred_cls_scores = self.cls_layer(x)
        # reformat predictions to match the anchor target shapes
        pred_anchor_locs = pred_anchor_locs.permute(0, 2, 3, 1).contiguous().view(1, -1, 4)
        pred_cls_scores = pred_cls_scores.permute(0, 2, 3, 1).contiguous().view(1, -1, 2)
        rpn_loc = pred_anchor_locs[0]
        rpn_score = pred_cls_scores[0]
        rpn_score = rpn_score.detach().requires_grad_(True)
        rpn_score = rpn_score.to(device)
        rpn_loc = rpn_loc.detach().requires_grad_(True)
        rpn_loc = rpn_loc.to(device)
        gt_rpn_score = gt_rpn_score.detach()
        gt_rpn_score = gt_rpn_score.to(device)
        gt_rpn_loc = gt_rpn_loc.detach().requires_grad_(True)
        gt_rpn_loc = gt_rpn_loc.to(device)
        return rpn_loc, rpn_score, gt_rpn_loc, gt_rpn_score
    def predict(self, image):
        nms_thresh = 0.7
        n_train_pre_nms = 12000
        n_train_post_nms = 2000
        n_test_pre_nms = 6000
        n_test_post_nms = 300
        min_size = 16
        anchors = anchor_target_generator(ratios=self.ratios, scales=self.anchor_scales)
        tensor = image.to(device)
        x = self.extractor(tensor)
        x = self.conv1(x)
        # predict object locations relative to the anchors, plus objectness scores
        pred_anchor_locs = self.reg_layer(x)
        pred_cls_scores = self.cls_layer(x)
        # reformat predictions to match the anchor target shapes
        pred_cls_scores = pred_cls_scores.permute(0, 2, 3, 1).contiguous()
        # pred_cls_scores = pred_cls_scores.view(1, -1, 2)  # <-- for softmax classification
        # an 800x800 input with stride 16 gives a 50x50 feature map
        objectness_score = pred_cls_scores.view(
            1, 50, 50, len(self.ratios) * len(self.anchor_scales), 2
        )[:, :, :, :, 1].contiguous().view(1, -1)
        # convert predictions back into boxes using the same formulas as above
        objectness_score_numpy = objectness_score[0].cpu().data.numpy()
        roi = relative_to_bbox(anchors, pred_anchor_locs.cpu())
        roi, score = proposal_layer(roi, objectness_score_numpy)
        return roi, score
    def fe_init(self):
        # keep the first 29 layers of vgg16.features (drops the final ReLU and max-pool)
        vgg16 = torchvision.models.vgg16(pretrained=True)
        layers = list(vgg16.features)[:29]
        for layer in layers:
            if isinstance(layer, nn.ReLU):
                layer.inplace = True
        extractor = nn.Sequential(*layers)
        for param in extractor.parameters():
            param.requires_grad = True
        return extractor
def rpn_loss(rpn_loc, rpn_score, gt_rpn_loc, gt_rpn_score):
    rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_score.long(), ignore_index=-1)
    print(rpn_cls_loss)
    pos = gt_rpn_score > 0
    mask = pos.unsqueeze(1).expand_as(rpn_loc)
    # extract the bounding boxes for the positive labels
    mask_loc_preds = rpn_loc[mask].view(-1, 4)
    mask_loc_targets = gt_rpn_loc[mask].view(-1, 4)
    # smooth L1 loss on the positive locations
    x = torch.abs(mask_loc_targets - mask_loc_preds)
    rpn_loc_loss = ((x < 1).float() * 0.5 * x ** 2) + ((x >= 1).float() * (x - 0.5))
    # combine the class and location losses, using a regularization parameter
    rpn_lambda = 10.
    N_reg = (gt_rpn_score > 0).float().sum()
    rpn_loc_loss = rpn_loc_loss.sum() / N_reg
    rpn_loss = rpn_cls_loss + (rpn_lambda * rpn_loc_loss)
    return rpn_loss
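Side note on the loss: unless I'm misreading it, the elementwise term above is just standard smooth L1, so it could presumably be swapped for PyTorch's built-in (untested sketch; reduction='sum' matches the manual .sum() above):

# hypothetical drop-in for the manual smooth-L1 lines inside rpn_loss
rpn_loc_loss = F.smooth_l1_loss(mask_loc_preds, mask_loc_targets, reduction='sum')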
epochs = 5
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# assuming we are on a CUDA machine, this should be a CUDA device
rpn = RegionProposalNetwork()
rpn = rpn.to(device)
params = []
for name, param in rpn.named_parameters():
    if param.requires_grad:
        params.append(param)
        print("\t", name)
lr = .0001
optimizer = optim.Adam(rpn.parameters(), lr=lr)
print(list(rpn.parameters())[0].grad)  # None before the first backward, as expected
for i in idSALAMI[:epochs]:
    a = list(rpn.parameters())[0].clone()
    i = 355  # debugging against a single fixed sample for now
    optimizer.zero_grad()
    sample = salamiScaled[i]
    rpn_loc, rpn_score, gt_rpn_loc, gt_rpn_score = rpn(
        image=sample['image'].to(device), bbox=sample['bboxes'].to(device), img_size=[800, 800])
    loss = rpn_loss(rpn_loc, rpn_score, gt_rpn_loc, gt_rpn_score)
    print("epoch:", i, "loss:", loss)
    loss.backward()
    optimizer.step()
    b = list(rpn.parameters())[0].clone()
    print(torch.equal(a.data, b.data))  # prints True every epoch, i.e. no update
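For completeness, this is (roughly) how I'm checking the gradients; every single one prints None even after loss.backward():

# all of these come back None after backward(), which is the problem
for name, param in rpn.named_parameters():
    print(name, param.grad)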
My training scheme and module are above. Apologies if they’re disorganized, I’m new to this!
You are detaching some activations in the forward method:

rpn_score = rpn_score.detach().requires_grad_(True)
rpn_score = rpn_score.to(device)
rpn_loc = rpn_loc.detach().requires_grad_(True)

which will (as the name suggests) detach these tensors from the computation graph, so no gradients will be calculated for any of the previously used layers.
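If you need the loss inputs on the GPU, .to(device) alone keeps them attached to the graph. A minimal sketch of the tail of your forward without the detach calls:

# predictions: .to(device) preserves the grad history, no detach needed
rpn_score = rpn_score.to(device)
rpn_loc = rpn_loc.to(device)
# targets are constants and don't need gradients at all
gt_rpn_score = gt_rpn_score.to(device)
gt_rpn_loc = gt_rpn_loc.to(device)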
Yep, this was it. Thanks!