Why does “score.backward()” return a gradient tensor containing all zeros?

When I tried to modify a CNN visualization method named Grad-CAM to work with my YOLO v3 model, I ran into a problem: after calling score.backward(), the calculated gradient contains all zeros and therefore cannot be used in the subsequent processing. The score is the bounding box's confidence with respect to that class, and this value has been processed by NMS.

Any hint or help would be appreciated!

class GradCAM(object):
    """Grad-CAM visualization for a YOLO-style detector.

    Hooks one layer of ``net`` to capture its forward feature map and the
    gradient flowing back through it, then builds a class-activation map
    for a chosen post-NMS detection box.

    Notes:
        1. The network's weights are never updated; only the hooked
           activations and gradients are read.
        2. The targeted detection's confidence score drives the backward
           pass.
    """

    def __init__(self, net, layer_name):
        """
        :param net: detection model; must expose an iterable ``module_list``.
        :param layer_name: the module (or an object comparing equal to it)
            whose features/gradients should be captured.
        """
        self.net = net
        self.layer_name = layer_name
        self.feature = None   # forward activation of the hooked layer
        self.gradient = None  # gradient w.r.t. that activation
        self.net.eval()
        self.handlers = []
        self._register_hook()

    def _get_features_hook(self, module, input, output):
        """Forward hook: cache the hooked layer's output feature map."""
        self.feature = output
        print("feature shape:{}".format(output.size()))

    def _get_grads_hook(self, module, input_grad, output_grad):
        """Backward hook: cache the gradient w.r.t. the layer's output.

        :param output_grad: tuple of length 1 holding the gradient of the
            scalar loss (here: the box score) w.r.t. the module's output.
        """
        self.gradient = output_grad[0]

    def _register_hook(self):
        """Attach forward/backward hooks to the targeted module."""
        for i, module in enumerate(self.net.module_list):
            # NOTE(review): this matches only when ``layer_name`` compares
            # equal to the module object itself (nn.Module has no custom
            # __eq__, so this is identity) — confirm that is the intended
            # lookup rather than a string/index match.
            if module == self.layer_name:
                self.handlers.append(module.register_forward_hook(self._get_features_hook))
                # ``register_backward_hook`` is deprecated and can report
                # incorrect (often zero) gradients on composite modules — a
                # likely cause of the all-zero gradients.  Use the full
                # backward hook when the installed torch provides it.
                if hasattr(module, "register_full_backward_hook"):
                    self.handlers.append(module.register_full_backward_hook(self._get_grads_hook))
                else:
                    self.handlers.append(module.register_backward_hook(self._get_grads_hook))

    def remove_handlers(self):
        """Detach every registered hook."""
        for handle in self.handlers:
            handle.remove()

    def __call__(self, inputs, index=0):
        """Compute the CAM for one detection.

        :param inputs: {"image": [C,H,W], "height": height, "width": width}
        :param index: which post-NMS bounding box to explain
        :return: (cam, box, class_id) — cam resized to the box extent
        :raises RuntimeError: if the score is detached from the autograd
            graph or the hooks never fired.
        """
        self.net.zero_grad()
        output = self.net(inputs['image'])[0]
        output_nonmax = utils.non_max_suppression(output, conf_thres=0.25, iou_thres=0.45, multi_label=True)[0]
        score = output_nonmax[index][4]
        # If NMS detached its output (e.g. ran under no_grad), backward()
        # cannot reach the hooked layer; fail loudly instead of silently
        # producing a zero/garbage gradient.
        if not score.requires_grad:
            raise RuntimeError(
                "score is detached from the autograd graph (NMS likely ran "
                "under no_grad/detach); gradients cannot reach the model")
        proposal_idx = index
        score.backward()
        if self.feature is None or self.gradient is None:
            raise RuntimeError(
                "hooks captured nothing; check that layer_name matched a "
                "module in net.module_list")
        # NOTE(review): ``proposal_idx`` is a box index but here indexes the
        # first (batch) dimension of the hooked tensors — verify this is
        # really the intended lookup for this model.
        gradient = self.gradient[proposal_idx].detach().cpu().numpy()  # [C,H,W]
        weight = np.mean(gradient, axis=(1, 2))  # per-channel weights [C]
        feature = self.feature[proposal_idx].detach().cpu().numpy()  # [C,H,W]

        cam = feature * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
        cam = np.sum(cam, axis=0)  # [H,W]
        cam = np.maximum(cam, 0)  # ReLU
        # Normalize to [0, 1], guarding the all-zero map (division by zero
        # would turn the CAM into NaNs).
        cam = cam - np.min(cam)
        cam_max = np.max(cam)
        if cam_max > 0:
            cam = cam / cam_max

        # Resize the CAM to the detected box, clamping degenerate boxes to
        # at least 1 px so cv2.resize does not fail.
        box = output[index][:4].detach().numpy().astype(np.int32)
        x1, y1, x2, y2 = box
        cam = cv2.resize(cam, (max(x2 - x1, 1), max(y2 - y1, 1)))

        class_id = output[index][-1].detach().numpy()
        return cam, box, class_id