Gradient always zero in weighted pooling


I have implemented a custom adaptive pooling layer that is meant to learn the best weighting between max pooling and average pooling, but for some reason the gradient of the weight is always zero. What am I missing?

class HybridPooling2d(nn.Module):
    """Learnable blend of adaptive average pooling and adaptive max pooling.

    Computes ``w * avg_pool(x) + (1 - w) * max_pool(x)`` where ``w`` is a
    learned scalar parameter.

    Args:
        output_size: target spatial size passed to both adaptive pools.
    """

    def __init__(self, output_size):
        super(HybridPooling2d, self).__init__()
        self.output_size = output_size
        # BUG FIX: torch.Tensor(1, 1) allocates UNINITIALIZED memory — it can
        # start as 0.0, NaN, or arbitrary garbage, making training erratic or
        # dead. Initialize the blend weight explicitly to 0.5 so both pooling
        # branches contribute from the first step.
        self.weight = nn.Parameter(torch.full((1, 1), 0.5))

    def forward(self, x):
        avg = F.adaptive_avg_pool2d(x, self.output_size)
        mx = F.adaptive_max_pool2d(x, self.output_size)
        # d(out)/d(weight) = avg - mx. NOTE: if output_size equals the input's
        # spatial size, both pools are the identity, avg == mx, and the weight
        # gradient is exactly zero — choose output_size smaller than the
        # incoming feature map.
        return self.weight * avg + (1 - self.weight) * mx

I am using this in the last layer of my network as follows:

 def forward(self, x):
        """Run features -> hybrid pooling -> dropout -> flatten -> classifier."""
        out = self.features(x)
        out = self.ghp(out)
        # BUG FIX: training=True forced dropout to stay active at evaluation
        # time, corrupting inference. Respect the module's train/eval mode.
        out = F.dropout2d(out, 0.1, training=self.training)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

where self.features = nn.Sequential(...) and self.ghp = HybridPooling2d(..)