I have implemented a custom adaptive pooling layer that is meant to learn the best weighting between max pooling and average pooling, but for some reason the gradient of the weight is always zero. What am I missing?
class HybridPooling2d(nn.Module):
    """Learnable blend of adaptive average pooling and adaptive max pooling.

    Computes ``w * avg_pool(x) + (1 - w) * max_pool(x)`` where ``w`` is a
    single learned scalar parameter (stored with shape (1, 1) for
    backward-compatible state dicts).

    Args:
        output_size: target spatial size passed to both adaptive pools
            (same semantics as ``nn.AdaptiveAvgPool2d``).
    """

    def __init__(self, output_size):
        super().__init__()
        self.output_size = output_size
        # FIX: torch.Tensor(1, 1) allocates *uninitialized* memory and the
        # two-step .data.uniform_() is the dated idiom; torch.rand gives a
        # well-defined uniform[0, 1) init in one step.
        self.weight = nn.Parameter(torch.rand(1, 1))

    def forward(self, x):
        avg = F.adaptive_avg_pool2d(x, self.output_size)
        mx = F.adaptive_max_pool2d(x, self.output_size)
        # NOTE(review): dL/dw = grad_out · (avg - mx). If the incoming
        # feature map's spatial size already equals output_size, both pools
        # are identities, avg == mx, and the weight gradient is exactly
        # zero — the likely cause of the reported symptom. Make sure the
        # input fed to this layer is spatially larger than output_size.
        return self.weight * avg + (1 - self.weight) * mx
I am using this as the final pooling stage of my network, as follows:
def forward(self, x):
    """Run the feature backbone, hybrid pooling, dropout, and classifier.

    Args:
        x: input batch, shape (N, C, H, W) — TODO confirm against callers.

    Returns:
        Classifier output for the flattened pooled features.
    """
    out = self.features(x)
    # The hybrid pooling layer replaces the previous fixed global max-pool
    # (F.max_pool2d over the full spatial extent).
    out = self.ghp(out)
    # BUG FIX: training=True kept dropout active even in eval/inference
    # mode; gate it on the module's own training flag instead.
    out = F.dropout2d(out, 0.1, training=self.training)
    out = out.view(out.size(0), -1)
    out = self.classifier(out)
    return out
where `self.features = nn.Sequential(...)` and `self.ghp = HybridPooling2d(...)`.