You could use an adaptive pooling layer first and then calculate the average using a view
on the result:
x = torch.randn(16, 14, 14)
out = F.adaptive_max_pool2d(x.unsqueeze(0), output_size=1)
# Calculate result manually to compare results
out_manual = torch.stack([out[:, i:i+4].mean() for i in range(0, 16, 4)])
out = out.view(out.size(0), out.size(1)//4, -1)
out = out.mean(2)
print(torch.allclose(out_manual, out))
> True