My model does not use any BatchNorm layers. However, I implemented a normalized convolution layer, shown below, and my network uses only this operation.

```
# The proposed Normalized Convolution Layer
class NConv2d(_ConvNd):
    """Normalized 2-D convolution that also propagates a confidence map.

    The same kernel is applied to the confidence map and to the
    confidence-weighted data; the output is the ratio of the two, plus an
    optional bias. The propagated confidence is the convolved confidence
    divided by the sum of each output channel's kernel weights.

    Args:
        in_channels, out_channels, kernel_size, stride, padding, dilation,
        groups, bias, padding_mode: forwarded to the ``_ConvNd`` constructor.
        pos_fn: name of the function used by ``EnforcePos`` to keep the
            weights positive, or ``None`` to leave the weights unconstrained.
        init_method: stored on the instance for use by ``init_parameters``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, pos_fn='softplus',
                 init_method='k', stride=1, padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros'):
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)

        # Call _ConvNd constructor (not transposed, no output padding).
        super(NConv2d, self).__init__(in_channels, out_channels, kernel_size,
                                      stride, padding, dilation, False, _pair(0),
                                      groups, bias, padding_mode)

        # Added to the denominator in forward() to avoid division by zero.
        self.eps = 1e-20
        self.pos_fn = pos_fn
        self.init_method = init_method

        # Initialize weights and bias.
        # NOTE(review): init_parameters is not defined in this excerpt;
        # confirm it is provided elsewhere in the class.
        self.init_parameters()

        if self.pos_fn is not None:
            EnforcePos.apply(self, 'weight', pos_fn)

    def forward(self, data, conf):
        """Apply the normalized convolution.

        Args:
            data: input tensor accepted by ``F.conv2d`` for this layer.
            conf: confidence tensor; it is multiplied element-wise with
                ``data`` and convolved with the same weights.

        Returns:
            Tuple ``(nconv, cout)``: the normalized convolution output and
            the propagated confidence, both with the shape of the convolution
            result.
        """
        # Normalized convolution: conv(data * conf) / (conv(conf) + eps).
        denom = F.conv2d(conf, self.weight, None, self.stride,
                         self.padding, self.dilation, self.groups)
        nomin = F.conv2d(data * conf, self.weight, None, self.stride,
                         self.padding, self.dilation, self.groups)
        nconv = nomin / (denom + self.eps)

        # Add the bias only when the layer has one; self.bias is None when
        # the module was built with bias=False.
        if self.bias is not None:
            nconv = nconv + self.bias.view(1, -1, 1, 1)

        # Propagate confidence: divide each output channel of the convolved
        # confidence by the sum of that channel's kernel weights.
        weight_sum = self.weight.view(self.weight.size(0), -1).sum(dim=-1)
        cout = denom / weight_sum.view(1, -1, 1, 1)

        return nconv, cout
# Non-negativity enforcement class
class EnforcePos(object):
    """Forward pre-hook that maps a module parameter through a positive function.

    While the module is in training mode, each forward call overwrites the
    named parameter's data with ``pos_fn`` applied to its current values.
    Because the result is written back into the parameter, the function is
    applied again to its own output on every later training-mode forward
    call. In evaluation mode the parameter is left untouched.

    Args:
        pos_fn: one of ``'softmax'``, ``'exp'``, ``'softplus'`` or
            ``'sigmoid'`` (case-insensitive).
        name: attribute name of the parameter on the hooked module.
    """

    def __init__(self, pos_fn, name):
        self.name = name
        self.pos_fn = pos_fn

    @staticmethod
    def apply(module, name, pos_fn):
        """Create the hook, register it as a forward pre-hook, and return it."""
        fn = EnforcePos(pos_fn, name)
        module.register_forward_pre_hook(fn)
        return fn

    def __call__(self, module, inputs):
        """Overwrite the parameter's data in place when the module is training."""
        if not module.training:
            return
        weight = getattr(module, self.name)
        # The result replaces the raw data, so no autograd graph is needed.
        with torch.no_grad():
            weight.data = self._pos(weight.data)

    def _pos(self, p):
        """Return ``p`` mapped through the configured positive function.

        Raises:
            ValueError: if ``pos_fn`` is not a supported function name.
        """
        pos_fn = self.pos_fn.lower()
        if pos_fn == 'softmax':
            # Softmax over all trailing dimensions of each (dim 0, dim 1) slice.
            p_sz = p.size()
            p = p.view(p_sz[0], p_sz[1], -1)
            p = F.softmax(p, -1)
            return p.view(p_sz)
        if pos_fn == 'exp':
            return torch.exp(p)
        if pos_fn == 'softplus':
            return F.softplus(p, beta=10)
        if pos_fn == 'sigmoid':
            return torch.sigmoid(p)
        # Fail loudly instead of returning None, which would only surface
        # later as an unrelated AttributeError in the hook.
        raise ValueError('Undefined positive function! Got: %r' % (self.pos_fn,))
```

As you can see, this implementation uses the `EnforcePos` class to force the weights to be non-negative. I wonder whether this implementation might be causing the problem I mentioned in this post, but I’m not sure. Do you have any suggestions for implementing a non-negativity constraint?

I ran into some problems building PyTorch 1.5.0 from source because my CUDA version is 9.0. Anyway, I’ll try to update to the latest version and test again!