# Is there a better way to generate monotone positive trainable weights?

Hi, I was trying to generate a group of trainable, monotone, positive tensors as the weights for Conv1d. It works to some extent, but the problem is that the real trainable parameters, “alpha” and “beta”, change only very slightly compared with orthodox convolution weights; they just fluctuate around their initial values. So I suspect the gradients are not very friendly to train with, due to the way I create those weights. Just curious if there is a better way to handle it. Thanks!

``````
import torch
import torch.nn as nn
import torch.nn.functional as F


class Fir1d(nn.Module):
    def __init__(self, k_size, device, init_alpha, init_beta=0):
        super().__init__()
        # every channel uses the same kernel weights
        assert k_size % 2 == 1
        m = (k_size - 1) // 2
        self.lin = torch.linspace(-m, m, k_size, device=device, requires_grad=False)
        self.alpha = nn.Parameter(torch.tensor([init_alpha], device=device, dtype=torch.float32, requires_grad=True))
        self.beta = nn.Parameter(torch.tensor([init_beta], device=device, dtype=torch.float32, requires_grad=True))
        self.w = torch.softmax(self.lin * self.alpha + self.beta, dim=0).unsqueeze(0).unsqueeze(0)  # 1,1,k

    def forward(self, x):
        # x: b,c,l
        xlist = []
        B, C, L = x.shape
        for i in range(C):
            subx = x[:, [i], :]
            main_trend = F.conv1d(subx, self.w)
            xlist.append(main_trend)
        xlist = torch.cat(xlist, dim=1)
        return xlist
``````

Hi Ximeng!

Am I correct that you want the individual weight-values in your `conv1d()`
kernel to be positive and monotonically increasing, even as they train?

This won’t work.

You are only calling:

``````
self.w = torch.softmax(self.lin * self.alpha + self.beta, dim=0).unsqueeze(0).unsqueeze(0)
``````

once, when you initialize your `Fir1d` model. Even though you update `alpha`
and `beta` when you run your optimizer step, you never recompute `w` (your
kernel weights), so changing the values of `alpha` and `beta` doesn’t actually
do anything.
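
You can see this with a quick check (a minimal sketch, assuming the `Fir1d` class from your post is defined):

``````
fir = Fir1d(k_size=5, device="cpu", init_alpha=1.0)
w_before = fir.w.clone()

with torch.no_grad():
    fir.alpha += 1.0                  # simulate what an optimizer step would do

print(torch.equal(fir.w, w_before))   # True -- the cached kernel never changes
``````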

Recompute `w` inside of `forward()` (and just have it be a local variable of
`Fir1d`'s `forward()` method), e.g.:

``````
        ...
        w = torch.softmax(self.lin * self.alpha + self.beta, dim=0).unsqueeze(0).unsqueeze(0)
        for i in range(C):
            subx = x[:, [i], :]
            main_trend = F.conv1d (subx, w)
            xlist.append(main_trend)
        xlist = torch.cat(xlist, dim=1)
        return xlist
``````

or, probably more efficiently, without the loop:

``````
    def forward(self, x):
        B, C, L = x.shape
        w = torch.softmax (self.lin * self.alpha, dim=0).unsqueeze(0).expand (C, 1, self.lin.size (0))
        return F.conv1d (x, w, groups = C)
``````
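
As a quick sanity check (a sketch, not from the original post), repeating the same kernel across channels with `groups = C` gives the same result as the per-channel loop:

``````
B, C, L, k = 2, 3, 32, 5
x = torch.randn(B, C, L)
w = torch.softmax(torch.linspace(-2, 2, k), dim=0)   # some fixed monotone kernel

# per-channel loop (as in the original forward())
loop_out = torch.cat(
    [F.conv1d(x[:, [i], :], w.view(1, 1, k)) for i in range(C)], dim=1
)

# loop-free: one grouped ("depthwise") convolution with the kernel repeated C times
grouped_out = F.conv1d(x, w.view(1, 1, k).expand(C, 1, k), groups=C)

print(torch.allclose(loop_out, grouped_out))   # True
``````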

I left `self.beta` out of the loop-free version because it doesn’t do anything.
`softmax()` takes “raw-score” logits that it then, in effect, “normalizes.” Adding
the same constant to every logit leaves the result unchanged, so `self.beta` drops
out of the `softmax()` computation.
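
You can confirm that numerically (a small sketch, not part of the original post):

``````
logits = torch.linspace(-2, 2, 5)
beta = 3.7
print(torch.allclose(torch.softmax(logits, dim=0),
                     torch.softmax(logits + beta, dim=0)))   # True -- beta cancels out
``````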

So (even if you leave `beta` in) you will be training only one kernel-weight
parameter, `alpha`.

As an aside, if you wanted your kernel weights to depend on more trainable
parameters, while still being positive and increasing monotonically, you could:

``````
    def __init__(self, k_size, device, init_alpha, init_beta=0):
        ...
        self.kernel_parameters = nn.Parameter (torch.zeros (k_size))   # initial value

    def forward(self, x):
        B, C, L = x.shape
        w = self.kernel_parameters.exp().cumsum (0).unsqueeze (0).expand (C, 1, self.kernel_parameters.size (0))
        return F.conv1d (x, w, groups = C)
``````

Your raw `kernel_parameters` run from `-inf` to `inf`. `.exp()` will cause your
derived weights, `w`, to be positive, and `.cumsum()` will cause your derived
weights to be monotonically increasing.
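
A quick numerical illustration (again just a sketch):

``````
raw = torch.randn(7)                  # unconstrained trainable parameters
w = raw.exp().cumsum(0)               # positive increments, accumulated

print(torch.all(w > 0))               # True -- all weights are positive
print(torch.all(w[1:] > w[:-1]))      # True -- strictly increasing
``````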

Best.

K. Frank

Thank you Frank! You are so professional. You’ve helped me a lot every time XD.