How to constrain weights during training?

I have tried it like this:

import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self, mask_1_2, mask_2_3, mask_3_4, mask_4_out):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1, 2)
        self.fc2 = nn.Linear(2, 3)
        self.fc3 = nn.Linear(3, 2)
        self.fc4 = nn.Linear(2, 1)

        # masks multiplied into each layer's weights; shapes must match
        # the weight shape (out_features, in_features)
        self.mask_1_2 = mask_1_2
        self.mask_2_3 = mask_2_3
        self.mask_3_4 = mask_3_4
        self.mask_4_out = mask_4_out
    def forward(self, x, scale="no"):
        if scale == "yes":
            # mask the connections, then clamp weights and biases to [-1, 1]
            self.fc1.weight.data.mul_(self.mask_1_2)
            self.fc1.weight.data.clamp_(-1, 1)
            self.fc1.bias.data.clamp_(-1, 1)
            x = torch.sigmoid(self.fc1(x))

            self.fc2.weight.data.mul_(self.mask_2_3)
            self.fc2.weight.data.clamp_(-1, 1)
            self.fc2.bias.data.clamp_(-1, 1)
            x = torch.sigmoid(self.fc2(x))

            self.fc3.weight.data.mul_(self.mask_3_4)
            self.fc3.weight.data.clamp_(-1, 1)
            self.fc3.bias.data.clamp_(-1, 1)
            x = torch.sigmoid(self.fc3(x))

            self.fc4.weight.data.mul_(self.mask_4_out)
            self.fc4.weight.data.clamp_(-1, 1)
            self.fc4.bias.data.clamp_(-1, 1)
            x = torch.sigmoid(self.fc4(x))

        elif scale == "no":
            # mask only, no clamping
            self.fc1.weight.data.mul_(self.mask_1_2)
            x = torch.sigmoid(self.fc1(x))

            self.fc2.weight.data.mul_(self.mask_2_3)
            x = torch.sigmoid(self.fc2(x))

            self.fc3.weight.data.mul_(self.mask_3_4)
            x = torch.sigmoid(self.fc3(x))

            self.fc4.weight.data.mul_(self.mask_4_out)
            x = torch.sigmoid(self.fc4(x))
        return x
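
For reference, this is roughly how I build the masks and train (a minimal sketch; the all-ones masks and the Adam/MSE choices here are placeholders for my actual setup):

# masks must match each weight's (out_features, in_features) shape;
# all-ones masks are just placeholders here
mask_1_2 = torch.ones(2, 1)
mask_2_3 = torch.ones(3, 2)
mask_3_4 = torch.ones(2, 3)
mask_4_out = torch.ones(1, 2)

net = Net(mask_1_2, mask_2_3, mask_3_4, mask_4_out)
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
criterion = nn.MSELoss()

x = torch.linspace(-3.14, 3.14, 200).unsqueeze(1)  # 200 samples, 1 feature each
y = torch.sin(x)

for epoch in range(2000):
    optimizer.zero_grad()
    pred = net(x, scale="yes")  # the mode that trains badly
    loss = criterion(pred, y)
    loss.backward()
    optimizer.step()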

Training works fine when scale == "no": the result of fitting a sin function is good.
But if I use scale == "yes", the regression result is very bad.
I think it's because the weights are modified in place during forward, outside of autograd, so the updates drift away from correct backpropagation. Is there any way to train well with the clamping ("scale") enabled?
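
One idea I had (I'm not sure it is correct) is to leave forward as a plain computation (just x = torch.sigmoid(self.fcN(x)) for each layer) and re-impose the mask and clamp after each optimizer step instead, so autograd only ever sees an unmodified graph. A minimal sketch of what I mean, assuming net, optimizer, criterion, x, y as above and a forward changed as described:

for epoch in range(2000):
    optimizer.zero_grad()
    pred = net(x)              # forward with no .data manipulation
    loss = criterion(pred, y)
    loss.backward()
    optimizer.step()

    # project the updated parameters back into the constraint set
    with torch.no_grad():
        for fc, mask in [(net.fc1, net.mask_1_2), (net.fc2, net.mask_2_3),
                         (net.fc3, net.mask_3_4), (net.fc4, net.mask_4_out)]:
            fc.weight.mul_(mask)     # zero out the masked connections again
            fc.weight.clamp_(-1, 1)  # keep weights in [-1, 1]
            fc.bias.clamp_(-1, 1)    # keep biases in [-1, 1]

Would this train correctly, or is there a better way (e.g. reparameterizing the weights) to keep them in [-1, 1]?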