Enforcing monotonicity in additive neural networks

I am trying to build an additive neural network and enforce a monotonic relationship between one particular input feature and the output.

This is my model architecture:

class GAM_Torch(nn.Module):
    """Additive network built from one independent sub-network per input feature.

    Each feature column is passed through its own stack
    ``Linear(1, H) -> ELU -> [Linear(H, H) -> BatchNorm1d(H) -> ELU] * L -> Linear(H, 1)``
    and the per-feature scalar contributions are summed into the output.

    Args:
        input_size: Number of input features (one sub-network is built per feature).
        output_size: Width of the returned tensor. Every sub-network emits a
            single value per sample, which is broadcast across this dimension.
        hidden_neurons: Width ``H`` of every hidden layer.
        hidden_layers: Number ``L`` of hidden Linear/BatchNorm/ELU stages.
        device: Stored on the instance as ``self.device`` for callers that read
            it. ``forward`` no longer depends on it; the accumulator is
            allocated next to the input instead.

    Notes:
        * ``monotonic_activation`` is applied to feature 0 only and is currently
          the identity, so no monotonic constraint is enforced by this module.
        * ``self.dropout`` is created but not applied anywhere in ``forward``.
    """

    def __init__(self, input_size, output_size, hidden_neurons, hidden_layers=5, device='cpu'):
        super().__init__()
        self.device = device
        self.output_size = output_size

        # Sub-module creation order is kept stable so that seeded parameter
        # initialisation and state_dict keys stay the same.
        self.linears = nn.ModuleList(
            [nn.Linear(1, hidden_neurons) for _ in range(input_size)]
        )
        # Indexed as hidden_layers[layer][feature].
        self.hidden_layers = nn.ModuleList(
            [
                nn.ModuleList(
                    [nn.Linear(hidden_neurons, hidden_neurons) for _ in range(input_size)]
                )
                for _ in range(hidden_layers)
            ]
        )
        self.outputs = nn.ModuleList(
            [nn.Linear(hidden_neurons, 1) for _ in range(input_size)]
        )
        self.elu = nn.ELU()
        # Indexed as bns[layer][feature], parallel to hidden_layers.
        self.bns = nn.ModuleList(
            [
                nn.ModuleList([nn.BatchNorm1d(hidden_neurons) for _ in range(input_size)])
                for _ in range(hidden_layers)
            ]
        )
        self.dropout = nn.Dropout()

    def forward(self, x):
        """Return the sum of the per-feature sub-network outputs.

        Args:
            x: 2-D tensor indexed as ``x[:, i]`` for feature ``i``.

        Returns:
            Tensor of shape ``(x.shape[0], output_size)`` on the same device and
            with the same dtype as ``x``.
        """
        # Allocate next to the input so the accumulator follows the data's
        # device and dtype instead of a separately tracked device string.
        out = x.new_zeros(x.shape[0], self.output_size)

        for i in range(x.shape[1]):
            x_i = self.linears[i](x[:, i].unsqueeze(1))
            x_i = self.elu(x_i)

            if i == 0:  # Apply monotonic activation only to the first feature
                x_i = self.monotonic_activation(x_i)

            for layer, bn in zip(self.hidden_layers, self.bns):
                x_i = layer[i](x_i)
                x_i = bn[i](x_i)
                x_i = self.elu(x_i)

            x_i = self.outputs[i](x_i)
            # x_i is (batch, 1); the in-place add broadcasts it over output_size.
            out += x_i

        return out

    def monotonic_activation(self, x):
        """Hook intended to make feature 0's contribution monotonic.

        Currently a pass-through: the input is returned unchanged.
        """
        # NOTE: an earlier attempt, torch.cumsum(torch.abs(x), dim=0), is
        # disabled; dim 0 is the first axis of x, so that sum accumulated
        # across rows of the batch.
        return x

This is the training code for my model. The loss is computed by passing the model's output through the monotonicity-enforcing function and comparing that enforced output with the actual target.

learning_rate = 0.01
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Training loop
num_epochs = 100
batch_size = 40
# One entry per processed mini-batch.
train_losses = []
for epoch in range(num_epochs):
    for i in range(0, len(x_train_normalized), batch_size):
        batch_x = x_train_normalized[i:i+batch_size]
        batch_y = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(batch_x)
        # The constrained feature is column 0 of the batch (one value per
        # sample). `batch_x[0]` would instead select the first sample's row,
        # whose length is the number of features rather than the batch size.
        # Assumes batch_x is 2-D (samples, features), as the model's forward
        # indexes it with x[:, i].
        enforced_mono_output = enforce_monotonicity(batch_x[:, 0], outputs)
        # NOTE(review): confirm batch_y has the same shape as the model output.
        loss = criterion(enforced_mono_output, batch_y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Store the training loss value
        train_losses.append(loss.item())


This function enforces increasing monotonicity: when the samples are ordered by increasing feature value, each output is at least as large as the one before it.

def enforce_monotonicity(feature_values, output_values):
    """Make ``output_values`` non-decreasing with respect to ``feature_values``.

    Samples are visited in ascending order of their feature value (ties keep
    their original relative order). Any output that is lower than the output
    kept for the previous sample is replaced by that previous output.

    The corrected values are written back into ``output_values`` at their
    original positions, and that same object is returned.
    """
    order = sorted(range(len(feature_values)), key=lambda idx: feature_values[idx])

    # Walk the samples from smallest to largest feature value, carrying the
    # previous kept output forward whenever the current one drops below it.
    clamped = []
    for pos in order:
        value = output_values[pos]
        if clamped and value < clamped[-1]:
            value = clamped[-1]
        clamped.append(value)

    # Scatter the corrected outputs back to where each sample originally sat.
    for pos, value in zip(order, clamped):
        output_values[pos] = value
    return output_values

# Worked example: four samples given in unsorted feature order.
features = [2, 4, 1, 3]
outputs = [10, 15, 5, 6]
# Ordered by feature value the outputs read 5, 10, 6, 15; the 6 is lower than
# the 10 before it, so it is raised to 10.
# NOTE: the call writes into `outputs` itself and returns that same list.
enforced_output_values = enforce_monotonicity(features, outputs)

[10, 15, 5, 10]

I am wondering whether this approach actually makes sense: is it valid to compute the loss by first passing the model's output through this monotonicity-enforcing step and then comparing the enforced output with the target?