Enforcing monotonicity in additive neural networks

I am trying to build an additive neural network and enforce a monotonic constraint between a particular feature and the output.

This is my model architecture:

import torch
import torch.nn as nn

class GAM_Torch(nn.Module):
    def __init__(self, input_size, output_size, hidden_neurons, hidden_layers=5, device='cpu'):
        super(GAM_Torch, self).__init__()
        self.device = device
        self.output_size = output_size
        # One subnetwork per input feature (additive / GAM structure)
        self.linears = nn.ModuleList([nn.Linear(1, hidden_neurons) for _ in range(input_size)])
        self.hidden_layers = nn.ModuleList([
            nn.ModuleList([nn.Linear(hidden_neurons, hidden_neurons) for _ in range(input_size)])
            for _ in range(hidden_layers)
        ])
        self.outputs = nn.ModuleList([nn.Linear(hidden_neurons, 1) for _ in range(input_size)])
        self.elu = nn.ELU()
        self.bns = nn.ModuleList([
            nn.ModuleList([nn.BatchNorm1d(hidden_neurons) for _ in range(input_size)])
            for _ in range(hidden_layers)
        ])
        self.dropout = nn.Dropout()  # defined but currently unused in forward()
    
    def forward(self, x):
        out = torch.zeros(x.shape[0], self.output_size).to(self.device)

        # Each feature goes through its own subnetwork; the results are summed
        for i in range(x.shape[1]):
            x_i = self.linears[i](x[:, i].unsqueeze(1))
            x_i = self.elu(x_i)

            if i == 0:  # apply monotonic activation only to the first feature
                x_i = self.monotonic_activation(x_i)

            for layer, bn in zip(self.hidden_layers, self.bns):
                x_i = layer[i](x_i)
                x_i = bn[i](x_i)
                x_i = self.elu(x_i)

            x_i = self.outputs[i](x_i)
            out += x_i

        return out

    def monotonic_activation(self, x):
        # Meant to enforce monotonicity; currently just the identity
        # return torch.cumsum(torch.abs(x), dim=0)
        return x
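
For reference, a quick smoke test of the forward pass looks like this (the sizes here are arbitrary placeholders, not my real data dimensions):

# Smoke test of the forward pass with made-up sizes
model = GAM_Torch(input_size=4, output_size=1, hidden_neurons=32)
x = torch.randn(8, 4)    # batch of 8 samples, 4 features
print(model(x).shape)    # torch.Size([8, 1])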

And this is the training code for my model, where I calculate the loss by comparing the enforced monotonic output with the actual target.


import torch.optim as optim

learning_rate = 0.01
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
num_epochs = 100
batch_size = 40
train_loss_values = []
for epoch in range(num_epochs):
    model.train()
    for i in range(0, len(x_train_normalized), batch_size):
        batch_x = x_train_normalized[i:i+batch_size]
        batch_y = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(batch_x)

        # Enforce monotonicity w.r.t. the first feature column
        enforced_mono_output = enforce_monotonicity(batch_x[:, 0], outputs)

        loss = criterion(enforced_mono_output, batch_y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Store the training loss value
        train_loss_values.append(loss.item())
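
Since the constraint is on a single feature, the values passed to enforce_monotonicity are the first feature column across the whole batch (batch_x[:, 0]), not the first sample (batch_x[0]):

# Column vs. row indexing of a batch tensor (toy example)
batch_x = torch.tensor([[2.0, 9.0],
                        [4.0, 8.0],
                        [1.0, 7.0]])
print(batch_x[:, 0])  # tensor([2., 4., 1.]) -> first feature, all samples
print(batch_x[0])     # tensor([2., 9.])    -> all features, first sample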


        

This function enforces increasing monotonicity: it makes sure that the output increases, or at least stays the same, as the feature value increases.

def enforce_monotonicity(feature_values, output_values):
    # Order the outputs by increasing feature value
    sorted_indices = sorted(range(len(feature_values)), key=lambda k: feature_values[k])
    sorted_output_values = [output_values[i] for i in sorted_indices]
    # Clamp any decrease up to the previous (running maximum) value
    for i in range(1, len(sorted_output_values)):
        if sorted_output_values[i] < sorted_output_values[i-1]:
            sorted_output_values[i] = sorted_output_values[i-1]
    # Scatter the clamped values back into the original order
    for i, j in enumerate(sorted_indices):
        output_values[j] = sorted_output_values[i]
    return output_values

features = [2, 4, 1, 3]
outputs = [10, 15, 5, 6]
enforced_output_values = enforce_monotonicity(features, outputs)
print(enforced_output_values)

[10, 15, 5, 10]
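
The same clamping can also be written with tensor operations; this is just a sketch of an equivalent vectorized version using torch.cummax, not what I currently run:

# Sketch of an equivalent vectorized version of the clamping above
def enforce_monotonicity_torch(feature_values, output_values):
    order = torch.argsort(feature_values)                   # positions sorted by feature value
    clamped, _ = torch.cummax(output_values[order], dim=0)  # running max clamps any decrease
    result = torch.empty_like(output_values)
    result[order] = clamped                                 # scatter back to original order
    return result

print(enforce_monotonicity_torch(torch.tensor([2., 4., 1., 3.]),
                                 torch.tensor([10., 15., 5., 6.])))
# tensor([10., 15.,  5., 10.])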

I am wondering whether this approach actually makes sense: computing the loss on the model's output after it has been passed through an enforced-monotonicity step, and comparing that with the target.