I am trying to build an additive neural network and enforce a monotonic relationship between one particular feature and the output.
This is my model architecture:
class GAM_Torch(nn.Module):
    """Additive network: one independent sub-network per input feature.

    Every input column is pushed through its own stack
    (Linear(1, H) -> ELU -> [Linear(H, H) -> BatchNorm1d(H) -> ELU] * L ->
    Linear(H, 1)) and the per-feature scalars are summed into the output.

    Args:
        input_size: Number of input features, i.e. number of sub-networks.
        output_size: Number of output columns. Each sub-network emits a
            single value per sample, which is broadcast-added to every
            output column.
        hidden_neurons: Width H of each sub-network's hidden layers.
        hidden_layers: Number L of Linear/BatchNorm/ELU stages per feature.
        device: Device on which ``forward`` allocates its accumulator. The
            module's parameters are not moved by this argument.
    """

    def __init__(self, input_size, output_size, hidden_neurons, hidden_layers=5, device='cpu'):
        super().__init__()
        self.device = device
        self.output_size = output_size
        # Module creation order is kept stable so that seeded parameter
        # initialisation is reproducible.
        # Entry layer of each sub-network: lifts the scalar feature to H units.
        self.linears = nn.ModuleList(
            [nn.Linear(1, hidden_neurons) for _ in range(input_size)]
        )
        # Indexed as hidden_layers[layer][feature].
        self.hidden_layers = nn.ModuleList([
            nn.ModuleList(
                [nn.Linear(hidden_neurons, hidden_neurons) for _ in range(input_size)]
            )
            for _ in range(hidden_layers)
        ])
        # Exit layer of each sub-network: collapses H units to one scalar.
        self.outputs = nn.ModuleList(
            [nn.Linear(hidden_neurons, 1) for _ in range(input_size)]
        )
        self.elu = nn.ELU()
        # Indexed as bns[layer][feature], parallel to hidden_layers.
        self.bns = nn.ModuleList([
            nn.ModuleList([nn.BatchNorm1d(hidden_neurons) for _ in range(input_size)])
            for _ in range(hidden_layers)
        ])
        # NOTE: created but never applied in forward().
        self.dropout = nn.Dropout()

    def forward(self, x):
        """Sum the per-feature sub-network outputs.

        Args:
            x: 2-D tensor indexed as ``x[:, i]`` for feature ``i``; it must
                provide at most ``input_size`` columns.

        Returns:
            Tensor of shape ``(x.shape[0], output_size)`` on ``self.device``.
        """
        # Allocate the accumulator directly on the target device instead of
        # creating it on the CPU and copying it over.
        out = torch.zeros(x.shape[0], self.output_size, device=self.device)
        for i in range(x.shape[1]):
            # (batch,) -> (batch, 1) so the feature can feed Linear(1, H).
            x_i = self.linears[i](x[:, i].unsqueeze(1))
            x_i = self.elu(x_i)
            if i == 0:  # Apply monotonic activation only to the first feature
                x_i = self.monotonic_activation(x_i)
            for layer, bn in zip(self.hidden_layers, self.bns):
                x_i = layer[i](x_i)
                x_i = bn[i](x_i)
                x_i = self.elu(x_i)
            x_i = self.outputs[i](x_i)
            # (batch, 1) contribution broadcasts over the output columns.
            out += x_i
        return out

    def monotonic_activation(self, x):
        """Return ``x`` unchanged (placeholder for a monotonic activation).

        This is currently the identity, so it places no constraint on the
        first feature's sub-network. An earlier attempt,
        ``torch.cumsum(torch.abs(x), dim=0)``, is disabled; dim 0 is the
        batch axis of the tensor passed in by ``forward``, so that form
        would make each sample's activation depend on the other samples in
        the batch.
        """
        return x
And this is the code for training my model, where I calculate the loss by comparing the monotonicity-enforced output (the predicted target) with the actual target.
# Mini-batch training script. Relies on `model`, `x_train_normalized`,
# `y_train`, `train_loss_values` and `enforce_monotonicity` being defined
# elsewhere in the file.
learning_rate = 0.01
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Training loop
num_epochs = 100
batch_size = 40
for epoch in range(num_epochs):
    model.train()
    for i in range(0, len(x_train_normalized), batch_size):
        batch_x = x_train_normalized[i:i + batch_size]
        batch_y = y_train[i:i + batch_size]
        # Forward pass
        outputs = model(batch_x)
        # The constrained feature is column 0 of the batch. `batch_x[0]`
        # would instead be the first *sample* (one value per feature), so
        # the outputs would be reordered against the wrong values.
        # The constraint is applied only among the samples of this batch.
        enforced_mono_output = enforce_monotonicity(batch_x[:, 0], outputs)
        # NOTE(review): the model returns (batch, output_size); confirm that
        # batch_y has the same shape so MSELoss does not broadcast.
        loss = criterion(enforced_mono_output, batch_y)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Store the training loss value
        # NOTE(review): the original indentation was lost; the loss is
        # recorded once per batch here. Confirm per-batch vs per-epoch.
        train_loss_values.append(loss.item())
This function enforces increasing monotonicity: it makes sure that the output increases, or at least stays the same, as the feature value increases.
def enforce_monotonicity(feature_values, output_values):
    """Clamp outputs so they never decrease as the feature value increases.

    The outputs are visited in ascending order of their feature value and
    each one is raised to the running maximum seen so far; values that are
    already high enough are left alone.

    Args:
        feature_values: Indexable sequence of feature values, one per sample.
        output_values: Indexable, item-assignable sequence of outputs aligned
            with ``feature_values``.

    Returns:
        ``output_values`` itself. It is modified in place, so the caller's
        object changes as well.
    """
    # Sample positions in ascending feature order. sorted() is stable, so
    # samples with equal feature values keep their input order.
    order = sorted(range(len(feature_values)), key=lambda k: feature_values[k])

    # Pass 1: build the non-decreasing sequence without writing anything, so
    # every comparison sees the original outputs.
    monotone = []
    for position in order:
        value = output_values[position]
        if monotone and value < monotone[-1]:
            value = monotone[-1]
        monotone.append(value)

    # Pass 2: scatter the clamped values back to their original positions.
    for position, value in zip(order, monotone):
        output_values[position] = value
    return output_values
# Worked example. In ascending feature order (1, 2, 3, 4) the outputs are
# 5, 10, 6, 15; the 6 breaks the ordering and is raised to 10.
features = [2, 4, 1, 3]
outputs = [10, 15, 5, 6]
# enforce_monotonicity writes into `outputs` and returns that same list.
enforced_output_values = enforce_monotonicity(features, outputs)
print(enforced_output_values)
[10, 15, 5, 10]
I am wondering whether this approach actually makes sense: computing the loss by passing the model’s output through a monotonicity-enforcing step and comparing the result with the target.