Custom loss function based on model parameters

Hi, I am trying to create a custom loss function to induce gradients based on change or no change in certain filter parameters within certain layers of the model.

I am creating a custom loss function which has cross entropy loss and the L2 norm of change in filter values added or subtracted based on the filter index within a layer.

This is my current implementation. How can I formulate the custom loss so that it produces gradients based on the L2 norm of the difference between the model’s parameters and those of an identical reference model?

import torch
import torch.nn as nn

def get_nested_attr(obj, attr):
    """Resolve a dotted attribute path (e.g. ``"a.b.c"``) starting at ``obj``.

    Each dot-separated name is looked up with ``getattr`` on the result of
    the previous lookup; the object reached by the last name is returned.
    """
    target = obj
    remaining = attr
    while True:
        # Peel off the next path component; ``sep`` is empty on the last one.
        name, sep, remaining = remaining.partition('.')
        target = getattr(target, name)
        if not sep:
            return target

def filter_change_loss(model, model_zero, loss, layer_filter_index):
    """Return ``loss`` plus a signed per-filter weight-change penalty.

    For every layer named in ``layer_filter_index``, the L2 norm of the
    difference between each filter of ``model`` and the same filter of
    ``model_zero`` is computed. The norm is added to the loss for filter
    indices listed for that layer and subtracted for every other filter.

    Args:
        model: Model being trained. Its weights are used without ``.data``
            so the penalty stays in the autograd graph and yields gradients.
        model_zero: Reference model exposing the same dotted layer paths.
            Its weights are detached, i.e. treated as constants.
        loss: Base loss tensor to extend. It is not modified in place.
        layer_filter_index: Mapping from a dotted layer path (resolved with
            ``get_nested_attr``) to the filter indices whose change is
            added to the loss.

    Returns:
        A new tensor holding the base loss plus the signed penalties.
    """
    for each_layer, filter_indexes in layer_filter_index.items():
        # Resolve each layer once instead of once per filter.
        weight = get_nested_attr(model, each_layer).weight
        reference = get_nested_attr(model_zero, each_layer).weight.detach()
        selected = set(filter_indexes)
        for i, current in enumerate(weight):
            # Norm of the *difference*; the reference must not be passed as
            # the second argument of ``torch.norm`` (that is the order ``p``).
            l2_norm = (current - reference[i]).norm(2)
            # Out-of-place updates so the caller's tensor is left untouched.
            if i in selected:
                loss = loss + l2_norm
            else:
                loss = loss - l2_norm
    return loss

# Placeholders from the original post: substitute real model instances.
# model_zero is the reference the filter-change penalty is measured against.
model_zero = ...  # Some model
model = ...  # Current model
criterion = nn.CrossEntropyLoss()
# NOTE: lr and weight_decay are not defined in this snippet; they must be
# provided before this line runs.
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
# Dotted layer path -> filter indices whose change is added to the loss;
# the change of every other filter in that layer is subtracted.
layer_filter_index = {
    "layer1.1.conv1": [1, 4, 18, 24, 56, 57, 52, 54, 58, 61, 63],
    "layer1.1.conv2": [2, 5, 7, 8, 27, 29, 34, 38, 47, 49, 61],
}

# Train with cross entropy extended by the filter-change penalty.
for epoch in range(total_epochs):
    for x, y in trainloader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(x)
        # Base classification loss, then the signed per-filter penalty.
        loss = filter_change_loss(
            model, model_zero, criterion(outputs, y), layer_filter_index
        )
        loss.backward()
        optimizer.step()

I don’t understand this question, but based on your code you would have to remove the .data attribute, as its usage will skip Autograd, and the L2 weight loss will thus be a constant that does not influence the gradients.

# Norm taken directly on the parameter: autograd records the operation,
# so backward() populates model.weight.grad.
model = nn.Linear(1, 1)
loss = model.weight.norm(2)
loss.backward()
print(model.weight.grad)
# tensor([[-1.]])
# (For a single weight w the gradient of its L2 norm is w / |w|, so the
# printed value is +1 or -1 depending on the sign of the initial weight.)

# Same norm taken through .data: the result is not tracked by autograd,
# so calling backward() on it fails with the error shown below.
model = nn.Linear(1, 1)
loss = model.weight.data.norm(2)
loss.backward()
# RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Understood, I will make the changes and try it out. Thank you