How to add an L2 regularizer?

This is the code in TensorFlow:

class DownShiftedConv2d(tf.keras.layers.Layer):
    """Down-shifted convolution: causal padding in height, then Conv2D.

    The input is padded with ``filter_size[0] - 1`` rows on top only and
    symmetrically in width, so each output pixel depends only on rows at or
    above its own position.
    """

    def __init__(self, num_filters, filter_size=(2, 3), dilation_rate=(1, 1), kernel_regularizer='L2', padding='VALID',
                 **kwargs):
        super().__init__(**kwargs)
        # Accept either a single int or an (H, W) pair for the kernel size.
        self.filter_size = (filter_size, filter_size) if isinstance(filter_size, int) else filter_size
        # NOTE(review): dilation_rate is forwarded as Conv2D's *strides*,
        # not as its dilation_rate argument — confirm this is intended.
        self.wnconv = tf.keras.layers.Conv2D(num_filters, filter_size, strides=dilation_rate,
                                             kernel_regularizer=kernel_regularizer, padding=padding)

    def call(self, inputs, *args, **kwargs):
        top = self.filter_size[0] - 1
        side = int((self.filter_size[1] - 1) / 2)
        # NHWC pad spec: [batch, height, width, channels].
        padded = tf.pad(inputs, [[0, 0], [top, 0], [side, side], [0, 0]])
        return self.wnconv(padded)

I want to translate this code from TensorFlow to PyTorch but don’t know the correct way to add an L2 regularizer.
This is my code:

class DownShiftedConv2D(nn.Module):
    """Down-shifted 2-D convolution (PyTorch port of the TF layer above).

    Pads the input causally in the height dimension (extra rows on top only)
    and symmetrically in width, then applies a valid convolution, so each
    output pixel depends only on input rows at or above its own position.

    Args:
        in_channels: number of input channels (NCHW layout).
        num_filters: number of output channels.
        filter_size: kernel size, an int or an (H, W) pair.
        dilation_rate: forwarded to ``nn.Conv2d`` as ``stride`` to mirror the
            TF code above, which passes it as ``strides``.
        padding: padding mode forwarded to ``nn.Conv2d`` (default 'valid').
    """

    def __init__(self, in_channels, num_filters, filter_size=(2, 3), dilation_rate=(1, 1), padding='valid',
                 **kwargs):
        super(DownShiftedConv2D, self).__init__(**kwargs)
        self.in_channels = in_channels
        self.num_filters = num_filters
        self.dilation_rate = dilation_rate

        # Normalize an int kernel size to an (H, W) pair before using it.
        if isinstance(filter_size, int):
            self.filter_size = (filter_size, filter_size)
        else:
            self.filter_size = filter_size
        # NOTE(review): like the TF original, dilation_rate becomes the
        # convolution *stride*, not Conv2d's ``dilation`` argument.
        self.wnconv = nn.Conv2d(in_channels=in_channels, out_channels=num_filters,
                                kernel_size=self.filter_size, stride=dilation_rate,
                                padding=padding)

    def forward(self, inputs, *args, **kwargs):
        # F.pad pads the *last* dimension first, so for NCHW input a 4-tuple
        # reads (W_left, W_right, H_top, H_bottom).  Bug fix vs. the posted
        # code, whose 8-tuple padded width with (0, 0) and the channel dim
        # with the intended width padding.
        w_pad = (self.filter_size[1] - 1) // 2
        output = F.pad(inputs, (w_pad, w_pad, self.filter_size[0] - 1, 0))
        # Bug fix: actually apply the convolution and return its result —
        # the posted forward() never used self.wnconv and returned None.
        return self.wnconv(output)

Can anyone help me? Thank you!

You could add the weights’ L2 norm to the loss before optimizing:

# Penalty strength for the L2 term.
l2_lambda = 0.01
# Accumulate the L2 norm of every parameter whose name mentions 'conv'.
l2_reg = torch.tensor(0.)
for param_name, param in model.named_parameters():
    if 'conv' not in param_name:
        continue
    l2_reg = l2_reg + torch.linalg.norm(param)
# Fold the penalty into the training loss before the backward pass.
loss += l2_lambda * l2_reg

AFAIK the weight_decay parameter in your optimizer will apply to all parameters in the network, and not just the conv layers.

2 Likes

Thanks for your answer :smiley: