Difference between padding then convolution and convolution with padding

I wrote a short piece of code to determine whether the padding within nn.Conv2d is applied before or after the convolution:

import numpy as np
import torch
import torch.nn as nn

# Use a float kernel so it matches the dtype of the input tensor below.
weights = np.array([[1, 2, 6],
                    [4, 7, 9],
                    [1, 4, 7]], dtype=np.float64)


class Model1(nn.Sequential):
    """Convolution with implicit zero padding (padding=1)."""
    def __init__(self):
        super(Model1, self).__init__()
        self.add_module('conv', nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False))
        self.conv.weight.data = torch.from_numpy(weights).view(1, 1, 3, 3)
        print(self.conv.weight.data)


class Model2(nn.Sequential):
    """Explicit zero padding followed by an unpadded convolution."""
    def __init__(self):
        super(Model2, self).__init__()
        self.add_module('padding', nn.ZeroPad2d(1))
        self.add_module('conv', nn.Conv2d(1, 1, kernel_size=3, padding=0, bias=False))
        self.conv.weight.data = torch.from_numpy(weights).view(1, 1, 3, 3)


if __name__ == '__main__':
    mod1 = Model1()
    mod2 = Model2()

    input_ = np.random.randint(low=1, high=99, size=(81, 81))
    # np.float has been removed from NumPy; cast to np.float64 instead.
    input_ = torch.from_numpy(input_.astype(np.float64)).view([1, 1, input_.shape[0], input_.shape[1]])

    res1 = mod1(input_)  # call the module, not .forward(), so hooks run
    res2 = mod2(input_)
    assert np.allclose(res1.detach(), res2.detach())

Both models yield the same result.
Since I have seen the Model2 approach in some GitHub projects (e.g. https://github.com/Blade6570/Dilation-Pytorch-Semantic-Segmentation/blob/master/CAN_network.py), I am wondering whether there is any advantage to using it.

Passing padding to nn.Conv2d pads the input (implicitly, with zeros by default, since padding_mode='zeros') before the convolution, so yes, the two models compute the same thing, indeed.
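
You can see the same equivalence with the functional API, for example (the random input and the kernel shape here are just placeholders for illustration):

import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(1, 1, 8, 8)  # arbitrary input
w = torch.randn(1, 1, 3, 3)  # arbitrary 3x3 kernel

# Implicit zero padding done inside the convolution ...
implicit = F.conv2d(x, w, padding=1)
# ... versus explicit zero padding followed by an unpadded convolution.
explicit = F.conv2d(F.pad(x, (1, 1, 1, 1)), w)

print(torch.allclose(implicit, explicit))  # True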

Best regards

Thomas