Fast implementation of a 2D Gaussian kernel

Hi,

I have a non-vectorized implementation of a 2D Gaussian kernel:

import numpy as np
import torch

def fillWithGaussian(inp, sigma, offset=0):
    # Center of the kernel; the extra -0.5 in the exponent below
    # effectively shifts this to (size - 1) / 2
    center = ((inp.shape[1] - 2) + offset) / 2.
    inp[:, :, :].fill_(0)
    # Evaluate the Gaussian density at every pixel, one entry at a time
    for i_ in range(inp.shape[1]):
        for j_ in range(inp.shape[2]):
            inp[:, i_, j_] += 1 / (2 * np.pi * sigma ** 2) * torch.exp(
                -((i_ - center - 0.5) ** 2 + (j_ - center - 0.5) ** 2) / (2 * sigma ** 2))

It is differentiable, and I want it to stay autograd-compatible, but it is really slow. Does anyone have a suggestion?

I found something myself:

import math
import torch

def return2DGaussian(resolution, sigma, offset=0):
    kernel_size = resolution

    # Create an (x, y) coordinate grid of shape (kernel_size, kernel_size, 2)
    x_cord = torch.arange(kernel_size).to(sigma.device)
    x_grid = x_cord.repeat(kernel_size).view(kernel_size, kernel_size)
    y_grid = x_grid.t()
    xy_grid = torch.stack([x_grid, y_grid], dim=-1)

    mean = ((kernel_size - 1) + offset) / 2.
    variance = sigma ** 2.

    # Calculate the 2-dimensional Gaussian kernel, which is the product
    # of two Gaussian distributions in two variables (here x and y)
    gaussian_kernel = (1. / (2. * math.pi * variance)) * torch.exp(
        -torch.sum((xy_grid - mean) ** 2., dim=-1) / (2 * variance)
    )

    # Make sure the values in the Gaussian kernel sum to 1
    gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel)

    # Reshape to a 2D depthwise-convolution weight of shape (1, 1, H, W)
    return gaussian_kernel.view(1, 1, kernel_size, kernel_size)
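
In case it helps anyone, here is a minimal usage sketch (the variable names and the 3-channel input are just illustrative, not part of the function above). It applies the returned kernel as a depthwise convolution and keeps sigma as a learnable tensor, so gradients still flow back to it:

import torch
import torch.nn.functional as F

sigma = torch.tensor(1.5, requires_grad=True)
kernel = return2DGaussian(resolution=5, sigma=sigma)  # shape (1, 1, 5, 5)

# Repeat the kernel once per channel and set groups=channels, so each
# channel is blurred independently (a depthwise convolution)
img = torch.randn(1, 3, 32, 32)
weight = kernel.repeat(3, 1, 1, 1)                    # shape (3, 1, 5, 5)
blurred = F.conv2d(img, weight, padding=2, groups=3)

# Quick autograd check: the gradient reaches sigma
blurred.sum().backward()
print(sigma.grad)  # not None, so the kernel stays differentiable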