Hi all, I have a question about how to efficiently compute a Gaussian density image on a given 2D point set. My current implementation is as the following with a plot:
import torch
import numpy as np
from time import time
import matplotlib.pyplot as plt
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def computeGaussian(p, res=128, kernel_sigma=0.05):
ksize = round(res * kernel_sigma * 6)
sigma = kernel_sigma * res
x = np.linspace(0, 1, int(res))
y = np.linspace(0, 1, int(res))
xv, yv = np.meshgrid(x, y)
txv = torch.from_numpy(xv).unsqueeze(0).float()
tyv = torch.from_numpy(yv).unsqueeze(0).float()
mesh = torch.cat((txv, tyv), 0).to(device) # positions
img = torch.zeros((3, res, res)).to(device) # create an empty gaussian density image
for i in range(p.size()[0]):
center = p[i, 0:2] # go through each point
hw = round(3 * kernel_sigma * res) # only consider band-limited gaussian with 3sigma
coord_center = torch.floor(center * res)
up = torch.max(coord_center[1] - hw, torch.Tensor([0]).to(device)) # take the boundary into account
down = torch.min(coord_center[1] + hw + 1, torch.Tensor([res]).to(device))
left = torch.max(coord_center[0] - hw, torch.Tensor([0]).to(device))
right = torch.min(coord_center[0] + hw + 1, torch.Tensor([res]).to(device))
up = up.long()
down = down.long()
left = left.long()
right = right.long()
r, g, b = 1, 1, 1
# apply gaussian kernel on the pixels based on their distance to the center
img[0, up:down, left:right] += r * torch.exp(
-(mesh.permute(1, 2, 0)[up:down, left:right, :] - center).pow(2).sum(2) / (2 * kernel_sigma ** 2))
img[1, up:down, left:right] += g * torch.exp(
-(mesh.permute(1, 2, 0)[up:down, left:right, :] - center).pow(2).sum(2) / (2 * kernel_sigma ** 2))
img[2, up:down, left:right] += b * torch.exp(
-(mesh.permute(1, 2, 0)[up:down, left:right, :] - center).pow(2).sum(2) / (2 * kernel_sigma ** 2))
return img
pts = torch.rand(16, 2).to(device)
start = time()
density_map = computeGaussian(pts)
end = time()
print('Time: {:.4f}'.format(end - start))
plt.figure(1)
plt.subplot(121)
plt.imshow(density_map.permute(1, 2, 0).detach().cpu().numpy())
plt.subplot(122)
plt.scatter(pts[:, 0].detach().cpu().numpy(), (1 - pts[:, 1]).detach().cpu().numpy())
plt.axis('equal')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.show()
The problem is that I need to loop over all the points since they have different size of grid(i.e. [down:up, left:right]) to consider, because of the boundary. This makes it slow for a large point set and it’s even faster to run on CPU than CUDA. Is there any way to make it without the loop? Thanks in advance.