I was trying to compute normal from depth and currently, I’m using for loop
to compute normal of each pixel. I can’t think of a way to compute normal of each pixel at the same time because a window of each pixel is used to compute the normal of that pixel. Does anyone have any ideas on how to vectorize it? XYZ is the back-projected 3d coordinate from depth.
Using for loop
is extremely slow, it takes roughly 13s
to compute a normal map from a 240x320 depth map.
def DepthToNormal(Z, win_sz=15):
def cal_patch(xy, z, i, j, sz, dep_th=0.1):
cent_d = z[0, i+sz//2, j+sz//2]
val_mask = (torch.abs(z[0, i:i+sz, j:j+sz] - cent_d) < dep_th * cent_d) & (
z[0, i:i+sz, j:j+sz] > 0).unsqueeze(0).repeat(3, 1, 1)
if val_mask.sum() < 10:
return torch.tensor([0., 0., 0.])
comb_patch = torch.cat((xy[:, i:i+sz, j:j+sz], z[:, i:i+sz, j:j+sz]), dim=0)
A = comb_patch[val_mask].view(3, -1).permute(1, 0)
A_t = A.permute(1, 0)
A_tA = torch.mm(A_t, A)
try:
n = torch.sum(torch.mm(A_tA.inverse(), A_t), dim=1)
except:
n = torch.tensor([0., 0., 0.])
return n
intrinsics = torch.tensor([577.591, 318.905, 578.73, 242.684]).cuda()
batch, _, height, width = Z.shape
yy, xx = torch.meshgrid(torch.tensor([i for i in range(height)]),
torch.tensor([i for i in range(width)]))
xx = xx.unsqueeze(dim=0).unsqueeze(dim=0).repeat(batch, 1, 1, 1).cuda()
yy = yy.unsqueeze(dim=0).unsqueeze(dim=0).repeat(batch, 1, 1, 1).cuda()
XY = torch.cat((Z * (xx - intrinsics[1]) / intrinsics[0],
Z * (yy - intrinsics[3]) / intrinsics[2]), dim=1)
normal = torch.zeros((batch, 3, height, width))
XY = F.pad(XY, (win_sz // 2, win_sz // 2, win_sz // 2, win_sz // 2), mode='reflect')
Z = F.pad(Z, (win_sz // 2, win_sz // 2, win_sz // 2, win_sz // 2), mode='reflect')
for b in range(batch):
for i in range(height):
for j in range(width):
normal[b, :, i, j] = cal_patch(XY[b], Z[b], i, j, win_sz)
return normal