Hi, I’m working on a project that needs to implement reprojection of RGB images with the given depth, knowing all the extrinsic and intrinsic parameters.
With an example is better understood:
In one hand, I have a RGB image (RGB_img) and a Depth map (depth_img) corresponding to that RGB image. I also have the intrinsic and extrinsic parameters of the camera in that view.
On the other hand, I also have the intrinsic and extrinsic parameters of the camera where I want to obtain the RGB image (by unprojecting the RGB_img to the 3D world, and reprojecting to this new view).
Intrinsic–>
np.matrix([[fx, 0, cx],
[0, fy, cy],
[0, 0, 1]])
Extrinsic (no rotation involved in my problem)–>
np.matrix([[1,0,0,tx],
[0,1,0,ty],
[0,0,1,tz]])
Like this–>
K_ref-->[[1.37797465e+03 0.00000000e+00 9.77399446e+02]
[0.00000000e+00 1.37731658e+03 5.53979077e+02]
[0.00000000e+00 0.00000000e+00 1.00000000e+00]]
K_cam-->[[1.37797465e+03 0.00000000e+00 9.77399446e+02]
[0.00000000e+00 1.37731658e+03 5.53979077e+02]
[0.00000000e+00 0.00000000e+00 1.00000000e+00]]
Rt_ref-->[[ 1. 0. 0. -0.0523642 ]
[ 0. 1. 0. -0.05032519]
[ 0. 0. 1. 0.00114601]]
Rt_cam-->[[1. 0. 0. 0.]
[0. 1. 0. 0.]
[0. 0. 1. 0.]]
For that, I’ve created this function using numpy, an it is able to do it correctly:
def reproject_RGB(rgb_img, depth_img, K_ref, K_cam, Rt_ref, Rt_cam):
"""
This function reprojects RGB images.
rgb_img: RGB_src image (h, w, 3)
depth_img: Depth_src image (h, w)
K_ref: Intrinsic matrix from src
K_cam: Intrinsic matrix from dst
Rt_ref: Extrinsic matrix from src
Rt_cam: Extrinsic matrix from dst
"""
i = np.tile(np.arange(depth_img.shape[1]), depth_img.shape[0])
j = np.repeat(np.arange(depth_img.shape[0]), depth_img.shape[1])
ones_vect = np.ones(depth_img.shape[0]*depth_img.shape[1])
z = depth_img[j,i]
z[z==0] = 1
img_mx = np.stack([i, j, ones_vect, 1/z], axis=0)
# ************ REPROJECTION ************
low_vect = np.array([0,0,0,1])
left_mx = np.matmul(K_cam, Rt_cam)
right_mx = np.linalg.inv(np.vstack((np.matmul(K_ref, Rt_ref), low_vect)))
P = np.multiply(np.matmul(right_mx, img_mx), z)
res_mx_d = np.matmul(left_mx, P)/z
u = res_mx_d[0,:]
v = res_mx_d[1,:]
del_u_ix = np.where(u>1919)
del_v_ix = np.where(v>1055)
del_u_ix_neg = np.where(u<0)
del_v_ix_neg = np.where(v<0)
del_ix_neg = np.unique(np.append(del_u_ix_neg, del_v_ix_neg))
del_ix_excess = np.unique(np.append(del_u_ix, del_v_ix))
del_ix = np.unique(np.append(del_ix_neg, del_ix_excess))
u = np.delete(u, del_ix)
v = np.delete(v, del_ix)
i = np.delete(i, del_ix)
j = np.delete(j, del_ix)
u = np.around(u, 0).astype('int')
v = np.around(v, 0).astype('int')
out_img = np.zeros((1056,1920,3))
out_img[v,u,:] = rgb_img[j,i,:]
return out_img
The problem is that I need to make it differentiable (to not lose the gradient computation of pytorch).
My attempt is the following, which does not work correctly:
import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F
def reproject_RGB_pytorch(rgb_img, depth_img, K_ref, K_cam, Rt_ref, Rt_cam):
#Because the network outputs the images in PyTorch format --> (batch, dims, height, width)
# 1.- Create the dimensions (tensors)
i = torch.tensor(np.tile(np.arange(depth_img.shape[3]), depth_img.shape[2]))
j = torch.tensor(np.repeat(np.arange(depth_img.shape[2]), depth_img.shape[3]))
ones_vect = torch.ones(depth_img.shape[2]*depth_img.shape[3])
# 2.- Set 0 value to 1 at least (avoid errors)
z = depth_img[0,0,j,i]
z[z==0] = 1
# 3.- Create image matrix
img_mx = torch.stack([i, j, ones_vect, 1/z], axis=0)
# ************ REPROJECTION ************
low_vect = np.array([0,0,0,1])
left_mx = torch.tensor(np.matmul(K_cam, Rt_cam))
right_mx = torch.tensor(np.linalg.inv(np.vstack((np.matmul(K_ref, Rt_ref), low_vect))))
P = torch.matmul(right_mx, img_mx.double())
P = z*P
res_mx_d = torch.matmul(left_mx, P)/z
u = res_mx_d[0,:]
v = res_mx_d[1,:]
del_u_ix = np.where(u>1919)
#del_v_ix = np.where(v>1079)
del_v_ix = np.where(v>1055)
del_u_ix_neg = np.where(u<0)
del_v_ix_neg = np.where(v<0)
del_ix_neg = np.unique(np.append(del_u_ix_neg, del_v_ix_neg))
del_ix_excess = np.unique(np.append(del_u_ix, del_v_ix))
del_ix = np.unique(np.append(del_ix_neg, del_ix_excess))
#Filter excess on upper limit
u=u[u<=1919]
v=v[v<=1055]
i=i[i<=1919]
j=j[j<=1055]
#Filter excess on lower limit
u=u[u>=0]
v=v[v>=0]
i=i[i>=0]
j=j[j>=0]
flow_grid = res_mx_d[:2].reshape((1,1056,1920,2))
out_img=F.grid_sample(rgb_img,flow_grid.float())
#print(out_img.shape)
return out_img
Can anyone help me to make it work? Thank you