Reproject RGB images with Depth to another view in a differentiable way (warp with depth)

Hi, I’m working on a project that needs to implement reprojection of RGB images with the given depth, knowing all the extrinsic and intrinsic parameters.

With an example is better understood:
In one hand, I have a RGB image (RGB_img) and a Depth map (depth_img) corresponding to that RGB image. I also have the intrinsic and extrinsic parameters of the camera in that view.
On the other hand, I also have the intrinsic and extrinsic parameters of the camera where I want to obtain the RGB image (by unprojecting the RGB_img to the 3D world, and reprojecting to this new view).

Intrinsic–>

np.matrix([[fx, 0, cx],
                 [0, fy, cy],
                 [0, 0, 1]])

Extrinsic (no rotation involved in my problem)–>

np.matrix([[1,0,0,tx],
                  [0,1,0,ty],
                  [0,0,1,tz]])

Like this–>

K_ref-->[[1.37797465e+03 0.00000000e+00 9.77399446e+02]
 [0.00000000e+00 1.37731658e+03 5.53979077e+02]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]
K_cam-->[[1.37797465e+03 0.00000000e+00 9.77399446e+02]
 [0.00000000e+00 1.37731658e+03 5.53979077e+02]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]
Rt_ref-->[[ 1.          0.          0.         -0.0523642 ]
 [ 0.          1.          0.         -0.05032519]
 [ 0.          0.          1.          0.00114601]]
Rt_cam-->[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]]

For that, I’ve created this function using numpy, an it is able to do it correctly:

def reproject_RGB(rgb_img, depth_img, K_ref, K_cam, Rt_ref, Rt_cam):
    """
    This function reprojects RGB images.
    rgb_img: RGB_src image (h, w, 3)
    depth_img: Depth_src image (h, w)
    K_ref: Intrinsic matrix from src
    K_cam: Intrinsic matrix from dst
    Rt_ref: Extrinsic matrix from src
    Rt_cam: Extrinsic matrix from dst
    """
    i = np.tile(np.arange(depth_img.shape[1]), depth_img.shape[0])
    j = np.repeat(np.arange(depth_img.shape[0]), depth_img.shape[1])
    ones_vect = np.ones(depth_img.shape[0]*depth_img.shape[1])
    z = depth_img[j,i]
    z[z==0] = 1
    img_mx = np.stack([i, j, ones_vect, 1/z], axis=0)
    # ************ REPROJECTION ************
    low_vect = np.array([0,0,0,1])
    left_mx = np.matmul(K_cam, Rt_cam)
    right_mx = np.linalg.inv(np.vstack((np.matmul(K_ref, Rt_ref), low_vect)))
    P = np.multiply(np.matmul(right_mx, img_mx), z)
    res_mx_d = np.matmul(left_mx, P)/z
    u = res_mx_d[0,:]
    v = res_mx_d[1,:]
    del_u_ix = np.where(u>1919)
    del_v_ix = np.where(v>1055)
    del_u_ix_neg = np.where(u<0)
    del_v_ix_neg = np.where(v<0)
    del_ix_neg = np.unique(np.append(del_u_ix_neg, del_v_ix_neg))
    del_ix_excess = np.unique(np.append(del_u_ix, del_v_ix))
    del_ix = np.unique(np.append(del_ix_neg, del_ix_excess))
    u = np.delete(u, del_ix)
    v = np.delete(v, del_ix)
    i = np.delete(i, del_ix)
    j = np.delete(j, del_ix)
    u = np.around(u, 0).astype('int')
    v = np.around(v, 0).astype('int')
    out_img = np.zeros((1056,1920,3))
    out_img[v,u,:] = rgb_img[j,i,:]
    return out_img

The problem is that I need to make it differentiable (to not lose the gradient computation of pytorch).

My attempt is the following, which does not work correctly:

import numpy as np
import torch.nn as nn
import torch
import torch.nn.functional as F


def reproject_RGB_pytorch(rgb_img, depth_img, K_ref, K_cam, Rt_ref, Rt_cam):
    #Because the network outputs the images in PyTorch format --> (batch, dims, height, width)

    # 1.- Create the dimensions (tensors)
    i = torch.tensor(np.tile(np.arange(depth_img.shape[3]), depth_img.shape[2]))
    j = torch.tensor(np.repeat(np.arange(depth_img.shape[2]), depth_img.shape[3]))
    ones_vect = torch.ones(depth_img.shape[2]*depth_img.shape[3])

    
    # 2.- Set 0 value to 1 at least (avoid errors)
    z = depth_img[0,0,j,i]
    z[z==0] = 1
    
    # 3.- Create image matrix
    img_mx = torch.stack([i, j, ones_vect, 1/z], axis=0)
        
    # ************ REPROJECTION ************
    low_vect = np.array([0,0,0,1])
    left_mx = torch.tensor(np.matmul(K_cam, Rt_cam))
    right_mx = torch.tensor(np.linalg.inv(np.vstack((np.matmul(K_ref, Rt_ref), low_vect))))

    P = torch.matmul(right_mx, img_mx.double())
    P = z*P
    
    res_mx_d = torch.matmul(left_mx, P)/z
    
    u = res_mx_d[0,:]
    v = res_mx_d[1,:]
    del_u_ix = np.where(u>1919)
    #del_v_ix = np.where(v>1079)
    del_v_ix = np.where(v>1055)
    del_u_ix_neg = np.where(u<0)
    del_v_ix_neg = np.where(v<0)
    del_ix_neg = np.unique(np.append(del_u_ix_neg, del_v_ix_neg))
    del_ix_excess = np.unique(np.append(del_u_ix, del_v_ix))
    del_ix = np.unique(np.append(del_ix_neg, del_ix_excess))
   
    #Filter excess on upper limit
    u=u[u<=1919]
    v=v[v<=1055]
    i=i[i<=1919]
    j=j[j<=1055]
    #Filter excess on lower limit
    u=u[u>=0]
    v=v[v>=0]
    i=i[i>=0]
    j=j[j>=0]

    flow_grid = res_mx_d[:2].reshape((1,1056,1920,2)) 
    out_img=F.grid_sample(rgb_img,flow_grid.float())
    
    #print(out_img.shape)
    return out_img

Can anyone help me to make it work? Thank you :slight_smile: