Hello, I’m using a Pytorch to optimize the layer images from the perspective view images. When I use the loss.backpropagation, it fails to compute the gradient. So I tried to use the clone() function. But the problem still remains. Here are the codes.
The function translate_img implements the role of cv2.affine, and the function perspective returns the perspective view for the given angle.
layer_ortho = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device)
Mask = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device)
def translate_img(src_img,shift_distance,shape_of_out_img):
h,w = src_img.shape[:2]
x_distance,y_distance = shift_distance[0],shift_distance[1]
ts_mat = np.array([[1,0,x_distance],[0,1,y_distance]])
torch.from_numpy(ts_mat).to(device)
out_img = torch.zeros(shape_of_out_img).to(device)
for i in range(h):
for j in range(w):
origin_x, origin_y = j, i
origin_xy = np.array([origin_x, origin_y,1])
new_xy = np.dot(ts_mat,origin_xy)
new_x,new_y = int(new_xy[0]),int(new_xy[1])
if 0<new_x<w and 0<new_y<h:
out_img[new_y,new_x,:] = src_img[i,j,:]
return out_img
# Off-axis perspective view
def perspective(layer_image,layer_ortho,Mask,z_back,z_front,dx,dy,theta_x,theta_y,device):
ny, nx, n_depth = layer_image.shape[0], layer_image.shape[1], layer_image.shape[3]
z = torch.linspace(z_front,z_back,n_depth)
x0, y0 = z * np.tan(theta_x*math.pi/180), z * np.tan(theta_y*math.pi/180)
# Problem occurs here(initialization)
layer_ortho[:,:,:,n_depth-1] = layer_image[:,:,:,n_depth-1].clone()
for n_d in reversed(range(n_depth-1)):
shift_distance = (x0[n_d]-x0[n_depth-1]).numpy()/dx, (y0[n_d]-y0[n_depth-1]).numpy()/dy
ortho_tmp = translate_img(layer_image[:,:,:,n_d].clone(),shift_distance,(ny,nx,3))
Mask[:,:,:,n_d] = torch.from_numpy(np.where(ortho_tmp.detach().cpu().numpy() > 0, 0, 1))
layer_ortho[:,:,:,n_d] = layer_ortho[:,:,:,n_d+1]*Mask[:,:,:,n_d]+ortho_tmp
return layer_ortho[:,:,:,0]
theta = 0, 0
parallel_view = perspective(layer_image,layer_ortho,Mask,z_range[0],z_range[1],dx,dy,theta[0],theta[1],device)
plt.imshow(parallel_view.detach().cpu().numpy())
plt.show()
mse_loss = nn.MSELoss().to(device) # + SSIM
var = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).cuda().requires_grad_(True) #layer_image.requires_grad_(True)
LR = 0.3
optimizer = optim.Adam([var], lr=LR)
theta_x = torch.linspace(-maxdiff,maxdiff,mx)
theta_y = torch.linspace(-maxdiff,maxdiff,my)
layer_ortho = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device)
Mask = torch.zeros(nx,ny,3,n_depth,dtype=torch.float32).to(device)
for iter in range(100):
time_start = time.time()
l = 0
for i in range(my):
for j in range(mx):
recon_target = perspective(var,layer_ortho,Mask,z_range[0],z_range[1],dx,dy,theta_x[i],theta_y[j],device)
l = l + 1/(my*mx)*mse_loss(recon_target, LF[i,j,:,:,:].to(device))
print(l)
optimizer.zero_grad()
l.backward()
optimizer.step()
time_end = time.time()
if (i) % 10 == 0:
print(l, time_end - time_start)
The code implements the optimization using Adam optimizer.
Any help will be appreciated!