def DepthError(depth_pred, depth_gt):
    """Return the mean absolute depth error over valid ground-truth pixels.

    A pixel is valid when its ground-truth depth is strictly positive; every
    other pixel is skipped. Only channel 0 of each image is read.

    Args:
        depth_pred: predicted depth, indexed as (batch, channel, height,
            width).
        depth_gt: ground-truth depth, indexed the same way as depth_pred.

    Returns:
        A scalar tensor: the sum of |depth_pred - depth_gt| over the valid
        pixels divided by the number of valid pixels. If no pixel is valid,
        a zero-valued scalar computed from depth_pred is returned.
    """
    L_d, Mcard = 0.0, 0
    B, _, H, W = depth_pred.shape  # batch, channels, height, width
    for b in range(B):  # iterate over the images in the batch
        depth_pred2D = depth_pred[b, 0]  # depth images have 1 channel, thus 0
        depth_gt2D = depth_gt[b, 0]
        for h in range(H):  # iterate over pixels along height
            for w in range(W):  # ... and along width
                if depth_gt2D[h, w] <= 0:
                    continue  # no valid ground truth at this pixel
                Mcard += 1
                diff = torch.sub(depth_pred2D[h, w], depth_gt2D[h, w])
                L_d += torch.abs(diff)
    if Mcard == 0:
        # Without this guard L_d is still the Python float 0.0 and
        # torch.div(0.0, 0) raises a TypeError. Deriving the zero from
        # depth_pred keeps it attached to depth_pred's autograd graph.
        return depth_pred.sum() * 0.0
    return torch.div(L_d, Mcard)  # depth loss
# Take a single batch from the training dataloader.
train_data = next(iter(train_dataloader))
# Fetch the batch's 'image' entry and move it to the GPU via .cuda().
image = train_data['image'].cuda()
depth_pred = depth_net(image) # estimate depth by passing image through network
# NOTE(review): depth_gt is not assigned anywhere in this snippet; presumably it
# should be read from train_data and placed on the same device; confirm.
L_d = DepthError(depth_pred, depth_gt) # depth loss

My code seems to be terribly slow. I am also trying to overfit to one batch by setting the batch size to 10, but my loss seems to be stuck at the same value and is not decreasing.
Is the above method the right way to process images in a batch? Also, am I doing things correctly?

Hi, your problem is easily “broadcastable”! Using broadcasting (operating on whole tensors at once) is much faster than looping over pixels in Python. From my understanding, you want an L1 loss, scaled by the number of valid pixels, that excludes the loss from pixels whose ground-truth depth is not positive (zero or negative).

Here is my attempt at implementing the loss. I tested it with some dummy data, and our results agree.

import torch
def DepthError(depth_pred, depth_gt):
    """Loop-based reference implementation of the masked L1 depth loss.

    Visits every pixel of channel 0 of every image in the batch, skips
    pixels whose ground-truth depth is <= 0, and averages the absolute
    error over the remaining pixels.

    Args:
        depth_pred: predicted depth, indexed as (batch, channel, height,
            width).
        depth_gt: ground-truth depth, indexed the same way as depth_pred.

    Returns:
        A scalar tensor holding the mean absolute error over the valid
        pixels, or a zero-valued scalar computed from depth_pred when no
        pixel is valid.
    """
    abs_error_sum = 0.0
    valid_pixels = 0
    batch_size, _, height, width = depth_pred.shape
    for b in range(batch_size):
        # Depth images have a single channel, hence index 0.
        pred_2d = depth_pred[b, 0]
        gt_2d = depth_gt[b, 0]
        for h in range(height):
            for w in range(width):
                gt_value = gt_2d[h, w]
                if gt_value <= 0:
                    continue  # pixel has no valid ground truth
                valid_pixels += 1
                abs_error_sum = abs_error_sum + torch.abs(pred_2d[h, w] - gt_value)
    if valid_pixels == 0:
        # abs_error_sum is still a Python float here, and torch.div(0.0, 0)
        # would raise a TypeError. Return a zero that stays attached to
        # depth_pred's autograd graph instead.
        return depth_pred.sum() * 0.0
    return torch.div(abs_error_sum, valid_pixels)
def DepthErrorVectorized(depth_pred, depth_gt):
    """Vectorized masked L1 depth loss (no Python-level pixel loops).

    Only elements whose ground-truth depth is strictly positive contribute
    to the loss.

    Args:
        depth_pred: predicted depth tensor.
        depth_gt: ground-truth depth tensor; the mask depth_gt > 0 is used
            to index the element-wise difference, so it must match that
            difference's shape.

    Returns:
        A scalar tensor: the sum of |depth_pred - depth_gt| over the valid
        elements divided by the number of valid elements, or 0 when no
        element is valid.
    """
    valid = depth_gt > 0
    # Boolean-mask indexing keeps only the valid elements (as a 1-D tensor).
    abs_error = (depth_pred - depth_gt)[valid].abs()
    # With zero valid elements the sum is 0; clamping the count to 1 avoids
    # the 0 / 0 division that would otherwise produce NaN.
    valid_count = max(abs_error.numel(), 1)
    return abs_error.sum() / valid_count
# Create some dummy ground truth data: three single-channel 64x64 depth images.
# The dtype is passed explicitly so the result does not depend on how
# torch.full infers a dtype from an integer fill value.
y = torch.stack([
    torch.full((1, 64, 64), 2.0, dtype=torch.float32),   # Depth image of 2
    torch.full((1, 64, 64), 1.0, dtype=torch.float32),   # Depth image of 1
    torch.full((1, 64, 64), -1.0, dtype=torch.float32),  # Depth image of -1
])
# Create dummy predictions.
# "Predict all ones": the per-pixel error is 1 on the first image, 0 on the
# second, and the third image is masked out, so the loss should be 0.5000.
pred = torch.ones_like(y)
print(DepthError(pred, y))  # tensor(0.5000)
print(DepthErrorVectorized(pred, y))  # tensor(0.5000)