Need help to create depth and mask prediction in an image

Hi, I want to predict both a depth map and a mask for an image. The depth will be predicted for roads, and I want to mask a car in the image. I have implemented a UNet for this problem.
The dataloader code is
class DepthDataSet(Dataset):
    """Dataset for depth and mask prediction.

    Each sample is looked up by file name: the same name is opened in the
    input, mask and depth directories, so the three directories must contain
    identically named files.
    """

    def __init__(self, conf, fg_bg_dir, mask_dir, depth_dir, transform=None, scale=1):
        """Index the files found in ``fg_bg_dir``.

        Args:
            conf: configuration object; stored unchanged.
            fg_bg_dir: directory with the input images (returned as 'image').
            mask_dir: directory with the mask images.
            depth_dir: directory with the depth images.
            transform: optional transformation; stored on the instance but
                not applied by this class.
            scale: resize factor; stored on the instance for use with
                ``preprocess``.
        """
        self.conf = conf
        self.fg_bg_dir = fg_bg_dir
        self.mask_dir = mask_dir
        self.depth_dir = depth_dir
        self.transform = transform
        self.scale = scale
        # Names starting with '.' (hidden files) are skipped.
        self.ids = [file for file in listdir(fg_bg_dir) if not file.startswith('.')]

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.ids)

    @classmethod
    def preprocess(cls, pil_img, scale):
        """Resize a PIL image and convert it to a channel-first array.

        Args:
            pil_img: image exposing ``size`` and ``resize`` (a PIL image).
            scale: factor applied to both width and height.

        Returns:
            Array in CHW layout. Values are divided by 255 when the maximum
            exceeds 1.

        Raises:
            AssertionError: if the scaled width or height truncates to 0.

        Note:
            ``__getitem__`` does not call this helper; it returns the raw
            image arrays.
        """
        w, h = pil_img.size
        newW, newH = int(scale * w), int(scale * h)
        assert newW > 0 and newH > 0, 'Scale is too small'
        pil_img = pil_img.resize((newW, newH))

        img_nd = np.array(pil_img)

        # Single-channel images come back as HxW; add a channel axis.
        if len(img_nd.shape) == 2:
            img_nd = np.expand_dims(img_nd, axis=2)

        # HWC to CHW
        img_trans = img_nd.transpose((2, 0, 1))
        if img_trans.max() > 1:
            img_trans = img_trans / 255

        return img_trans

    @staticmethod
    def _load_tensor(path):
        """Open the image at ``path`` and return its pixels as a tensor."""
        # The context manager closes the file once the pixels are copied out.
        with Image.open(path) as img:
            return torch.from_numpy(np.array(img))

    def __getitem__(self, i):
        """Return the i-th sample as a dict with 'image', 'mask' and 'depth'.

        The arrays are returned exactly as decoded from the files: no
        resizing, scaling or layout change is applied here.
        """
        idx = self.ids[i]

        mask = self._load_tensor(self.mask_dir + '/' + idx)
        fg_bg = self._load_tensor(self.fg_bg_dir + '/' + idx)
        depth = self._load_tensor(self.depth_dir + '/' + idx)

        return {
            'image': fg_bg,
            'mask': mask,
            'depth': depth,
        }

I am feeding the image to the model and checking the loss for mask and depth.
# Excerpt of one training step; `images`, `mask`, `depth`, `device` and
# `train_loss` come from the enclosing training loop (not shown here).
# Inputs are moved to the device as float tensors.
images = images.to(device=self.device, dtype=torch.float)
# Target dtype: float32 for a single-class model, otherwise long.
mask_type = torch.float32 if self.model.n_classes == 1 else torch.long
mask = mask.to(device=device, dtype=mask_type)
# The depth target is cast to the same dtype as the mask.
depth = depth.to(device=device, dtype=mask_type)

        # One forward pass; the same output is scored against both targets
        # with the same criterion.
        # NOTE(review): depth looks like a regression target - confirm that a
        # shared prediction and a shared criterion are intended.
        mask_pred = self.model(images)
        loss_mask = self.criterion(mask_pred, mask)
        loss_depth = self.criterion(mask_pred, depth)
        loss = loss_mask + loss_depth

        # Accumulate plain Python numbers for reporting only.
        train_loss += loss_mask.item() + loss_depth.item()

My results are not good. Can you please help me make this solution better?
Also, is it the correct approach to add up the losses computed against the two ground truths?

Thanks,

Which criterion are you using and what ranges are expected in mask and depth?
Also, you shouldn’t call item() on the losses, if you plan to call train_loss.backward().

PS: Could you format the code by wrapping it into three backticks ``` as it’ll be easier for debugging? :wink:

Hi,
Criterion ------> CrossEntropyLoss.
Input image --------->374
mask image --------> 63
depth image --------> 3949

I forward the input image through my model and use the mask and depth images as ground truth.
I call loss.backward() on the combined loss, not on train_loss.
This is my train function

def train(self, train_acc):
    """Run one pass over ``self.train_loader`` and update the model.

    For every batch the model output is scored against both the mask and
    the depth target with ``self.criterion``; the two losses are summed and
    back-propagated, then the optimizer and the scheduler are stepped.

    Args:
        train_acc: list that receives one value per batch, computed as
            100 * (cumulative loss / number of batches).
    """
    self.model.train()
    progress = tqdm(self.train_loader)
    running_loss = 0
    num_batches = len(self.train_loader)
    print("Length of train loader is {}".format(num_batches))
    target_device = self.device
    self.model.to(target_device)

    for sample in progress:
        images, mask, depth = sample['image'], sample['mask'], sample['depth']
        print(len(images), len(mask), len(depth))

        images = images.to(device=target_device, dtype=torch.float)
        # Float targets for a single-class model, class indices otherwise.
        if self.model.n_classes == 1:
            target_dtype = torch.float32
        else:
            target_dtype = torch.long
        mask = mask.to(device=target_device, dtype=target_dtype)
        depth = depth.to(device=target_device, dtype=target_dtype)

        # NOTE(review): the same prediction is compared with both targets
        # using the same criterion - confirm this is intended for depth.
        prediction = self.model(images)
        mask_loss = self.criterion(prediction, mask)
        depth_loss = self.criterion(prediction, depth)
        total_loss = mask_loss + depth_loss

        running_loss += mask_loss.item() + depth_loss.item()

        # The value shown here is the running total, not the per-batch loss.
        progress.set_postfix(**{'loss (batch)': running_loss})

        # Backpropagation
        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        self.scheduler.step()

        # NOTE(review): "accuracy" is derived from the accumulated loss, not
        # from correct predictions - verify this is the metric you want.
        accuracy = 100 * (running_loss / num_batches)
        progress.set_description(desc=f'Loss={running_loss} Accuracy={accuracy:0.2f}')
        train_acc.append(accuracy)

I don't know whether adding the two losses is the correct way to train for depth and mask prediction.
My accuracy also goes negative.
Please help.
Thanks

Based on the code it seems you are using nn.CrossEntropyLoss and LongTensors for the mask and depth. While this approach seems to be fine for the mask target, I’m not sure it is right for the depth estimation, which seems to be a regression task.
I would recommend keeping depth as a FloatTensor and using nn.MSELoss for the loss_depth calculation.

OK, I will try that. Thanks!