Model not detecting any object

Hi

I am doing object detection and my model’s loss is deceasing but the model is not giving good output. This is my model and dataset definition.

import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
import cv2
import numpy as np
import torch
import mat73
from PIL import Image

def getmodel():
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    num_classes = 11
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

class Data(Dataset):
  def __init__(self, d, transforms=None):
    self.transform = transforms
    self.data = mat73.loadmat(d)    

  def __getitem__(self,idx):
    box = []
    label = []
    bbox = self.data['digitStruct']['bbox'][idx]
    img = self.data['digitStruct']['name'][idx]
    
    #img = cv2.imread("./drive/My Drive/train_main/train/" + img)
    img = Image.open("./drive/My Drive/train_main/train/" + img).convert("RGB")

    if (isinstance(bbox['label'], list)):
      size = len(bbox['label'])
      
      for i in range(size):
        box.append([bbox['left'][i] + 0, bbox['top'][i] + 0, bbox['left'][i] + bbox['width'][i], bbox['top'][i] + bbox['height'][i]])
        label.append(int(bbox['label'][i] + 0))
    else:
        box.append([bbox['left']+0, bbox['top']+0, bbox['left']+bbox['width'], bbox['top']+bbox['height']])
        label.append(int(bbox['label'] + 0))

    target = {}
    target["boxes"] = torch.tensor(box, dtype = torch.float)
    target["labels"] = torch.tensor(label, dtype = torch.int64)
    
    img = np.array(np.rot90(img, axes=(0,2)))
    img = torch.tensor(img,dtype=torch.float32)

    return img,target

  def __len__(self):
    return len(self.data['digitStruct']['bbox']);

This is my training loop

def epoch(model, optimizer, dataset, path, lr_scheduler):
  device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
  
  model.to(device)
  for c in range(int(len(dataset)/1000)):
    images, target = dataset[c]
    images = images.to(device)

    target["boxes"] = target["boxes"].to(device)
    target["labels"] = target["labels"].to(device)

    loss_dict = model(images.unsqueeze(0),[target])
    losses = sum(loss for loss in loss_dict.values())

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

    print(losses)
    print("percent done: " , (c*100/int(len(dataset)/1000)))
    torch.save(model.state_dict(), path)
    #torch.save(model.state_dict(), "./net.pth")
    #lr_scheduler.step(losses)

def train(epochs, model, optimizer, dataset, path="./drive/My Drive/SHVN.pth", retrain=False):
  if retrain:
    model.load_state_dict(torch.load(path))
  lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True)
  for i in range(epochs):
    epoch(model, optimizer, dataset, path, lr_scheduler)

My loss was getting very low around 0.5, 0.6. but the output I got just from a test image is this:

{'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<StackBackward>), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward>)}
tensor([], device='cuda:0', dtype=torch.int64)

I don’t know what the problem is. Thank you for the help in advance!

I am using SVHN dataset format 1.

Hi @neem-41. I am also facing the same problem have you resolved the problem

Hi @Tupakula_Mallikarjun,

I fixed two things in my code. In my constructor on Data class I had forgotten to call super. So i added this in the code:

class Data(Dataset):
 def __init__(self,d,transforms=None):
  super().__init__()
  self.transforms = transforms
  self.data = mat73.loadmat(d)

Then I used dataloader to load images in batch of 2 and trained them. I saw some significant change with that but I would also advice increasing the number of epochs.

Another thing that I changed was instead of converting image using np.rot90 I did this:

img = cv2.cvtColor(i,cv2.COLOR_BGR2RGB).astype(np.float32)
img /= 255.0

I am not sure why the image thing helped but it did. Hope this helps!