Can somebody please help me? I'm training a Faster R-CNN car detector and keep hitting a RuntimeError in the model's forward pass.

The dataset:

import os

import cv2 as cv
import numpy as np
import pandas as pd
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader

class carData(Dataset):
    def __init__(self, image_dir):
        self.df = pd.read_csv('Images/data/train_solution_bounding_boxes (1).csv')
        self.dir = image_dir
        self.image_ids = self.df['image'].unique()

    def __getitem__(self, index):
        image_id = self.image_ids[index]
        bboxes = self.df[self.df['image'] == image_id]

        # load the image as RGB floats in [0, 1]
        img_path = os.path.join(self.dir, image_id)
        image = cv.imread(img_path, cv.IMREAD_COLOR)
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        # all boxes for this image, in (xmin, ymin, xmax, ymax) format
        boxes = bboxes[['xmin', 'ymin', 'xmax', 'ymax']].values
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        area = torch.as_tensor(area, dtype=torch.float32)

        # a single foreground class ("car"); background is implicit
        labels = torch.ones((bboxes.shape[0],), dtype=torch.int64)
        iscrowd = torch.zeros((bboxes.shape[0],), dtype=torch.int64)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrow'] = iscrowd

        image = torchvision.transforms.ToTensor()(image)
        image = image.permute(1, 2, 0)

        return image, target

    def __len__(self):
        return self.image_ids.shape[0]

The dataloader:

def collate_fn(batch):
    return tuple(zip(*batch))

train_data_loader = DataLoader(
    dataset,
    batch_size=6,
    shuffle=False,
    collate_fn=collate_fn
)
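
For context: each image can have a different number of boxes, so the default collation cannot stack the targets into one tensor; this collate_fn just regroups the batch into a tuple of images and a tuple of target dicts. A quick sanity check of what one batch looks like (assuming dataset is a carData instance):

images, targets = next(iter(train_data_loader))
# images  -> tuple of 6 image tensors
# targets -> tuple of 6 target dicts, each with its own number of boxes
print(len(images), targets[0]['boxes'].shape)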

Model:

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
num_classes = 2  # 1 class (car) + background

in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Hyperparameters:

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)
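
A side note: lr_scheduler is created here but never stepped in the training loop below, so the learning-rate decay never actually kicks in. StepLR is normally stepped once per epoch, for example:

for epoch in range(num_of_epochs):
    ...  # one epoch of training
    lr_scheduler.step()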

Training loop:

def train(model, optim, dataloader, path, num_of_epochs):
    itr = 1
    os.makedirs(path, exist_ok=True)  # create the checkpoint dir if needed
    model.train()  # detection models only return the loss dict in train mode

    for epoch in range(num_of_epochs):
        for images, targets in dataloader:
            # move the whole batch to the same device as the model
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            losses_value = losses.item()

            optim.zero_grad()
            losses.backward()
            optim.step()

            if itr % 20 == 0:
                print(f"iteration {itr}, loss: {losses_value}")
            itr += 1

        print(f"epoch: {epoch}, loss: {losses_value}")
        torch.save(model.state_dict(), f"{path}/model.pt")

Error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-174-8a2434479479> in <module>()
----> 1 train(model,optimizer,train_data_loader,'models',2)

5 frames
<ipython-input-173-fb5393e1415a> in train(model, optim, dataloader, path, num_of_epochs)
     10       targets=[{k:v.to(device) for k,v in t.items()} for t in targets]
     11 
---> 12       loss_dict=model(images,targets)
     13       losses=sum(loss for loss in loss_dict.values())
     14       losses_value=losses.item()

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
     75             original_image_sizes.append((val[0], val[1]))
     76 
---> 77         images, targets = self.transform(images, targets)
     78 
     79         # Check for degenerate boxes

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/transform.py in forward(self, images, targets)
    109                 raise ValueError("images is expected to be a list of 3d tensors "
    110                                  "of shape [C, H, W], got {}".format(image.shape))
--> 111             image = self.normalize(image)
    112             image, target_index = self.resize(image, target_index)
    113             images[i] = image

/usr/local/lib/python3.7/dist-packages/torchvision/models/detection/transform.py in normalize(self, image)
    134         mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
    135         std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
--> 136         return (image - mean[:, None, None]) / std[:, None, None]
    137 
    138     def torch_choice(self, k: List[int]) -> int:

RuntimeError: The size of tensor a (380) must match the size of tensor b (3) at non-singleton dimension 0

I've been sitting on this the whole day and can't find a solution.

If I'm not mistaken, in the model's forward pass the R-CNN module's self.transform tries to normalize the image, but instead of a tensor with 3 channels in the first dimension it gets one with 380. ToTensor() already returns the image in [C, H, W] layout, and the extra permute(1, 2, 0) turns it back into [H, W, C], so the image height (380) ends up in the channel dimension. Drop the permute and return the [C, H, W] tensor directly.
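
A minimal sketch of the fix, at the end of __getitem__ (everything else in the dataset stays the same):

# ToTensor() already converts the [H, W, C] float array into a
# [C, H, W] tensor, which is the layout torchvision's detection
# models expect, so no permute is needed
image = torchvision.transforms.ToTensor()(image)

return image, target

Unrelated to the crash: target['iscrow'] looks like a typo for target['iscrowd']. Training ignores the extra key, but COCO-style evaluation will look for iscrowd.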

You are the man! Finally. I'd been sitting on this the whole day. Thank you so much!