Faster R-CNN - ResNet.forward() takes 2 positional arguments but 3 were given

Thanks a lot for your help. I have changed my forward function to accept a single argument, and I now calculate the loss outside of it. Part of my training script looks like this now:

def train(train_data_loader, model):
    print('Training')
    global train_itr
    global train_loss_list
    
    # initialize tqdm progress bar
    prog_bar = tqdm(train_data_loader, total=len(train_data_loader))
    
    for i, data in enumerate(prog_bar):
        optimizer.zero_grad()
        images, targets = data
        
        #images = list(image.to(DEVICE) for image in images)
        images = torch.stack(images).to(DEVICE)
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
        
        print('Images shape ' + str(images[0].shape))
        print('Targets ' + str(targets[0]))

        #loss_dict = model(images, targets)
        predictions = model(images)
        
        print('Predictions shape ' + str(predictions[0].shape))
        loss = model.compute_loss(predictions, targets)
        
        loss_value = loss.item()
        train_loss_list.append(loss_value)

        train_loss_hist.send(loss_value)

        loss.backward()
        optimizer.step()

        train_itr += 1
    
        # update the loss value beside the progress bar for each iteration
        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
    return train_loss_list
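
For completeness, the data loader is built with the usual detection-style collate function, so each image can carry a different number of boxes/labels. This is a sketch from memory (train_dataset and BATCH_SIZE stand in for my actual setup):

from torch.utils.data import DataLoader

def collate_fn(batch):
    # keep images and targets as tuples rather than stacking,
    # since each image can have a different number of boxes/labels
    return tuple(zip(*batch))

# train_dataset and BATCH_SIZE are placeholders for my actual config
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, collate_fn=collate_fn)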

and the relevant part of my model script is:

class ResNet(nn.Module):
    def __init__(self, ResBlock, layer_list, num_classes, num_channels=3):
        super(ResNet, self).__init__()
        self.in_channels = 64
        
        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(ResBlock, layer_list[0], planes=64)
        self.layer2 = self._make_layer(ResBlock, layer_list[1], planes=128, stride=2)
        self.layer3 = self._make_layer(ResBlock, layer_list[2], planes=256, stride=2)
        self.layer4 = self._make_layer(ResBlock, layer_list[3], planes=512, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512*ResBlock.expansion, num_classes)
        
    def forward(self, x):
        x = self.relu(self.batch_norm1(self.conv1(x)))
        print('FIRST: ' + str(x.shape))
        x = self.max_pool(x)
        print('MAXPOOL: ' + str(x.shape))
        x = self.layer1(x)
        print('LAYER1: ' + str(x.shape))
        x = self.layer2(x)
        print('LAYER2: ' + str(x.shape))
        x = self.layer3(x)
        print('LAYER3: ' + str(x.shape))
        x = self.layer4(x)
        print('LAYER4: ' + str(x.shape))
        x = self.avgpool(x)
        print('AVG: ' + str(x.shape))
        x = x.view(x.size(0), -1)
        print('FC: ' + str(x.shape))
        x = self.fc(x)
               
        return x
        
    def _make_layer(self, ResBlock, blocks, planes, stride=1):
        ii_downsample = None
        layers = []
        
        if stride != 1 or self.in_channels != planes*ResBlock.expansion:
            ii_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, planes*ResBlock.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes*ResBlock.expansion)
            )
            
        layers.append(ResBlock(self.in_channels, planes, i_downsample=ii_downsample, stride=stride))
        self.in_channels = planes*ResBlock.expansion
        
        for _ in range(blocks-1):
            layers.append(ResBlock(self.in_channels, planes))
            
        return nn.Sequential(*layers)
        
    def compute_loss(self, predictions, targets):
        target_labels = torch.cat([t['labels'] for t in targets])

        loss = F.cross_entropy(predictions, target_labels)
        return loss
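
To compare the two batch dimensions directly, a quick check can go right before the loss call (just a debugging sketch, using the names from my training loop above):

# debugging sketch: drop this into train() just before model.compute_loss(...)
target_labels = torch.cat([t['labels'] for t in targets])
print('predictions ' + str(predictions.shape))      # one row per image in the batch
print('target_labels ' + str(target_labels.shape))  # one entry per box across the batch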

However, I am getting this error now:

EPOCH 1 of 10
Training
  0%|                                                                                   | 0/48 [00:00<?, ?it/s]Images shape torch.Size([3, 416, 416])
Targets {'boxes': tensor([[210.0800, 188.7600, 314.0800, 252.2000]]), 'labels': tensor([1]), 'area': tensor([6597.7603]), 'iscrowd': tensor([0]), 'image_id': tensor([64])}
FIRST: torch.Size([3, 64, 208, 208])
MAXPOOL: torch.Size([3, 64, 104, 104])
LAYER1: torch.Size([3, 256, 104, 104])
LAYER2: torch.Size([3, 512, 52, 52])
LAYER3: torch.Size([3, 1024, 26, 26])
LAYER4: torch.Size([3, 2048, 13, 13])
AVG: torch.Size([3, 2048, 1, 1])
FC: torch.Size([3, 2048])
Predictions shape torch.Size([5])
Loss: 1.3513:   2%|█▎                                                           | 1/48 [00:01<01:04,  1.38s/it]Images shape torch.Size([3, 416, 416])
Targets {'boxes': tensor([[165.8800, 133.1200, 323.4400, 278.0267]]), 'labels': tensor([4]), 'area': tensor([22831.4961]), 'iscrowd': tensor([0]), 'image_id': tensor([100])}
FIRST: torch.Size([3, 64, 208, 208])
MAXPOOL: torch.Size([3, 64, 104, 104])
LAYER1: torch.Size([3, 256, 104, 104])
LAYER2: torch.Size([3, 512, 52, 52])
LAYER3: torch.Size([3, 1024, 26, 26])
LAYER4: torch.Size([3, 2048, 13, 13])
AVG: torch.Size([3, 2048, 1, 1])
FC: torch.Size([3, 2048])
Predictions shape torch.Size([5])
Loss: 2.5029:   4%|██▌                                                          | 2/48 [00:02<01:01,  1.33s/it]Images shape torch.Size([3, 416, 416])
Targets {'boxes': tensor([[232.9600, 199.1600, 307.8400, 307.3200]]), 'labels': tensor([1]), 'area': tensor([8099.0200]), 'iscrowd': tensor([0]), 'image_id': tensor([4])}
FIRST: torch.Size([3, 64, 208, 208])
MAXPOOL: torch.Size([3, 64, 104, 104])
LAYER1: torch.Size([3, 256, 104, 104])
LAYER2: torch.Size([3, 512, 52, 52])
LAYER3: torch.Size([3, 1024, 26, 26])
LAYER4: torch.Size([3, 2048, 13, 13])
AVG: torch.Size([3, 2048, 1, 1])
FC: torch.Size([3, 2048])
Predictions shape torch.Size([5])
Loss: 0.8967:   6%|███▊                                                         | 3/48 [00:03<00:57,  1.27s/it]Images shape torch.Size([3, 416, 416])
Targets {'boxes': tensor([[136.5867, 164.8400, 288.4267, 328.1200]]), 'labels': tensor([2]), 'area': tensor([24792.4336]), 'iscrowd': tensor([0]), 'image_id': tensor([122])}
FIRST: torch.Size([3, 64, 208, 208])
MAXPOOL: torch.Size([3, 64, 104, 104])
LAYER1: torch.Size([3, 256, 104, 104])
LAYER2: torch.Size([3, 512, 52, 52])
LAYER3: torch.Size([3, 1024, 26, 26])
LAYER4: torch.Size([3, 2048, 13, 13])
AVG: torch.Size([3, 2048, 1, 1])
FC: torch.Size([3, 2048])
Predictions shape torch.Size([5])
Loss: 0.8967:   6%|███▊                                                         | 3/48 [00:04<01:03,  1.42s/it]
Traceback (most recent call last):
  File "/lustre/alice3/scratch/aiadapt/va95/od/2/20211025_Custom_Object_Detection_using_PyTorch_Faster_RCNN/src/engine.py", line 134, in <module>
    train_loss = train(train_loader, model)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/lustre/alice3/scratch/aiadapt/va95/od/2/20211025_Custom_Object_Detection_using_PyTorch_Faster_RCNN/src/engine.py", line 43, in train
    loss = model.compute_loss(predictions, targets)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/lustre/alice3/scratch/aiadapt/va95/od/2/20211025_Custom_Object_Detection_using_PyTorch_Faster_RCNN/src/model.py", line 139, in compute_loss
    loss = F.cross_entropy(predictions, target_labels)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/v/va95/miniconda3/envs/odenv/lib/python3.11/site-packages/torch/nn/functional.py", line 3053, in cross_entropy
    return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: Expected input batch_size (3) to match target batch_size (5).

I have checked another thread that covers this error (ValueError: Expected input batch_size (324) to match target batch_size (4) - #26 by william_hero), but unfortunately I couldn't figure out the problem yet. From the printed shapes, predictions has one row per image (torch.Size([3, 5]) for a batch of 3 images and 5 classes), while the failing batch apparently contributes 5 box labels in total after the torch.cat; a minimal repro is below. Could you please help me with this?
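
In case it is useful, this standalone snippet reproduces the same mismatch (the class count of 5 and the label values are placeholders matching my failing batch):

import torch
import torch.nn.functional as F

predictions = torch.randn(3, 5)                # 3 images, 5 classes, as in my batch
target_labels = torch.tensor([1, 4, 1, 2, 2])  # 5 box labels across the 3 images
F.cross_entropy(predictions, target_labels)    # raises the same ValueError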