The size of tensor a (416) must match the size of tensor b (3) at non-singleton dimension 0

I’m trying to train a Faster R-CNN model on a custom dataset, but I’m getting this error. 416 is the resized image size in my dataset. How could this size ever match 3?

Error output:

Traceback (most recent call last):
  File "c:\Users\lemon\Desktop\ap_py_2\train.py", line 134, in <module>
    train_loss = train(train_loader, model)
  File "c:\Users\lemon\Desktop\ap_py_2\train.py", line 40, in train
    loss_dict = model(images, targets)
  File "C:\Users\lemon\miniconda3\envs\cnn-env-03\lib\site-packages\torch\nn\modules\module.py", line 1131, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\lemon\miniconda3\envs\cnn-env-03\lib\site-packages\torchvision\models\detection\generalized_rcnn.py", line 85, in forward
    images, targets = self.transform(images, targets)
  File "C:\Users\lemon\miniconda3\envs\cnn-env-03\lib\site-packages\torch\nn\modules\module.py", line 1131, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\lemon\miniconda3\envs\cnn-env-03\lib\site-packages\torchvision\models\detection\transform.py", line 129, in forward
    image = self.normalize(image)
  File "C:\Users\lemon\miniconda3\envs\cnn-env-03\lib\site-packages\torchvision\models\detection\transform.py", line 157, in normalize
    return (image - mean[:, None, None]) / std[:, None, None]
RuntimeError: The size of tensor a (416) must match the size of tensor b (3) at non-singleton dimension 0

train.py:

from config import (
    DEVICE, NUM_CLASSES, NUM_EPOCHS, OUT_DIR,
    VISUALIZE_TRANSFORMED_IMAGES, NUM_WORKERS,
)
from model import create_model
from custom_utils import Averager, SaveBestModel, save_model, save_loss_plot
from tqdm import tqdm
from datasets import (
    create_train_dataset, create_valid_dataset, 
    create_train_loader, create_valid_loader
)
import torch
import matplotlib.pyplot as plt
import time
import gc
plt.style.use('ggplot')

gc.collect()
torch.cuda.empty_cache()

# function for running training iterations
def train(train_data_loader, model):
    print('Training')
    global train_itr
    global train_loss_list
    
    # initialize tqdm progress bar
    prog_bar = tqdm(train_data_loader, total=len(train_data_loader))

    # for i, data in enumerate(prog_bar):
    for x, y_batch in prog_bar:
        optimizer.zero_grad()
        # images, targets = data
        images = x
        targets = y_batch
        
        images = list(torch.from_numpy(image).to(DEVICE) for image in images)
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
        
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        train_loss_list.append(loss_value)
        train_loss_hist.send(loss_value)
        losses.backward()
        optimizer.step()
        train_itr += 1
    
        # update the loss value beside the progress bar for each iteration
        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
        del loss_value
        gc.collect()
        torch.cuda.empty_cache()
    
    return train_loss_list

# function for running validation iterations
def validate(valid_data_loader, model):
    print('Validating')
    global val_itr
    global val_loss_list
    
    # initialize tqdm progress bar
    prog_bar = tqdm(valid_data_loader, total=len(valid_data_loader))

    # for i, data in enumerate(prog_bar):
    for x, y_batch in prog_bar:
        # images, targets = data
        images = x
        targets = y_batch
        
        images = list(image.to(DEVICE) for image in images)
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
        
        with torch.no_grad():
            loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        val_loss_list.append(loss_value)

        val_loss_hist.send(loss_value)

        val_itr += 1

        # update the loss value beside the progress bar for each iteration
        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
        del loss_value
        gc.collect()
        torch.cuda.empty_cache()

    return val_loss_list

if __name__ == '__main__':
    train_dataset = create_train_dataset()
    valid_dataset = create_valid_dataset()
    train_loader = create_train_loader(train_dataset, NUM_WORKERS)
    valid_loader = create_valid_loader(valid_dataset, NUM_WORKERS)

    print(f"Number of training samples: {len(train_dataset)}")
    print(f"Number of validation samples: {len(valid_dataset)}\n")
    # initialize the model and move to the computation device
    model = create_model(num_classes=NUM_CLASSES)
    model = model.to(DEVICE)
    # get the model parameters
    params = [p for p in model.parameters() if p.requires_grad]
    # define the optimizer
    optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
    # initialize the Averager class
    train_loss_hist = Averager()
    val_loss_hist = Averager()
    train_itr = 1
    val_itr = 1
    # train and validation loss lists to store loss values of all...
    # ... iterations till the end and plot graphs for all iterations
    train_loss_list = []
    val_loss_list = []
    # name to save the trained model with
    MODEL_NAME = 'model'
    # whether to show transformed images from data loader or not
    # if VISUALIZE_TRANSFORMED_IMAGES:
    #     from custom_utils import show_tranformed_image
    #     show_tranformed_image(train_loader)
    # initialize SaveBestModel class
    save_best_model = SaveBestModel()
    # start the training epochs
    for epoch in range(NUM_EPOCHS):
        print(f"\nEPOCH {epoch+1} of {NUM_EPOCHS}")
        # reset the training and validation loss histories for the current epoch
        train_loss_hist.reset()
        val_loss_hist.reset()
        # start timer and carry out training and validation
        start = time.time()
        train_loss = train(train_loader, model)
        val_loss = validate(valid_loader, model)
        print(f"Epoch #{epoch+1} train loss: {train_loss_hist.value:.3f}")   
        print(f"Epoch #{epoch+1} validation loss: {val_loss_hist.value:.3f}")   
        end = time.time()
        print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch}")
        # save the best model till now if we have the least loss in the...
        # ... current epoch
        save_best_model(
            val_loss_hist.value, epoch, model, optimizer
        )
        # save the current epoch model
        save_model(OUT_DIR, epoch, model, optimizer)
        # save loss plot
        save_loss_plot(OUT_DIR, train_loss, val_loss)
        
        # sleep for 5 seconds after each epoch
        time.sleep(5)

I guess images has the wrong dimensions for the transform.
Could you print out its dimensions?

I printed the input image dimensions. The output is:
torch.Size([416, 416, 3])
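
I got that by printing inside train(), right after the conversion from NumPy; a minimal sketch of the check I added, assuming the loader yields one array per image:

for image in images:
    print(image.shape)  # prints torch.Size([416, 416, 3])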

The dimensions of an image should be (3, H, W).
Try images = images.permute(2, 0, 1)
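
Since images in your train() is a list of tensors rather than a single tensor, the permute has to go inside the list comprehension; a minimal sketch, assuming each element from the loader is an HWC NumPy array:

images = list(torch.from_numpy(image).permute(2, 0, 1).to(DEVICE) for image in images)
# if your arrays are uint8, the detection transform also expects floats
# scaled to [0, 1], e.g. add .float().div(255) after the permute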

I think it’s solved, but I have a new error now. Thanks for the solution.

Error output:

Traceback (most recent call last):
  File "c:\Users\lemon\Desktop\ap_py_2\train.py", line 135, in <module>
    train_loss = train(train_loader, model)
  File "c:\Users\lemon\Desktop\ap_py_2\train.py", line 41, in train
    loss_dict = model(images, targets)
  File "C:\Users\lemon\miniconda3\envs\cnn-env-03\lib\site-packages\torch\nn\modules\module.py", line 1131, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\lemon\miniconda3\envs\cnn-env-03\lib\site-packages\torchvision\models\detection\generalized_rcnn.py", line 106, in forward
    features = self.backbone(images[0].tensors)
TypeError: 'ImageList' object is not subscriptable

I just changed all images[0] to images and it’s solved. It’s training now.
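
For anyone hitting the same pair of errors: stock torchvision indexes the ImageList as images.tensors in generalized_rcnn.py, so the images[0] there appears to have been a leftover local edit. A minimal sketch of the full conversion in train() that works for me now, assuming HWC NumPy arrays from the loader (the .float() is only needed if the arrays are not already floating point):

images = [torch.from_numpy(image).permute(2, 0, 1).float().to(DEVICE)
          for image in images]
targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
loss_dict = model(images, targets)  # the transform now sees CHW float tensors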
