Hello everyone,
I’m working on a computer vision project for my university. The topic is completely new to me and I have no one to help me with it (a particularity of my university). I’m running into this specific error: “RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [2, 12]”.
I have tried asking ChatGPT, but it keeps giving me the same suggestions over and over, and I still don’t understand why my model ends up with an input of size [2, 12].
I am using the SSD (Single Shot Detector) model with a resnet18 backbone. The dataset contains space images of different objects such as spacecraft and debris, and the goal is to train a model that detects these objects (11 classes) with good accuracy.
There is not much information about the SSD model, I think, and my case is a bit particular compared to what I have found online. For now I need to run the model locally on my PC with a data sample (1,100 images, 100 per class); later I will train it on my university’s cloud platform.
Main code :
# Main code
import torch
from utilsSample import SPARKDataset         # SPARKDataset is defined in utilsSample.py
from utilsSample import PyTorchSparkDataset  # PyTorchSparkDataset is defined in utilsSample.py
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.models.detection import ssd
from torchvision.models.mobilenet import mobilenet_v2
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.models import resnet18
import matplotlib.pyplot as plt
from random import randint
# Assuming the dataset is defined in the SPARKDataset class in utilsSample.py
# # Define transforms (adjust these based on your requirements)
# transform = transforms.Compose([
#     transforms.Resize((300, 300)),  # Resizing
#     transforms.ToTensor()
# ])
# Path to your dataset directory
data_root = 'C:\\Users\\alibe\\Documents\\UPSSITECH3\\ERASMUS cours\\Computer Vision and Image Analysis\\stream-1\\data samples\\'
# Define the class map, you can use the same as previously defined
class_map = {
    'proba_2': 0, 'cheops': 1, 'debris': 2, 'double_star': 3, 'earth_observation_sat_1': 4, 'lisa_pathfinder': 5,
    'proba_3_csc': 6, 'proba_3_ocs': 7, 'smart_1': 8, 'soho': 9, 'xmm_newton': 10
}
# Dataset initialization
visualize_train_dataset = SPARKDataset(class_map, root_dir=data_root, split='train', transform=None)
visualize_val_dataset = SPARKDataset(class_map, root_dir=data_root, split='val', transform=None)
train_dataset = PyTorchSparkDataset(class_map, root_dir=data_root, split='train', transform=None)
val_dataset = PyTorchSparkDataset(class_map, root_dir=data_root, split='val', transform=None)
# Define the model
backbone = resnet18(pretrained=True)
num_classes = 12  # 11 object classes + background (torchvision detection models count background as a class)
num_channels = 3  # Number of channels per image in the dataset
# Assuming fc layer is the fully connected layer in your model
backbone.fc = torch.nn.Linear(backbone.fc.in_features, num_classes)
# SSD needs an anchor generator - define it
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
# Size used by the SSD transform (my raw images are 1024x1024; the model's transform resizes them)
image_size = (300, 300)
# Create the SSD model with the specified components
model = ssd.SSD(backbone, anchor_generator, image_size, num_classes)
#######
# Define the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
print(type(train_dataset))
# Define the custom collate function
# def custom_collate(batch):
#     images, targets = zip(*batch)
#     images = torch.stack(images)
#     return images, targets
# # Define the data loaders
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)
# Define the data loaders with custom collate function
# train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=custom_collate)
# val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=custom_collate)
print(type(train_loader))
print(train_loader)
# setting device to model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
torch.cuda.empty_cache()
# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        # No need to stack, as images is already a batched tensor
        # images = torch.stack(images)
        # Convert images to the appropriate format
        # images = images.to(device)

        # Debug prints: inspect what the DataLoader actually returns
        print(targets["labels"])
        print(batch_idx)
        print(images)
        print(targets)
        print("images (input) variable type : ", type(images))
        print("targets (input) variable type : ", type(targets))
        print(len(images))
        print(images.size)   # missing parentheses, so this prints the bound method rather than the size
        print(images.shape)
        print("")
        print("")
        # Debug print
        # print("Mean of Image:", torch.mean(images))
        # print("Std of Image:", torch.std(images))
        print("")
        print(type(images))
        # print("Input Size:", images[0].size())
        # print("Images Shape:", images[0].shape)
        # print(model)
        print("")
        print('TARGETS PART')
        print(type(targets))
        print(targets)
        print("")
        print(targets['boxes'])
        print(targets['boxes'][0])
        print("")
        print(targets['labels'])
        print("")
        for key, value in targets.items():
            print(value[0])

        # Rebuild the batched target dict into a list of per-image dicts
        targets_list = []
        for i in range(len(targets["labels"])):
            boxes = targets["boxes"][i].float()    # targets["boxes"][i] has shape [1, 4]
            # boxes = targets["boxes"][i].squeeze(0).float()  # Remove the singleton dimension
            # labels = targets["labels"][i].squeeze(0)        # Remove the singleton dimension
            labels = targets["labels"][i].int()    # targets["labels"][i] has shape [1]
            # Construct a dictionary with the required structure
            targets_dict = {"boxes": boxes, "labels": labels}
            # Append the dictionary to the list
            targets_list.append(targets_dict)

        # print('TARGETS PART 2')
        # print(targets)
        # print("")
        # print(targets["boxes"])
        # print(targets["boxes"].shape)
        print(type(model))
        print(model.transform)
        # print(model)

        images = (images.float() / 255.0).to(device)
        images = images.permute(0, 3, 1, 2).float() / 255.0  # last dimension is channels; note this divides by 255 a second time
        images = images.to(device)

        print("Images SHAPE : ", images.shape)
        print("Targets_list SHAPE : ", targets_dict["boxes"].shape)
        print("Targets_list SHAPE : ", targets_dict["labels"].shape)
        print("Targets_list TYPE : ", type(targets_dict["boxes"]))
        print("Targets_list TYPE : ", type(targets_dict["labels"]))
        print(targets_list)
        # print(model)
        print(images.shape)
        print(images)

        loss_dict = model(images, targets_list)
        total_loss = sum(loss for loss in loss_dict.values())
        total_loss.backward()
        optimizer.step()

        # # Print or log the loss if needed
        # if batch_idx % log_interval == 0:
        #     print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {total_loss.item()}')
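To make the [2, 12] shape more concrete, here is a small standalone check I can run on just the backbone, outside the training script (only a sketch, using the same resnet18 setup as in the main code; the dummy input size is arbitrary):

import torch
from torchvision.models import resnet18

# Same backbone setup as in the main code above
backbone = resnet18(pretrained=True)
backbone.fc = torch.nn.Linear(backbone.fc.in_features, 12)

# Dummy batch of 2 RGB images (the spatial size here is arbitrary)
dummy = torch.rand(2, 3, 300, 300)
with torch.no_grad():
    out = backbone(dummy)
print(out.shape)  # torch.Size([2, 12]) -- a flat classification output, not a feature map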
utilsSample.py
from ast import literal_eval
import os
import matplotlib.pyplot as plt
from skimage import io , img_as_uint
import pandas as pd
import matplotlib.patches as mpatches
from PIL import Image
import torch
import torchvision
from torchvision import transforms
class YourCustomTransformation(transforms.Compose):
    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img


class ResizeWithBbox(transforms.Resize):
    def __init__(self, size, apply_to_bbox=True):
        super(ResizeWithBbox, self).__init__(size)
        self.apply_to_bbox = apply_to_bbox

    def __call__(self, img):
        img = super(ResizeWithBbox, self).__call__(img)
        if self.apply_to_bbox:
            # Bounding box adjustments can be done in SPARKDataset.__getitem__
            return img
        return img


def process_labels(labels_dir, split):
    # path = os.path.join(labels_dir, file_name)
    labels_filename = os.path.join(labels_dir, split + '.csv')  # Construct the file path correctly
    labels = pd.read_csv(labels_filename, sep=';')
    return labels
class SPARKDataset:
    """ Class for dataset inspection: easily accessing single images and corresponding ground-truth data. """

    def __init__(self, class_map, root_dir, split, transform=None, detection=True):
        # self.root_dir = os.path.join(data_dir, split)
        self.root_dir = root_dir
        self.labels = process_labels(root_dir, split)
        self.class_map = class_map
        self.transform = transform    # Added the transform attribute
        self.detection = detection    # Added the detection attribute

    def __getitem__(self, idx):
        sat_name = self.labels.iloc[idx]['class']
        img_name = self.labels.iloc[idx]['filename']
        img_name_ok = 'train\\' + img_name
        image_name = os.path.join(self.root_dir, img_name_ok)
        # Load image as PIL image
        image = Image.open(image_name)
        # Convert PIL image to tensor
        image = transforms.ToTensor()(image)
        # Initialize bbox
        bbox = None
        # Apply transformation if specified
        if self.transform is not None:
            image = self.transform(image)
        if self.detection:
            # Adjust bounding box accordingly
            bbox = self.labels.iloc[idx]['bbox']
            bbox = literal_eval(bbox)
            bbox = [bbox[1], bbox[0], bbox[3], bbox[2]]  # Convert to [x_min, y_min, x_max, y_max]
            targets = {
                'boxes': torch.tensor([bbox], dtype=torch.float32),
                'labels': torch.tensor([self.class_map[sat_name]], dtype=torch.int64),
                # 'image_id': img_name,  # Assuming 'filename' uniquely identifies each image
            }
        else:
            # Include labeling information for the non-detection case
            targets = {
                'labels': torch.tensor([self.class_map[sat_name]], dtype=torch.int64),
                'image_id': img_name,  # Assuming 'filename' uniquely identifies each image
            }
        return image, targets

    def __len__(self):
        length = 1101
        # return 66000  # Replace 66000 with the actual length of your dataset
        return length   # Replace "length" with the actual length of your dataset

    def get_image(self, i=0):
        """ Loading image as PIL image. """
        sat_name = self.labels.iloc[i]['class']
        img_name = self.labels.iloc[i]['filename']
        img_name_ok = 'train\\' + img_name  # To modify with the split chosen (train, val, test)
        image_name = os.path.join(self.root_dir, img_name_ok)  # Ensure the image path is correctly created
        print('img_name_ok is : ' + img_name_ok)
        print("Image Path:", image_name)  # Added for debugging
        image = io.imread(image_name)
        return image, self.class_map[sat_name]

    def get_bbox(self, i=0):
        """ Getting bounding box for image. """
        bbox = self.labels.iloc[i]['bbox']
        bbox = literal_eval(bbox)
        min_x, min_y, max_x, max_y = bbox
        return min_x, min_y, max_x, max_y

    def visualize(self, i, size=(15, 15), ax=None):
        """ Visualizing image with the ground-truth bounding box and class label drawn on top. """
        if ax is None:
            ax = plt.gca()
        image, img_class = self.get_image(i)
        min_x, min_y, max_x, max_y = self.get_bbox(i)
        ax.imshow(image, vmin=0, vmax=255)
        rect = mpatches.Rectangle((min_y, min_x), max_y - min_y, max_x - min_x,
                                  fill=False, edgecolor='red', linewidth=2)
        ax.add_patch(rect)
        label = f"{list(self.class_map.keys())[list(self.class_map.values()).index(img_class)]}"
        ax.text(min_y, min_x - 20, label, color='white', fontsize=15)
        ax.set_axis_off()
        return
try:
    import torch
    from torch.utils.data import Dataset
    from torchvision import transforms
    has_pytorch = True
    print('Found Pytorch')
except ImportError:
    has_pytorch = False


if has_pytorch:
    class PyTorchSparkDataset(Dataset):
        """ SPARK dataset that can be used with a DataLoader for PyTorch training. """

        def __init__(self, class_map, split, root_dir, transform=None, detection=True):
            if not has_pytorch:
                raise ImportError('Pytorch was not imported successfully!')
            if split not in {'train', 'val', 'test'}:
                raise ValueError('Invalid split, has to be either \'train\', \'val\' or \'test\'')
            self.class_map = class_map
            self.detection = detection
            self.split = split
            self.root_dir = os.path.join(root_dir, self.split)
            self.labels = process_labels(root_dir, split)
            self.transform = transform

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            sat_name = self.labels.iloc[idx]['class']
            img_name = self.labels.iloc[idx]['filename']
            image_name = f'{self.root_dir}/{img_name}'
            image = io.imread(image_name)
            if self.transform is not None:
                torch_image = self.transform(image)
            else:
                torch_image = torch.from_numpy(image).permute(2, 1, 0)
            if self.detection:
                bbox = self.labels.iloc[idx]['bbox']
                bbox = literal_eval(bbox)
                bbox = [bbox[1], bbox[0], bbox[3], bbox[2]]  # Convert to [x_min, y_min, x_max, y_max]
                targets = {
                    'boxes': torch.tensor([bbox], dtype=torch.float32),
                    'labels': torch.tensor([self.class_map[sat_name]], dtype=torch.int64),  # Use class_map here
                    # 'image_id': idx,  # Change to a single integer
                }
                return image, targets  # note: returns the raw numpy array, not torch_image
            return image, targets
else:
    class PyTorchSparkDataset:
        def __init__(self, *args, **kwargs):
            raise ImportError('Pytorch is not available!')
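To show where the extra dimension in targets['boxes'] comes from, here is a tiny sketch of what the default DataLoader collation does with two samples shaped like the ones my dataset returns (the box values are just the ones from my printout; this is not part of my actual code):

import torch
from torch.utils.data import default_collate

# Two (image, target) samples shaped like what PyTorchSparkDataset.__getitem__ returns
sample1 = (torch.zeros(1024, 1024, 3, dtype=torch.uint8),
           {'boxes': torch.tensor([[799., 0., 1024., 75.]]), 'labels': torch.tensor([4])})
sample2 = (torch.zeros(1024, 1024, 3, dtype=torch.uint8),
           {'boxes': torch.tensor([[41., 578., 259., 920.]]), 'labels': torch.tensor([9])})

images, targets = default_collate([sample1, sample2])
print(images.shape)             # torch.Size([2, 1024, 1024, 3])
print(targets['boxes'].shape)   # torch.Size([2, 1, 4])  -> batched, hence the extra dimension
print(targets['labels'].shape)  # torch.Size([2, 1])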
My code is a bit messy, with a lot of prints, because I have been trying several things to get the input into the right format for the model.
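For reference, this is the training-input format I am trying to reach, based on my reading of the torchvision detection docs (a sketch with made-up box values, not my real data):

import torch

images = [torch.rand(3, 300, 300), torch.rand(3, 300, 300)]  # list of C,H,W float tensors
targets = [
    {'boxes': torch.tensor([[10., 20., 200., 250.]]), 'labels': torch.tensor([4])},  # one dict per image
    {'boxes': torch.tensor([[5., 5., 100., 120.]]),   'labels': torch.tensor([9])},  # boxes as [x_min, y_min, x_max, y_max]
]
# loss_dict = model(images, targets)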
After running the training loop, I get the following output and error:
Found Pytorch
C:\Users\alibe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
C:\Users\alibe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
<class 'utilsSample.PyTorchSparkDataset'>
<class 'torch.utils.data.dataloader.DataLoader'>
<torch.utils.data.dataloader.DataLoader object at 0x0000028DDDD64E50>
img_name_ok is : train\img084467.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img084467.jpg
img_name_ok is : train\img068659.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img068659.jpg
img_name_ok is : train\img087785.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img087785.jpg
img_name_ok is : train\img040469.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img040469.jpg
img_name_ok is : train\img043975.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img043975.jpg
img_name_ok is : train\img020490.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img020490.jpg
img_name_ok is : train\img080816.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img080816.jpg
img_name_ok is : train\img073756.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img073756.jpg
img_name_ok is : train\img025365.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img025365.jpg
img_name_ok is : train\img031010.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img031010.jpg
img_name_ok is : train\img061777.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img061777.jpg
img_name_ok is : train\img068321.jpg
Image Path: C:\Users\alibe\Documents\UPSSITECH3\ERASMUS cours\Computer Vision and Image Analysis\stream-1\data samples\train\img068321.jpg
tensor([[4],
[9]])
0
tensor([[[[ 32, 32, 32],
[ 57, 57, 57],
[ 44, 44, 44],
...,
[ 90, 89, 87],
[ 80, 79, 77],
[ 70, 69, 67]],
[[ 46, 46, 46],
[ 52, 52, 52],
[ 48, 48, 48],
...,
[ 77, 76, 74],
[ 88, 87, 85],
[116, 115, 113]],
[[ 47, 47, 47],
[ 42, 42, 42],
[ 55, 55, 55],
...,
[ 59, 58, 56],
[142, 141, 139],
[135, 134, 132]],
...,
[[ 48, 48, 48],
[ 48, 48, 48],
[ 39, 39, 39],
...,
[ 54, 54, 54],
[ 36, 36, 36],
[ 52, 52, 52]],
[[ 53, 53, 53],
[ 51, 51, 51],
[ 35, 35, 35],
...,
[ 47, 47, 47],
[ 40, 40, 40],
[ 46, 46, 46]],
[[ 40, 40, 40],
[ 53, 53, 53],
[ 35, 35, 35],
...,
[ 47, 47, 47],
[ 53, 53, 53],
[ 48, 48, 48]]],
[[[ 42, 42, 42],
[ 31, 31, 31],
[ 24, 24, 24],
...,
[ 31, 31, 31],
[ 48, 48, 48],
[ 42, 42, 42]],
[[ 30, 30, 30],
[ 25, 25, 25],
[ 44, 44, 44],
...,
[ 49, 49, 49],
[ 34, 34, 34],
[ 40, 40, 40]],
[[ 10, 10, 10],
[ 23, 23, 23],
[ 19, 19, 19],
...,
[ 33, 33, 33],
[ 27, 27, 27],
[ 29, 29, 29]],
...,
[[ 30, 30, 30],
[ 26, 26, 26],
[ 34, 34, 34],
...,
[ 31, 31, 31],
[ 34, 34, 34],
[ 31, 31, 31]],
[[ 43, 43, 43],
[ 33, 33, 33],
[ 29, 29, 29],
...,
[ 32, 32, 32],
[ 24, 24, 24],
[ 18, 18, 18]],
[[ 25, 25, 25],
[ 17, 17, 17],
[ 43, 43, 43],
...,
[ 40, 40, 40],
[ 29, 29, 29],
[ 23, 23, 23]]]], dtype=torch.uint8)
{'boxes': tensor([[[ 799., 0., 1024., 75.]],
[[ 41., 578., 259., 920.]]]), 'labels': tensor([[4],
[9]])}
images (input) variable type : <class 'torch.Tensor'>
targets (input) variable type : <class 'dict'>
2
<built-in method size of Tensor object at 0x0000028DE60FDF90>
torch.Size([2, 1024, 1024, 3])
<class 'torch.Tensor'>
TARGETS PART
<class 'dict'>
{'boxes': tensor([[[ 799., 0., 1024., 75.]],
[[ 41., 578., 259., 920.]]]), 'labels': tensor([[4],
[9]])}
tensor([[[ 799., 0., 1024., 75.]],
[[ 41., 578., 259., 920.]]])
tensor([[ 799., 0., 1024., 75.]])
tensor([[4],
[9]])
tensor([[ 799., 0., 1024., 75.]])
tensor([4])
<class 'torchvision.models.detection.ssd.SSD'>
GeneralizedRCNNTransform(
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
Resize(min_size=(300,), max_size=300, mode='bilinear')
)
Images SHAPE : torch.Size([2, 3, 1024, 1024])
Targets_list SHAPE : torch.Size([1, 4])
Targets_list SHAPE : torch.Size([1])
Targets_list TYPE : <class 'torch.Tensor'>
Targets_list TYPE : <class 'torch.Tensor'>
[{'boxes': tensor([[ 799., 0., 1024., 75.]]), 'labels': tensor([4], dtype=torch.int32)}, {'boxes': tensor([[ 41., 578., 259., 920.]]), 'labels': tensor([9], dtype=torch.int32)}]
torch.Size([2, 3, 1024, 1024])
tensor([[[[0.0005, 0.0009, 0.0007, ..., 0.0014, 0.0012, 0.0011],
[0.0007, 0.0008, 0.0007, ..., 0.0012, 0.0014, 0.0018],
[0.0007, 0.0006, 0.0008, ..., 0.0009, 0.0022, 0.0021],
...,
[0.0007, 0.0007, 0.0006, ..., 0.0008, 0.0006, 0.0008],
[0.0008, 0.0008, 0.0005, ..., 0.0007, 0.0006, 0.0007],
[0.0006, 0.0008, 0.0005, ..., 0.0007, 0.0008, 0.0007]],
[[0.0005, 0.0009, 0.0007, ..., 0.0014, 0.0012, 0.0011],
[0.0007, 0.0008, 0.0007, ..., 0.0012, 0.0013, 0.0018],
[0.0007, 0.0006, 0.0008, ..., 0.0009, 0.0022, 0.0021],
...,
[0.0007, 0.0007, 0.0006, ..., 0.0008, 0.0006, 0.0008],
[0.0008, 0.0008, 0.0005, ..., 0.0007, 0.0006, 0.0007],
[0.0006, 0.0008, 0.0005, ..., 0.0007, 0.0008, 0.0007]],
[[0.0005, 0.0009, 0.0007, ..., 0.0013, 0.0012, 0.0010],
[0.0007, 0.0008, 0.0007, ..., 0.0011, 0.0013, 0.0017],
[0.0007, 0.0006, 0.0008, ..., 0.0009, 0.0021, 0.0020],
...,
[0.0007, 0.0007, 0.0006, ..., 0.0008, 0.0006, 0.0008],
[0.0008, 0.0008, 0.0005, ..., 0.0007, 0.0006, 0.0007],
[0.0006, 0.0008, 0.0005, ..., 0.0007, 0.0008, 0.0007]]],
[[[0.0006, 0.0005, 0.0004, ..., 0.0005, 0.0007, 0.0006],
[0.0005, 0.0004, 0.0007, ..., 0.0008, 0.0005, 0.0006],
[0.0002, 0.0004, 0.0003, ..., 0.0005, 0.0004, 0.0004],
...,
[0.0005, 0.0004, 0.0005, ..., 0.0005, 0.0005, 0.0005],
[0.0007, 0.0005, 0.0004, ..., 0.0005, 0.0004, 0.0003],
[0.0004, 0.0003, 0.0007, ..., 0.0006, 0.0004, 0.0004]],
[[0.0006, 0.0005, 0.0004, ..., 0.0005, 0.0007, 0.0006],
[0.0005, 0.0004, 0.0007, ..., 0.0008, 0.0005, 0.0006],
[0.0002, 0.0004, 0.0003, ..., 0.0005, 0.0004, 0.0004],
...,
[0.0005, 0.0004, 0.0005, ..., 0.0005, 0.0005, 0.0005],
[0.0007, 0.0005, 0.0004, ..., 0.0005, 0.0004, 0.0003],
[0.0004, 0.0003, 0.0007, ..., 0.0006, 0.0004, 0.0004]],
[[0.0006, 0.0005, 0.0004, ..., 0.0005, 0.0007, 0.0006],
[0.0005, 0.0004, 0.0007, ..., 0.0008, 0.0005, 0.0006],
[0.0002, 0.0004, 0.0003, ..., 0.0005, 0.0004, 0.0004],
...,
[0.0005, 0.0004, 0.0005, ..., 0.0005, 0.0005, 0.0005],
[0.0007, 0.0005, 0.0004, ..., 0.0005, 0.0004, 0.0003],
[0.0004, 0.0003, 0.0007, ..., 0.0006, 0.0004, 0.0004]]]])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[7], line 96
93 print(images.shape)
94 print(images)
---> 96 loss_dict = model(images, targets_list)
99 total_loss = sum(loss for loss in loss_dict.values())
100 total_loss.backward()
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torchvision\models\detection\ssd.py:378, in SSD.forward(self, images, targets)
375 features = list(features.values())
377 # compute the ssd heads outputs using the features
--> 378 head_outputs = self.head(features)
380 # create the set of anchors
381 anchors = self.anchor_generator(images, features)
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torchvision\models\detection\ssd.py:66, in SSDHead.forward(self, x)
64 def forward(self, x: List[Tensor]) -> Dict[str, Tensor]:
65 return {
---> 66 "bbox_regression": self.regression_head(x),
67 "cls_logits": self.classification_head(x),
68 }
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torchvision\models\detection\ssd.py:95, in SSDScoringHead.forward(self, x)
92 all_results = []
94 for i, features in enumerate(x):
---> 95 results = self._get_result_from_module_list(features, i)
97 # Permute output from (N, A * K, H, W) to (N, HWA, K).
98 N, _, H, W = results.shape
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torchvision\models\detection\ssd.py:88, in SSDScoringHead._get_result_from_module_list(self, x, idx)
86 for i, module in enumerate(self.module_list):
87 if i == idx:
---> 88 out = module(x)
89 return out
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\conv.py:460, in Conv2d.forward(self, input)
459 def forward(self, input: Tensor) -> Tensor:
--> 460 return self._conv_forward(input, self.weight, self.bias)
File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\conv.py:456, in Conv2d._conv_forward(self, input, weight, bias)
452 if self.padding_mode != 'zeros':
453 return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
454 weight, bias, self.stride,
455 _pair(0), self.dilation, self.groups)
--> 456 return F.conv2d(input, weight, bias, self.stride,
457 self.padding, self.dilation, self.groups)
RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [2, 12]
I know this is a lot and that it doesn’t look great, but I’m a complete beginner and I want to learn, so thank you very much for any help with this.