I am seeing a shape error when trying to detect objects in a video.

Here is the model code:

import torch
from torch import nn

class tinyvgg12(nn.Module):
    def __init__(self, input, hidden, output):
        super().__init__()
        self.Conv_block1 = nn.Sequential(
            nn.Conv2d(
                in_channels=input,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.Conv_block2 = nn.Sequential(
            nn.Conv2d(
                in_channels=hidden,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden*16*16,
                out_features=output
            )
        )

    def forward(self, x):
        x = self.Conv_block1(x)
        # print(x.shape)
        x = self.Conv_block2(x)
        # print(x.shape)
        x = self.classifier(x)
        # print(x.shape)
        return x

Here is the code for detecting objects:
import cv2
import torch
import torchvision.transforms as transforms

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model_x_5.to(device)
model.eval()

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

video_path = "/content/data/Baby brushes his teeth.mp4"
cap = cv2.VideoCapture(video_path)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    input_tensor = transform(frame_rgb)
    input_tensor = input_tensor.unsqueeze(0)
    with torch.no_grad():
        output = model(input_tensor.to(device))

    cv2.imshow("object detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
in <cell line: 19>()
     25 input_tensor = input_tensor.unsqueeze(0)
     26 with torch.no_grad():
---> 27     output = model(input_tensor.to(device))
     28
     29 cv2.imshow("object detection", frame)

5 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x200704 and 16384x1)

Based on the error message, the classifier is failing because the flattened feature dimension of the input activation does not match the in_features of the linear layer. Set in_features to 200704 in the linear layer and it should work.
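For reference, here is where the 200704 comes from (a quick sketch; the hidden=64 value is an assumption, since the creation of model_x_5 is not shown in the thread):

# Two MaxPool2d(kernel_size=2, stride=2) layers halve a 224x224 input twice:
# 224 -> 112 -> 56, so the activation entering the classifier is [1, hidden, 56, 56].
hidden = 64              # assumed value used when creating model_x_5
h = w = 224 // 2 // 2    # spatial size after the two max pools
print(hidden * h * w)    # 200704, matching mat1 in the error message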

@ptrblck

model_x_5 = tinyvgg12(input=3,
                      hidden=200704,
                      output=len(class_list))

Like this?

No, this won’t work since you are initializing the layer as:

nn.Linear(
    in_features=hidden*16*16,
    out_features=output
)

in which case hidden should be 200704 / 16 / 16 = 784.
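A quick sanity check of that arithmetic:

print(200704 / 16 / 16)  # 784.0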

@ptrblck

model_x_5 = tinyvgg12(input=3,
                      hidden=784,
                      output=len(class_list))

nn.Linear(
    in_features=hidden*16*16,
    out_features=output
)

Like this?

@ptrblck I set

model_x_5 = tinyvgg12(input=3,
                      hidden=784,
                      output=len(class_list))

nn.Linear(
    in_features=hidden*16*16,
    out_features=output
)

like this, but I am still seeing the same error:
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
in <cell line: 19>()
     25 input_tensor = input_tensor.unsqueeze(0)
     26 with torch.no_grad():
---> 27     output = model(input_tensor.to(device))
     28
     29 cv2.imshow("object detection", frame)

5 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x2458624 and 200704x1)

Based on the error message, the input shape has changed, which causes the same error with a different shape.
Make sure all your inputs have the same shape, or add adaptive pooling layers if you are working with varying input shapes.

How do I add adaptive pooling layers, and how do I check that the inputs have the same shape?

You can check the shape of the inputs by simply printing it via print(input.shape), where input is the input tensor.
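For example, inside the video loop above (a minimal sketch reusing cap and transform from your detection code):

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    input_tensor = transform(frame_rgb).unsqueeze(0)
    # Every frame should print the same shape, e.g. torch.Size([1, 3, 224, 224])
    print(input_tensor.shape)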

You can check e.g. the ResNet implementation to see how adaptive pooling layers are used.
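In ResNet the adaptive pooling layer sits between the last conv block and the classifier, so the flattened feature count is fixed regardless of the input resolution. A minimal sketch of the idea (the channel count of 64 is only an assumption for illustration):

import torch
import torch.nn as nn

conv_out = torch.randn(1, 64, 56, 56)     # e.g. an activation coming out of the conv blocks
avgpool = nn.AdaptiveAvgPool2d((16, 16))  # pools any spatial size down to 16x16
pooled = avgpool(conv_out)
print(pooled.shape)                       # torch.Size([1, 64, 16, 16])
# Flattening now always yields 64*16*16 features, matching a fixed nn.Linear in_features.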

class tinyvgg12(nn.Module):
    def __init__(self, input, hidden, output):
        super().__init__()
        self.Conv_block1 = nn.Sequential(
            nn.Conv2d(
                in_channels=input,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.Conv_block2 = nn.Sequential(
            nn.Conv2d(
                in_channels=hidden,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden,
                out_channels=hidden,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden*16*16,
                out_features=output
            )
        )
        self.avgpool = nn.AdaptiveAvgPool2d((3, 3))

    def forward(self, x):
        x = self.Conv_block1(x)
        # print(x.shape)
        x = self.Conv_block2(x)
        # print(x.shape)
        x = self.classifier(x)
        # print(x.shape)
        x = self.avgpool(x)
        return x

I put adaptive pooling in, but I am still seeing this error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
in <cell line: 9>()
      8
      9 for epoch in tqdm(range(epochs)):
---> 10     train_step(model=model_x_5.to(device),
     11                data_loader=train_dataloader,
     12                optimizer=optimizer,

6 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/utils.py in _list_with_default(out_size, defaults)
     35         return out_size
     36     if len(defaults) <= len(out_size):
---> 37         raise ValueError(
     38             "Input dimension should be at least {}".format(len(out_size) + 1)
     39         )

ValueError: Input dimension should be at least 3