CNN Video Classification RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 2, 5, 5], but got 5-dimensional input of size [32, 10, 2, 100, 100] instead

I am working on a CNN model that classifies videos. I extract frames from the videos and stack all of the frames into a single tensor that I pass as input.
Number of frames extracted from each video - 10
Batch size - 32
Frame size - 100x100

My model is as shown below:

import numpy as np
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self, img_x=100, img_y=100, fc_hidden1=1024, fc_hidden2=768, drop_p=0.5, output=8):
        super().__init__()

        self.img_x = img_x
        self.img_y = img_y
        self.output = output

        # CNN architectures
        self.ch1, self.ch2, self.ch3, self.ch4 = 32, 64, 128, 256
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)  # 2d kernel sizes
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)  # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)  # 2d padding

        # conv2D output shapes
        self.conv1_outshape = self.conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1)
        self.conv2_outshape = self.conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
        self.conv3_outshape = self.conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
        self.conv4_outshape = self.conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)

        # fully connected layer hidden nodes
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=2, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1),
            nn.BatchNorm2d(self.ch1, momentum=0.5),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2,
                      padding=self.pd2),
            nn.BatchNorm2d(self.ch2, momentum=0.5),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3,
                      padding=self.pd3),
            nn.BatchNorm2d(self.ch3, momentum=0.5),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4,
                      padding=self.pd4),
            nn.BatchNorm2d(self.ch4, momentum=0.5),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )

        self.drop = nn.Dropout2d(self.drop_p)
        self.pool = nn.MaxPool2d(kernel_size=3)
        self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1)  # fully connected layers
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.fc3 = nn.Linear(self.fc_hidden2, self.output)  # output layer, one logit per class

    def forward(self, x):
        # CNNs
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = x.view(x.size(0), -1)  # flatten the output of conv

        # FC layers
        x = F.dropout(x, p=self.drop_p, training=self.training)
        x = F.relu(self.fc1(x))
        # x = F.dropout(x, p=self.drop_p, training=self.training)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

    def conv2D_output_size(self, img_size, padding, kernel_size, stride):
        # compute the output shape of a conv2d layer
        outshape = (np.floor((img_size[0] + 2 * padding[0] - (kernel_size[0] - 1) - 1) / stride[0] + 1).astype(int),
                    np.floor((img_size[1] + 2 * padding[1] - (kernel_size[1] - 1) - 1) / stride[1] + 1).astype(int))
        return outshape

I got the error below:

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 2, 5, 5], but got 5-dimensional input of size [32, 10, 2, 100, 100] instead

Could you help me solve this issue? Thanks in advance.

The error is raised because nn.Conv2d expects a 4D input of shape [batch_size, channels, height, width], while you are apparently providing a 5D input of shape [batch_size, nb_frames, channels, height, width].
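For illustration, the mismatch can be reproduced with just the first conv layer and the shapes from your post:

import torch
import torch.nn as nn

conv1 = nn.Conv2d(in_channels=2, out_channels=32, kernel_size=(5, 5), stride=(2, 2))
print(conv1.weight.shape)             # torch.Size([32, 2, 5, 5]), i.e. the weight from the error
x = torch.randn(32, 10, 2, 100, 100)  # 5D: [batch_size, nb_frames, channels, height, width]
conv1(x)                              # raises the RuntimeError above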

You could use nn.Conv3d for these shapes and treat the nb_frames dimension as the depth.
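A minimal sketch of that approach, assuming you permute the channel dimension in front of the frame dimension so the frames act as the depth (the kernel and stride values here are only placeholders):

import torch
import torch.nn as nn

x = torch.randn(32, 10, 2, 100, 100)  # [batch, frames, channels, H, W]
x = x.permute(0, 2, 1, 3, 4)          # -> [batch, channels, frames, H, W], as nn.Conv3d expects
conv3d = nn.Conv3d(in_channels=2, out_channels=32, kernel_size=(3, 5, 5), stride=(1, 2, 2))
out = conv3d(x)
print(out.shape)                      # torch.Size([32, 32, 8, 48, 48])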
Alternatively, you could pass each frame through your current model one by one, if that fits your use case.
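A sketch of this per-frame route, folding the frames into the batch dimension; averaging the per-frame logits at the end is just one assumed way to combine them per video:

import torch

model = Net()                         # the model from your post, unchanged
x = torch.randn(32, 10, 2, 100, 100)  # [batch, frames, channels, H, W]

b, f, c, h, w = x.shape
x = x.view(b * f, c, h, w)            # [320, 2, 100, 100], a valid 4D input for nn.Conv2d
out = model(x)                        # per-frame logits: [320, 8]
out = out.view(b, f, -1).mean(dim=1)  # aggregate the frames of each video: [32, 8]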

Thanks for your answer.