RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 768]

class SentimentClassifier(nn.Module):
    def __init__(self, n_classes):
        super(SentimentClassifier, self).__init__()
        self.bert = AutoModel.from_pretrained("vinai/phobert-base")
        self.drop = nn.Dropout(p=0.3)
        # self.fc = nn.Linear(self.bert.config.hidden_size, n_classes)
        # nn.init.normal_(self.fc.weight, std=0.02)
        # nn.init.normal_(self.fc.bias, 0)
        self.fc = nn.Sequential(#nn.Linear(self.bert.config.hidden_size, n_classes),
                                nn.Conv2d(in_channels=128, out_channels=64, kernel_size = (5,5), padding='same', bias=True),
                                nn.MaxPool2d(kernel_size=(2,2)),
                                nn.Conv2d(in_channels=64, out_channels=32, kernel_size = (5,5), padding='same', bias=True),
                                nn.ReLU(),
                                nn.MaxPool2d(kernel_size=(2,2)),
                                nn.Conv2d(in_channels=32, out_channels=16, kernel_size=(5,5), padding='same', bias=True),
                                nn.ReLU(),
                                nn.MaxPool2d(kernel_size=(2,2)),
                                nn.Conv2d(in_channels=16, out_channels=8, kernel_size=(5,5), padding='same', bias=True),
                                nn.ReLU(),
                                nn.MaxPool2d(kernel_size=(2,2)),
                                nn.BatchNorm2d(num_features=8, eps=0.001, momentum=0.99),
                                nn.Flatten(),
                                nn.Dropout(p=0.4),
                                nn.Linear(in_features=8, out_features=128, bias=True),
                                nn.ReLU(),
                                nn.Dropout(p=0.5),
                                nn.Linear(in_features=128, out_features=class_number, bias=True),
                                nn.Softmax()
                                )
        
        ''' Note: Build Keras model as following
        '''
        # model = Sequential()
        # model.add(Conv2D(128, kernel_size=(5, 5), input_shape=(ip_shape), padding='same'))
        # model.add(MaxPooling2D(pool_size=(2, 2)))
        # # model.add(Conv2D(64, kernel_size=(5, 5), activation='relu', padding='same'))
        # # model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(Conv2D(32, kernel_size=(5, 5), activation='relu', padding='same'))
        # # model.add(MaxPooling2D(pool_size=(2, 2)))
        # # model.add(Conv2D(16, kernel_size=(5, 5), activation='relu', padding='same'))
        # # model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(BatchNormalization())
        # model.add(Flatten())
        # model.add(Dropout(0.4))
        # model.add(Dense(128, activation='relu'))
        # model.add(Dropout(0.5))
        # model.add(Dense(class_number, activation='softmax'))
        # self.fc = model
        
    def forward(self, input_ids, attention_mask):
        last_hidden_state, output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False # Dropout will errors if without this
        )

        x = self.drop(output)
        x = self.fc(x)
        return x

Hi all,
I read two topics about this issue, but still misunderstanding how to to fix this problem. Issue happen when I add a Conv2d in Sequential model, this output is a 2-dimension but input is expected 3 or 4-dimension. So, please help to figure out how can I develop this module, and please help to explain the background about this.

I also share source code file, this is open source code on GitHub but I’d like to add infer module for testing the real result.

The reason why I could not keep Keras model because I met the issue as below, I still not find out where I am wrong although almost all code run smooth.

nn.Conv2d expects a 4-dimensional input as [batch_size, channels, height, width] and will return a 4-dimensional output [batch_size, out_channels, height width]. In newer versions it can accept a 3-dimensional input if you are using a single sample and don’t want to add the batch dimension to the tensor.