I’m trying to fine-tune a BERT model with a CNN layer on top, but get the error:
conv2d() received an invalid combination of arguments - got (Tensor, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
- (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups) didn’t match because some of the arguments have invalid types: (Tensor, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
from transformers import BertModel, BertTokenizer, AdamW
import torch.nn as nn
import pytorch_lightning as pl
class CommentModule(pl.LightningModule):
def __init__(self, n_classes=13):
super().__init__()
self.bert = BertModel.from_pretrained('bert-base-uncased')
self.conv = nn.Conv2d(in_channels=13, out_channels=13, kernel_size=(3, 768), padding=True)
self.relu = nn.ReLU()
self.pool = nn.MaxPool2d(kernel_size=3, stride=1)
self.dropout = nn.Dropout(0.1)
self.fc = nn.Linear(442, 3)
self.flat = nn.Flatten()
self.softmax = nn.LogSoftmax(dim=1)
def forward(self, input_ids, attention_mask, labels=None):
# outputs[2] = [13, 32, 128, 768]
outputs = self.bert(input_ids, attention_mask, output_hidden_states=True)
# x now of shape = [32, 13, 128, 768]
# ([13 layers?, 32=batch size, 128=max_seq_length, 729=BERT hidden size)
x = torch.transpose(torch.cat(tuple([t.unsqueeze(0) for t in outputs[2]]), 0), 0, 1)
x = self.dropout(x)
########### ERROR OCCURS AT self.conv(x)
x = self.conv(x)
######################
x = self.relu(x)
x = self.dropout(x)
x = self.pool(x)
x = self.fc(self.dropout(self.flat(self.dropout(x))))
return self.softmax(x)
def training_step(self, batch, batch_idx):
input_ids = batch['input_ids']
attention_mask = batch['attention_mask']
labels = batch['labels']
loss, outputs = self.forward(input_ids, attention_mask, labels)
return {'loss': loss, 'predictions': outputs, 'labels': labels}
Notice that my received arguments are “(Tensor, Parameter, Parameter, tuple, tuple, tuple, int)”, while the expected arguments are “(Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)”. These almost match?
Conv1d() accepts 3 arguments (Conv2d accepts 4): If I set output_hidden_states=False and pass in the 3 resulting arguments to Conv1d, I get the exact same error. I’m really not sure what the problem is. outputs[0] and outputs[1] are tensors, output[2] is a tuple with shape (32, 13, 128, 768) (batch_size, input_layers, sequence_length, BERT hidden size) if that helps.