Hi all,
I am trying to develop a CNN-LSTM model for text classification. Here are the __init__ and forward methods of my model:
def __init__(self, bert_config, device, dropout_rate, n_class, out_channel=16, lstm_hidden_size=None):
    super(CustomBertCNNLSTMModel, self).__init__()
    self.bert_config = bert_config
    self.dropout_rate = dropout_rate
    self.n_class = n_class
    self.out_channel = out_channel
    model_config = AutoConfig.from_pretrained(self.bert_config, output_hidden_states=True)
    self.bert = AutoModel.from_pretrained(self.bert_config, config=model_config)
    self.out_channels = self.bert.config.num_hidden_layers * self.out_channel
    self.tokenizer = AutoTokenizer.from_pretrained(self.bert_config, model_max_length=512)
    if not lstm_hidden_size:
        self.lstm_hidden_size = self.bert.config.hidden_size
    else:
        self.lstm_hidden_size = lstm_hidden_size
    # one group of out_channel filters per BERT layer; the kernel spans the full hidden size
    self.conv = nn.Conv2d(in_channels=self.bert.config.num_hidden_layers,
                          out_channels=self.out_channels,
                          kernel_size=(3, self.bert.config.hidden_size),
                          groups=self.bert.config.num_hidden_layers)
    self.lstm = nn.LSTM(self.bert.config.hidden_size, self.lstm_hidden_size, bidirectional=False)
    self.hidden_to_softmax1 = nn.Linear(self.out_channels, self.n_class, bias=True)
    self.hidden_to_softmax2 = nn.Linear(self.lstm_hidden_size * 2, n_class, bias=True)
    self.dropout = nn.Dropout(p=self.dropout_rate)
    self.device = device
def forward(self, sents):
    sents_tensor, masks_tensor, sents_lengths = sents_to_tensor(self.tokenizer, sents, self.device)
    encoded_layers = self.bert(input_ids=sents_tensor, attention_mask=masks_tensor, return_dict=True)
    # hidden_states holds the embedding output plus one tensor per layer;
    # drop the first entry so the channel count matches the conv's in_channels
    encoded_stack_layer = torch.stack(encoded_layers.hidden_states[1:], dim=1)  # (batch_size, num_layers, seq_len, hidden_size)
    conv_out = self.conv(encoded_stack_layer)  # (batch_size, out_channels, some_length, 1)
    conv_out = torch.squeeze(conv_out, dim=3)  # (batch_size, out_channels, some_length)
    conv_out, _ = torch.max(conv_out, dim=2)   # (batch_size, out_channels)
    enc_hiddens, (last_hidden, last_cell) = self.lstm(pack_padded_sequence(conv_out, sents_lengths, enforce_sorted=False))
    output_hidden = torch.cat((last_hidden[0], last_hidden[1]), dim=1)
    output_hidden = self.dropout(output_hidden)
    pre_softmax = self.hidden_to_softmax2(output_hidden)
    return pre_softmax
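In case it matters, sents_to_tensor is a small helper of mine that tokenizes the raw sentences and returns padded ids, attention masks, and the true sequence lengths. It looks roughly like this (a minimal sketch; the exact padding/truncation settings are not shown above):

def sents_to_tensor(tokenizer, sents, device):
    # Tokenize a batch of raw strings, pad to the longest sequence,
    # and keep the true (unpadded) lengths for pack_padded_sequence.
    encoded = tokenizer(sents, padding=True, truncation=True, return_tensors='pt')
    sents_tensor = encoded['input_ids'].to(device)
    masks_tensor = encoded['attention_mask'].to(device)
    sents_lengths = encoded['attention_mask'].sum(dim=1)  # lengths stay on CPU for packing
    return sents_tensor, masks_tensor, sents_lengths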
I get the following error:
expected_input_dim, input.dim()))
RuntimeError: input must have 2 dimensions, got 1
in this line:
enc_hiddens, (last_hidden, last_cell) = self.lstm(pack_padded_sequence(conv_out, sents_lengths, enforce_sorted=False))
I am really confused about feeding the CNN output into the LSTM to build a hybrid model. Can someone kindly point me in the right direction?
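To make the shapes concrete, here is my understanding of what the LSTM expects, as a standalone sketch with dummy tensors (all sizes here are made up, and this is not my actual model):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

batch_size, channels, seq_len = 4, 16, 10              # made-up sizes
conv_out = torch.randn(batch_size, channels, seq_len)  # CNN output: (batch, channels, seq)

# nn.LSTM without batch_first expects (seq, batch, features), so the
# sequence dimension has to survive the CNN and be moved to the front:
lstm_in = conv_out.permute(2, 0, 1)                    # (seq, batch, channels)

lengths = torch.full((batch_size,), seq_len, dtype=torch.long)
lstm = nn.LSTM(input_size=channels, hidden_size=32)
packed = pack_padded_sequence(lstm_in, lengths, enforce_sorted=False)
output, (last_hidden, last_cell) = lstm(packed)
print(last_hidden.shape)                               # torch.Size([1, 4, 32])

Is this the right way to think about connecting the two, or am I missing something in my own forward pass?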