Is there any difference between these two approaches, or are they equivalent?
Approach 1 — I use a pretrained BERT model inside my model, with BERT frozen (not trained):
class Model(nn.Module):
    """BERT encoder followed by a bidirectional LSTM and a linear classifier.

    Approach 1: the pretrained BERT runs inside the model's forward pass.
    Its weights are frozen here, matching the stated intent
    ("BERT training == False").

    Args:
        input_size: feature size of BERT's last hidden state fed to the LSTM
            (768 for bert-base-uncased — verify against the checkpoint used).
        hidden_size: LSTM hidden size per direction.
    """

    def __init__(self, input_size, hidden_size):
        super(Model, self).__init__()
        self.model = BertModel.from_pretrained('bert-base-uncased')
        # Freeze BERT so only the LSTM and the classifier are trained.
        for param in self.model.parameters():
            param.requires_grad = False
        self.rnn = nn.LSTM(input_size, hidden_size,
                           batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2, 10)

    def forward(self, token, attention):
        # BUG FIX: the original called bare `model(...)` and `self.lstm(...)`;
        # the attributes are `self.model` and `self.rnn` — both would raise
        # NameError / AttributeError at runtime.
        out = self.model(token, attention)[0]  # [0] = last_hidden_state
        out, (hidden, cell) = self.rnn(out)
        # Concatenate the final forward (-2) and backward (-1) hidden states.
        out = torch.cat([hidden[-2, :, :], hidden[-1, :, :]], dim=1)
        out = self.fc(out)
        return out
Approach 2 — I precompute BERT's last_hidden_state outside the model and feed it in as embedding features:
class Model(nn.Module):
    """Bidirectional LSTM + linear classifier over precomputed BERT features.

    Approach 2: BERT's last_hidden_state is computed outside this model and
    passed to forward() as ready-made embeddings.

    Args:
        input_size: feature size of the precomputed hidden states
            (768 for bert-base-uncased — verify against the checkpoint used).
        hidden_size: LSTM hidden size per direction.
    """

    def __init__(self, input_size, hidden_size):
        super(Model, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size,
                           batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size*2, 10)

    def forward(self, last_hidden_states):
        # BUG FIX: the original called `self.lstm(...)` but the attribute is
        # named `self.rnn` — that would raise AttributeError at runtime.
        out, (hidden, cell) = self.rnn(last_hidden_states)
        # Concatenate the final forward (-2) and backward (-1) hidden states.
        out = torch.cat([hidden[-2, :, :], hidden[-1, :, :]], dim=1)
        out = self.fc(out)
        return out
Thanks