I am trying to ensemble five transformer models, inspired by Concatenate the output of Bert and transformer.
My code for the models is as follows:
import torch
import torch.nn.functional as F
from transformers import (BertModel, TFRobertaModel, XLNetForSequenceClassification,
                          DistilBertModel, ElectraForSequenceClassification)

class BERTClassA(torch.nn.Module):
    def __init__(self):
        super(BERTClassA, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]  # first-token ([CLS]) representation
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        output = self.dropout(pooler)
        return output
class BERTClassB(torch.nn.Module):
    def __init__(self):
        super(BERTClassB, self).__init__()
        self.l2 = TFRobertaModel.from_pretrained('roberta-base')
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_2 = self.l2(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_2[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        output = self.dropout(pooler)
        return output
class BERTClassC(torch.nn.Module):
    def __init__(self):
        super(BERTClassC, self).__init__()
        self.l3 = XLNetForSequenceClassification.from_pretrained('xlnet-base-cased', num_labels=2)
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_3 = self.l3(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_3[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        output = self.dropout(pooler)
        return output
class BERTClassD(torch.nn.Module):
    def __init__(self):
        super(BERTClassD, self).__init__()
        self.l4 = DistilBertModel.from_pretrained('distilbert-base-uncased', output_hidden_states=True)
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_4 = self.l4(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_4[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        output = self.dropout(pooler)
        return output
class BERTClassE(torch.nn.Module):
    def __init__(self):
        super(BERTClassE, self).__init__()
        self.l5 = ElectraForSequenceClassification.from_pretrained('google/electra-base-discriminator', num_labels=2, return_dict=True)
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        output_5 = self.l5(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_5[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        output = self.dropout(pooler)
        return output
class MyEnsemble(torch.nn.Module):
    def __init__(self, modelA, modelB, modelC, modelD, modelE):
        super(MyEnsemble, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        self.modelC = modelC
        self.modelD = modelD
        self.modelE = modelE
        self.classifier = torch.nn.Linear(768, 6)

    def forward(self, x1, x2, x3, x4, x5):
        x1 = self.modelA(x1)
        x2 = self.modelB(x2)
        x3 = self.modelC(x3)
        x4 = self.modelD(x4)
        x5 = self.modelE(x5)
        x = torch.cat((x1, x2, x3, x4, x5), dim=1)
        x = self.classifier(F.relu(x))
        return x
However, while running the training epoch I am getting the following error:
Epoch 1/4
----------
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
/usr/local/lib/python3.7/dist-packages/transformers/tokenization_utils_base.py:2257: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).
FutureWarning,
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-55-672eca036a61> in <module>()
----> 1 get_ipython().run_cell_magic('time', '', "\nhistory = defaultdict(list)\nbest_accuracy = 0\n\nfor epoch in range(EPOCHS):\n\n print(f'Epoch {epoch + 1}/{EPOCHS}')\n print('-' * 10)\n\n train_acc, train_loss = train_epoch(\n model,\n train_data_loader, \n loss_fn, \n optimizer, \n device, \n scheduler, \n len(df_train)\n )\n\n print(f'Train loss {train_loss} accuracy {train_acc}')\n\n val_acc, val_loss = eval_model(\n model,\n val_data_loader,\n loss_fn, \n device, \n len(df_val)\n )\n\n print(f'Val loss {val_loss} accuracy {val_acc}')\n print()\n\n history['train_acc'].append(train_acc)\n history['train_loss'].append(train_loss)\n history['val_acc'].append(val_acc)\n history['val_loss'].append(val_loss)\n\n if val_acc > best_accuracy:\n torch.save(model.state_dict(), 'best_model_state_ensemble_1.bin')\n best_accuracy = val_acc")
4 frames
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2115 magic_arg_s = self.var_expand(line, stack_depth)
2116 with self.builtin_trap:
-> 2117 result = fn(magic_arg_s, cell)
2118 return result
2119
<decorator-gen-53> in time(self, line, cell, local_ns)
/usr/local/lib/python3.7/dist-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
/usr/local/lib/python3.7/dist-packages/IPython/core/magics/execution.py in time(self, line, cell, local_ns)
1191 else:
1192 st = clock2()
-> 1193 exec(code, glob, local_ns)
1194 end = clock2()
1195 out = None
<timed exec> in <module>()
<ipython-input-51-0f1c9c9d09ab> in train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples)
21 outputs = model(
22 input_ids=input_ids,
---> 23 attention_mask=attention_mask
24 )
25
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
TypeError: forward() got an unexpected keyword argument 'input_ids'
Although I am passing input_ids and attention_mask to the forward method of each individual transformer, the final ensemble model does not accept them.
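For reference, this is roughly how my training loop calls the model; it is reconstructed from the traceback above, so any surrounding names not shown there are placeholders rather than an exact copy of my code:

    # inside train_epoch (reconstructed from the traceback; batch/device names are placeholders)
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )  # <-- this is the call that raises the TypeError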
I instantiated the model classes with the following code:
modelA = BERTClassA()
modelB = BERTClassB()
modelC = BERTClassC()
modelD = BERTClassD()
modelE = BERTClassE()
and finally instantiated the ensemble as follows:
model = MyEnsemble(modelA, modelB, modelC, modelD, modelE)
model.to(device)
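To make the intent explicit, what I want is for a single call to the ensemble to feed the tokenized batch through all five sub-models and concatenate their outputs; a rough sketch of the usage I am aiming for (not working code, and the tensor names are just placeholders) is:

    # intended usage: one batch shared by all five sub-models
    outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask
    )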
Clearly I am making a mistake somewhere.
Your help would be much appreciated.