Hello, I would like to modify the encoder layers of the BERT model to insert fully connected (FC) and ReLU layers.
The idea is to reproduce the approach of Squeeze-and-Excitation Networks inside the transformer encoder.
How can I use an nn.Module subclass to handle the encoder outputs?
Here is the model class I have so far:

```python
import torch.nn as nn
from transformers import BertModel


class CustomBERTModel(nn.Module):
    def __init__(self):
        super(CustomBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        # additional layers go here, for example a dropout layer
        # followed by a linear classification head
        self.dropout = nn.Dropout(0.3)
        self.out = nn.Linear(768, 2)

    def forward(self, ids, mask, token_type_ids):
        # return_dict=False makes the model return a plain tuple
        # (sequence_output, pooled_output); without it, recent versions of
        # transformers return a ModelOutput object and this unpacking fails
        sequence_output, pooled_output = self.bert(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
            return_dict=False,
        )
        # apply dropout to the sequence output, a tensor of shape
        # (batch_size, sequence_length, 768)
        sequence_output = self.dropout(sequence_output)
        # the linear layer takes the hidden state of each token (a vector of
        # size 768) and maps it to 2 scores (logits), so the logits have
        # shape (batch_size, sequence_length, 2)
        logits = self.out(sequence_output)
        return logits
```
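For context, this is how I am calling the class (the sentence is just an example):

```python
import torch
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = CustomBERTModel()

# encode an example sentence; the tokenizer returns input_ids,
# attention_mask and token_type_ids as PyTorch tensors
encoding = tokenizer("Hello, world!", return_tensors="pt")
with torch.no_grad():
    logits = model(
        encoding["input_ids"],
        encoding["attention_mask"],
        encoding["token_type_ids"],
    )
print(logits.shape)  # (1, sequence_length, 2)
```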
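For the Squeeze-and-Excitation part, this is what I am considering: wrap each encoder layer so that its output is rescaled by FC -> ReLU -> FC -> sigmoid gates computed over the hidden dimension. `SEBlock`, `BertLayerWithSE`, and the `reduction` value are my own names and choices, and I adapted the "squeeze" step to mean-pool over the sequence dimension instead of over spatial dimensions, since BERT hidden states have no spatial axes:

```python
import torch.nn as nn
from transformers import BertModel


class SEBlock(nn.Module):
    """Squeeze-and-Excitation style gating over the hidden dimension."""

    def __init__(self, hidden_size=768, reduction=16):
        super().__init__()
        self.fc1 = nn.Linear(hidden_size, hidden_size // reduction)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size // reduction, hidden_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, hidden_states):
        # squeeze: (batch, seq_len, hidden) -> (batch, hidden)
        squeezed = hidden_states.mean(dim=1)
        # excitation: per-channel gates in (0, 1)
        gates = self.sigmoid(self.fc2(self.relu(self.fc1(squeezed))))
        # rescale every token's hidden state channel-wise
        return hidden_states * gates.unsqueeze(1)


class BertLayerWithSE(nn.Module):
    """Wraps one BertLayer and applies the SE block to its output."""

    def __init__(self, bert_layer, hidden_size=768):
        super().__init__()
        self.layer = bert_layer
        self.se = SEBlock(hidden_size)

    def forward(self, hidden_states, *args, **kwargs):
        # pass everything through to the original layer, which
        # returns a tuple whose first element is the hidden states
        outputs = self.layer(hidden_states, *args, **kwargs)
        return (self.se(outputs[0]),) + outputs[1:]


bert = BertModel.from_pretrained("bert-base-uncased")
# replace every encoder layer with the wrapped version
bert.encoder.layer = nn.ModuleList(
    [BertLayerWithSE(layer) for layer in bert.encoder.layer]
)
```

Is wrapping `bert.encoder.layer` like this a reasonable way to do it, or should I subclass the encoder layer instead?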