I am trying to fine-tune a Hugging Face transformer using skorch. I followed the example notebook from skorch for the implementation (Jupyter Notebook Viewer).
Fine-tuning works as in the example notebook, but now I want to apply RandomizedSearchCV from sklearn to tune the hyperparameters of the transformer model.
I am able to pass the parameter ranges to the PyTorch module in my pipeline (BertModule), but neither the skorch documentation nor the Hugging Face documentation specifies how these hyperparameters can be used for training inside the PyTorch module.
Normally, with the Hugging Face Trainer API, some TrainingArguments could be defined, but I don’t see how they can be used with skorch.
In this case, I want to tune the hyperparameters weight_decay and warmup_steps for the transformer model.
My code looks like the following:
class BertModule(nn.Module):
    """Thin ``nn.Module`` wrapper around a pretrained sequence classifier.

    The wrapped model is loaded with
    ``AutoModelForSequenceClassification.from_pretrained`` and ``forward``
    returns only its ``logits``.

    Args:
        name: Model identifier passed to ``from_pretrained``.
        num_labels: Number of target classes, passed as ``num_labels``.
        weight_decay: Accepted so existing ``module__weight_decay`` settings
            keep working; never read by this class.
        warmup_steps: Accepted so existing ``module__warmup_steps`` settings
            keep working; never read by this class.
    """

    def __init__(
        self,
        name: str,
        num_labels: int,
        # NOTE(review): neither of the two values below is read anywhere in
        # this class, so varying them has no effect on training. Weight decay
        # is normally an optimizer argument and warm-up a learning-rate
        # schedule setting -- presumably they should be routed there instead;
        # confirm against the skorch documentation.
        weight_decay: float = 0.0,
        warmup_steps: int = 0,
    ) -> None:
        super().__init__()
        self.name = name
        self.num_labels = num_labels
        self.reset_weights()

    def reset_weights(self) -> None:
        """Load the pretrained model for ``self.name`` into ``self.bert``."""
        self.bert = AutoModelForSequenceClassification.from_pretrained(
            self.name, num_labels=self.num_labels
        )

    def forward(self, **kwargs):
        """Pass ``kwargs`` to the wrapped model and return its ``logits``."""
        pred = self.bert(**kwargs)
        return pred.logits
# Two-step pipeline: tokenize the raw input, then train/predict with a skorch
# classifier wrapping BertModule.
pipe = Pipeline(
    [
        ("tokenizer", HuggingfacePretrainedTokenizer(TOKENIZER)),
        (
            "net",
            NeuralNetClassifier(
                BertModule,
                module__name=PRETRAINED_MODEL,
                # Number of classes = number of distinct training labels.
                module__num_labels=len(set(y_train)),
                # Weight decay is handed to the optimizer via skorch's
                # ``optimizer__`` prefix rather than to BertModule, whose
                # __init__ accepts the value but never reads it.
                # Assumes OPTIMIZER accepts a ``weight_decay`` keyword
                # -- TODO confirm.
                optimizer__weight_decay=0.0,
                # NOTE(review): BertModule.__init__ accepts ``warmup_steps``
                # but never reads it, so this setting is currently a no-op.
                # Warm-up presumably has to be implemented in the learning
                # rate schedule (``lr_schedule`` below) -- verify.
                module__warmup_steps=0,
                optimizer=OPTIMIZER,
                lr=LR,
                max_epochs=MAX_EPOCHS,
                criterion=CRITERION,
                batch_size=BATCH_SIZE,
                iterator_train__shuffle=True,
                device=DEVICE,
                callbacks=[
                    # The schedule is stepped after every batch, not every epoch.
                    LRScheduler(LambdaLR, lr_lambda=lr_schedule, step_every="batch"),
                    ProgressBar(),
                ],
            ),
        ),
    ]
)
# Search space for the randomized search. Keys follow the
# ``<step>__<param>`` routing convention; ``net__`` targets the "net" step.
params = {
    "net__lr": loguniform(1e-5, 1e-3),
    "net__max_epochs": randint(2, 5),  # upper bound is exclusive: 2, 3 or 4
    "net__batch_size": [8, 16, 32, 64, 128],
    # Routed to the optimizer instead of the module: BertModule accepts
    # ``weight_decay`` but never reads it, so sampling it there changed
    # nothing. Assumes the configured optimizer accepts ``weight_decay``
    # -- TODO confirm.
    "net__optimizer__weight_decay": uniform(0, 0.3),  # loc=0, scale=0.3
    # NOTE(review): BertModule also ignores ``warmup_steps``, so this
    # dimension currently has no effect on training; it presumably needs to
    # feed the learning-rate schedule instead -- verify.
    "net__module__warmup_steps": randint(0, 1000),
}
# Randomized search over ``params``: 10 sampled configurations, each scored
# by accuracy with cv=5; refit=True asks for a final refit of the best one.
rs = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=params,
    n_iter=10,
    scoring="accuracy",
    cv=5,
    refit=True,
    verbose=2,
)
rs.fit(X_train, y_train)
Thanks for the help!