Hi all!
I am getting the following error when trying to fine-tune a model with Hugging Face Transformers:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-33-765da1bd258d> in <cell line: 1>()
----> 1 trainer = Trainer(
2 model=model,
3 args=args,
4 data_collator=data_collator,
5 train_dataset=train_dataset,
5 frames
/usr/local/lib/python3.10/dist-packages/torch/cuda/random.py in cb()
120 for i in range(device_count()):
121 default_generator = torch.cuda.default_generators[i]
--> 122 default_generator.manual_seed(seed)
123
124 _lazy_call(cb, seed_all=True)
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
I know there are other posts discussing this issue, but none of them has helped me. Can somebody suggest a solution?
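As the error message itself suggests, my next step is to rerun with synchronous kernel launches so the stack trace points at the op that actually fails. This is just a sketch of what I'd put in the very first cell, before anything touches CUDA:

import os

# Must run before the first CUDA call (i.e. before the model is moved to GPU),
# otherwise the setting has no effect. With it, kernel launches are synchronous
# and the traceback should point at the kernel that actually asserted.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"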
Here's my code, in case it helps to get a fuller picture:
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

model = AutoModelForSequenceClassification.from_pretrained("ClassCat/roberta-small-basque", num_labels=12)
tokenizer = AutoTokenizer.from_pretrained("ClassCat/roberta-small-basque")

dataset_eu = load_dataset("orai-nlp/basqueGLUE", "bhtc")
def tokenizer_func(examples):
    return tokenizer(examples['text'], truncation=True)
tokenized_datasets = dataset_eu.map(tokenizer_func, batched=True)
train_dataset = tokenized_datasets["train"]
validation_dataset = tokenized_datasets["validation"]
import re

def clean_text(data):
    text = data.get('text', '')
    data['text'] = text
    return data

train_dataset_clean = train_dataset.map(clean_text)
validation_dataset_clean = validation_dataset.map(clean_text)
model_name = 'XLM-EusBERTa-topic-classification'
batch_size = 8
num_epochs = 20

args = TrainingArguments(
    f'{model_name}',
    evaluation_strategy="epoch",
    save_strategy="no",
    learning_rate=1e-4,
    per_device_eval_batch_size=batch_size,
    per_device_train_batch_size=batch_size,
    weight_decay=0.01,
    num_train_epochs=num_epochs,
    lr_scheduler_type='cosine',
    warmup_ratio=0.1,
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    print("True labels:", labels)
    print("Predicted labels:", preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="weighted", zero_division=0)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "F1": f1, "precision": precision, "recall": recall}
trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()
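From other threads I gather this assert often means a label index outside [0, num_labels) reaching the loss kernel. Here is the sanity check I intend to run before training; it's only a sketch and assumes the label column of the bhtc config is named 'label':

# Every label must lie in [0, num_labels), otherwise the CUDA loss kernel asserts.
label_feature = dataset_eu["train"].features["label"]
print("num_classes in dataset:", label_feature.num_classes)  # should equal num_labels=12

all_labels = set(train_dataset["label"]) | set(validation_dataset["label"])
print("distinct labels:", sorted(all_labels))
assert min(all_labels) >= 0 and max(all_labels) < 12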