This is my code, and I get an error when I run it. I tried converting df['labels'] to torch.float, but the issue still exists.
The error is raised by the trainer.train() call.
# Wrap the pandas DataFrame in a Hugging Face `Dataset`.
dataset = Dataset.from_pandas(df)

# Load the pretrained BERT tokenizer and a sequence-classification model
# with one output per entry in `mlb.classes_`
# (presumably a fitted MultiLabelBinarizer -- confirm against the caller).
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    problem_type="multi_label_classification",
    num_labels=len(mlb.classes_),
)
# Tokenize data
def preprocess_function(examples):
    """Tokenize the ``job_title`` column of a batch of examples.

    Truncation is enabled, and padding is applied relative to the longest
    sequence in the batch that is passed in.
    """
    titles = examples['job_title']
    encoded = tokenizer(titles, truncation=True, padding=True)
    return encoded


# batched=True hands the function many rows at once instead of one row.
tokenized_dataset = dataset.map(preprocess_function, batched=True)
# Ensure labels are stored as float32: with
# problem_type="multi_label_classification" the model computes
# BCEWithLogitsLoss, which requires float targets.
def cast_to_float(example):
    """Return ``example`` with its ``labels`` converted to a float32 tensor."""
    example['labels'] = torch.tensor(example['labels'], dtype=torch.float)
    return example


# Returning floats from `map` is not enough on its own: when an existing
# column is overwritten, `datasets` first tries to keep that column's current
# Arrow type (int64 here), and whole-number floats cast back to it without
# error, so the labels silently stay integers. Passing an explicit schema via
# `features=` forces the column to be written as float32.
# Local import; may be moved next to the file's other `datasets` imports.
from datasets import Sequence, Value

float_features = tokenized_dataset.features.copy()
float_features['labels'] = Sequence(Value('float32'))
tokenized_dataset = tokenized_dataset.map(cast_to_float, features=float_features)
# Training configuration
training_args = TrainingArguments(
    # Output locations for results and logs.
    output_dir="./results",
    logging_dir="./logs",
    # Evaluate and save on the same "epoch" schedule.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Training length and per-device batch size.
    per_device_train_batch_size=8,
    num_train_epochs=3,
)
# Trainer
# Local import; may be moved next to the file's other `transformers` imports.
from transformers import DataCollatorWithPadding

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,  # Ideally, you should split this into train/test datasets.
    # `padding=True` inside a batched `map` pads each map batch only to its
    # own longest sequence, so rows may differ in length across the dataset
    # and the default collator cannot stack them. Re-pad every training batch
    # to a common length instead.
    data_collator=DataCollatorWithPadding(tokenizer),
)

# Train model
trainer.train()