Hello, I'm new to this space and I'm trying to build my first custom zero-shot classification model based on some tutorials I've found. However, I'm stuck because I'm getting this MPS error on my M1:
RuntimeError: Placeholder storage has not been allocated on MPS device!
I understand this to mean that there is a mismatch where not everything resides on the MPS device. What I don't understand is what exactly I've missed moving to MPS. Can someone please take a look at my code and help me get unstuck / explain what I'm missing? I'd appreciate it very much!
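In case it helps, here's the quick sanity check I've been using to see where things actually end up (just a rough sketch I wrote for debugging, not from any tutorial; `model` and `encoded_dataset` are the same objects defined in the full script below):

def report_devices(model, batch):
    # print where the model weights live
    print("model:", next(model.parameters()).device)
    # print where each tensor in one batch lives
    for name, value in batch.items():
        if isinstance(value, torch.Tensor):
            print(name, "->", value.device)

# e.g. report_devices(model, encoded_dataset["train"][:2])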
import pandas as pd  # for loading the CSV of training data
import numpy as np
import torch
from datasets import Dataset, DatasetDict
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EvalPrediction,
)
# Use the Apple Silicon GPU if it's available, otherwise fall back to CPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# 90% of the rows go to training; half of the remaining 10% is held out for validation
train_perc = 0.9
validate_perc = 0.5

dataframe = pd.read_csv('model-data/tags-training-data.csv')
dataframe = dataframe.convert_dtypes()

num_training_examples = int(dataframe.shape[0] * train_perc)
dataframe_train = dataframe.iloc[:num_training_examples]
dataframe_test = dataframe.iloc[num_training_examples:]

num_validate_examples = int(dataframe_test.shape[0] * validate_perc)
dataframe_validate = dataframe_test.iloc[:num_validate_examples]
dataset = DatasetDict()
dataset['train'] = Dataset.from_pandas(dataframe_train)
dataset['test'] = Dataset.from_pandas(dataframe_test)
dataset['validation'] = Dataset.from_pandas(dataframe_validate)
# Every column except the raw text column ('query') is treated as a label
labels = [label for label in dataset['train'].features.keys() if label not in ['query']]
id2label = {idx: label for idx, label in enumerate(labels)}
label2id = {label: idx for idx, label in enumerate(labels)}
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
def preprocess_data(examples):
    # Tokenize the batch of query strings
    text = examples["query"]
    encoding = tokenizer(text, padding="max_length", truncation=True, max_length=256)
    # Build a (batch_size, num_labels) matrix of floats for the multi-label targets
    labels_batch = {k: examples[k] for k in examples.keys() if k in labels}
    labels_matrix = np.zeros((len(text), len(labels)))
    for idx, label in enumerate(labels):
        labels_matrix[:, idx] = labels_batch[label]
    encoding["labels"] = labels_matrix.tolist()
    return encoding
# My attempt at moving a batch onto the device (currently not called anywhere)
def test_data(examples):
    return examples.to(device)
encoded_dataset = dataset.map(preprocess_data, batched=True, remove_columns=dataset['train'].column_names)
encoded_dataset.set_format('torch')
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    problem_type="multi_label_classification",
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)
model = model.to(device)
batch_size = 8
metric_name = "f1"
args = TrainingArguments(
    "bert-finetuned-sem_eval-english",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
)
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average='micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    result = multi_label_metrics(predictions=preds, labels=p.label_ids)
    return result
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()