Newbie Struggling with MPS

Hello, I am a newbie to this space and I am trying to create my first customized zero-shot-classification model based on some tutorials I’ve found. However, I’m stuck because I am getting this MPS error on my M1:

RuntimeError: Placeholder storage has not been allocated on MPS device!

I understand this to mean that there is a mismatch: not everything resides on the MPS device. However, what I don’t understand is what exactly I’ve failed to move to MPS. Can someone please take a look at my code and help me get unstuck / explain what I’m missing? I’d appreciate it very much!!

import pandas as pd # For loading data
import numpy as np
import torch

from transformers import AutoTokenizer
from datasets import Dataset, DatasetDict
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction

# Use the MPS backend when PyTorch reports it as available; otherwise fall back to the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Fraction of all rows used for training; the remaining rows are held out.
train_perc = 0.9
# Fraction of the held-out rows used for validation; the rest of them form the test split.
validate_perc = 0.5

dataframe = pd.read_csv('model-data/tags-training-data.csv')
dataframe = dataframe.convert_dtypes()
num_training_examples = int(dataframe.shape[0] * train_perc)

# NOTE(review): rows are split in file order with no shuffling — confirm the
# CSV is not sorted in a way that would bias the splits.
dataframe_train = dataframe.iloc[:num_training_examples]
dataframe_holdout = dataframe.iloc[num_training_examples:]

# Cut the held-out rows into two DISJOINT parts. Previously the validation
# rows were also part of the test split, so a model selected on validation
# had effectively already been tuned on part of the test data.
num_validate_examples = int(dataframe_holdout.shape[0] * validate_perc)
dataframe_validate = dataframe_holdout.iloc[:num_validate_examples]
dataframe_test = dataframe_holdout.iloc[num_validate_examples:]

dataset = DatasetDict()
dataset['train'] = Dataset.from_pandas(dataframe_train)
dataset['test'] = Dataset.from_pandas(dataframe_test)
dataset['validation'] = Dataset.from_pandas(dataframe_validate)

# Every column of the training split except the text column "query" is a label.
labels = [column for column in dataset['train'].features.keys() if column != 'query']
# Lookup tables between a label's position in ``labels`` and its name.
id2label = dict(enumerate(labels))
label2id = {name: position for position, name in id2label.items()}

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def preprocess_data(examples):
    """Tokenize a batch of queries and attach the matching label matrix.

    ``examples`` is a batch indexed by column name (the function is mapped
    with ``batched=True``). The "query" column is tokenized, padded and
    truncated to 256 tokens. The returned encoding gains a ``"labels"``
    entry: a list of rows, one per query, with one value per name in the
    module-level ``labels`` list, in that order.
    """
    queries = examples["query"]
    encoding = tokenizer(queries, padding="max_length", truncation=True, max_length=256)

    # One row per query, one column per label; filled column by column
    # straight from the batch.
    label_matrix = np.zeros((len(queries), len(labels)))
    for column, name in enumerate(labels):
        label_matrix[:, column] = examples[name]

    encoding["labels"] = label_matrix.tolist()
    return encoding

def test_data(examples):
    """Return ``examples`` moved to the module-level ``device`` via ``.to``.

    NOTE(review): nothing in the visible script calls this helper.
    """
    moved = examples.to(device)
    return moved

# Tokenize every split in batches and drop the raw columns, so each split
# keeps only what preprocess_data returns.
raw_columns = dataset['train'].column_names
encoded_dataset = dataset.map(preprocess_data, batched=True, remove_columns=raw_columns)
encoded_dataset.set_format('torch')

# Classification-head settings: one output per label, multi-label problem type.
model_options = {
    "problem_type": "multi_label_classification",
    "num_labels": len(labels),
    "id2label": id2label,
    "label2id": label2id,
}
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", **model_options)

# NOTE(review): the Trainer is expected to place the model and each batch on
# the device chosen by TrainingArguments — confirm that device matches this one.
model = model.to(device)

batch_size = 8
# Must match a key of the dict returned by multi_label_metrics.
metric_name = "f1"

# Evaluate and checkpoint once per epoch so the best checkpoint (by
# ``metric_name``) can be reloaded when training finishes.
args = TrainingArguments(
    output_dir="bert-finetuned-sem_eval-english",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
)

def multi_label_metrics(predictions, labels, threshold=0.5):
    """Compute micro-averaged F1, ROC AUC and accuracy for multi-label output.

    Args:
        predictions: Raw model logits, array-like of shape
            (batch_size, num_labels).
        labels: Ground-truth indicator matrix with the same shape.
        threshold: Probability at or above which a label counts as predicted.

    Returns:
        Dict with keys ``'f1'``, ``'roc_auc'`` and ``'accuracy'``.
    """
    # Sigmoid turns each logit into an independent per-label probability.
    probs = torch.sigmoid(torch.as_tensor(predictions, dtype=torch.float32)).numpy()
    # Threshold the probabilities into a 0/1 prediction matrix.
    y_pred = (probs >= threshold).astype(float)

    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    # ROC AUC ranks examples by score, so it needs the continuous
    # probabilities; thresholded 0/1 values would collapse the curve to a
    # single operating point.
    roc_auc = roc_auc_score(y_true, probs, average='micro')
    # On an indicator matrix this is exact-match accuracy: a row only counts
    # when every label in it is right.
    accuracy = accuracy_score(y_true, y_pred)

    return {
        'f1': f1_micro_average,
        'roc_auc': roc_auc,
        'accuracy': accuracy,
    }

def compute_metrics(p: EvalPrediction):
    """Unpack an ``EvalPrediction`` and score it with ``multi_label_metrics``.

    When ``p.predictions`` is a tuple, only its first element is used.
    """
    predictions = p.predictions
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    return multi_label_metrics(predictions=predictions, labels=p.label_ids)

# Wire the model, training arguments, encoded splits and metric callback together.
# Training uses the "train" split; evaluation uses "validation".
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

I am not familiar with the Trainer API, but here:

train_dataset=encoded_dataset["train"],
eval_dataset=encoded_dataset["validation"],

I don’t see where you have told it to use the “mps” backend for the inputs to the model.