I want to get a list of indexes for text items where the LLM’s output has the same sentiment as the input. However, I’m getting errors in PyTorch and don’t understand why.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1369: indexSelectSmallIndex: block: [21,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/pytorch/aten/src/ATen/native/cuda/Indexing.cu:1369: indexSelectSmallIndex: block: [21,0,0], thread: [2,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_94/921698949.py in <cell line: 0>()
----> 1 ng_ds_indxs = filter_input_by_output_sentiment(ng_dl, "NEGATIVE")

/tmp/ipykernel_94/4054871963.py in filter_input_by_output_sentiment(dataloader, expected_sentiment)
     27
     28         # Generate sequences for the whole batch
---> 29         generated_ids = llm.generate(
     30             input_ids=input_ids,
     31             attention_mask=attention_mask,

/tmp/ipykernel_94/3559914769.py in generate(self, input_ids, attention_mask, **gen_kwargs)
     60
     61         with torch.no_grad():
---> 62             generated_ids = self.model.generate(
     63                 input_ids=input_ids,
     64                 attention_mask=attention_mask,
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
My Code:
# Disable the tokenizers library's internal parallelism. Set before the
# tokenizer is created so the setting is in place before any encoding runs.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

tokenizer = AutoTokenizer.from_pretrained(config.model_name_chat)

# ConceptDataset pads/truncates every text to tokenizer.model_max_length,
# so this fixes the prompt length at 32 tokens.
tokenizer.model_max_length = 32

# The prompts are fed to a causal LM's generate(): the model continues from
# the last position of each row, so padding must sit on the left. With
# right padding the continuation would start after a run of pad tokens.
tokenizer.padding_side = "left"

if tokenizer.pad_token_id is None:
    # No dedicated pad token: reuse EOS so padding="max_length" works.
    tokenizer.pad_token_id = tokenizer.eos_token_id
class ConceptDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes the ``review`` column on access.

    Each item is a dict with two 1-D tensors of equal, fixed length:
    ``"ids"`` (token ids) and ``"mask"`` (attention mask).
    """

    def __init__(self, df, tokenizer, max_length=None):
        """Keep the review texts and the tokenizer used to encode them.

        Args:
            df: Table-like object; ``df["review"].tolist()`` must return the
                list of review strings.
            tokenizer: Callable invoked as ``tokenizer(text, max_length=...,
                padding="max_length", truncation=True, return_tensors="pt")``
                that returns ``"input_ids"`` and ``"attention_mask"``.
            max_length: Optional fixed sequence length. When ``None`` (the
                default) ``tokenizer.model_max_length`` is used.
        """
        self.texts = df["review"].tolist()
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of review texts."""
        return len(self.texts)

    def __getitem__(self, index):
        """Tokenize the text at ``index`` and return its ids and mask."""
        # Resolved per item so a later change to tokenizer.model_max_length
        # is still honoured when no explicit max_length was given.
        max_length = self.max_length
        if max_length is None:
            max_length = self.tokenizer.model_max_length

        encoded = self.tokenizer(
            self.texts[index].strip(),
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # The tokenizer returns a leading batch axis of size 1; drop it so
        # the DataLoader can stack items into (batch, seq_len).
        return {
            "ids": encoded["input_ids"].squeeze(0),
            "mask": encoded["attention_mask"].squeeze(0),
        }
# Dataset over the positive split.
train_pos_ds = ConceptDataset(df_train_pos, tokenizer)

# NOTE(review): train_ng_ds is not defined anywhere in the visible snippet —
# presumably it is built like train_pos_ds from the negative split; confirm.
# shuffle is left at its default, so batches follow dataset order.
ng_dl = DataLoader(
    train_ng_ds,
    batch_size=8,
    num_workers=workers,
    pin_memory=True,
)

# 4-bit NF4 quantization with float16 compute, single quantization pass.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

# Generator model, loaded quantized.
model = AutoModelForCausalLM.from_pretrained(
    config.model_name_chat,
    device_map='auto',
    quantization_config=bnb_config,
)

# Classifier applied to the generated text.
sentiment = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
    device=device,
    truncation=True,
    framework="pt",
)

clear_output()
def filter_input_by_output_sentiment(dataloader, expected_sentiment):
    """Return positions of inputs whose generated text has the given sentiment.

    For every batch the module-level ``model`` generates up to 50 new tokens,
    the continuation (prompt removed) is decoded with ``tokenizer`` and
    classified by the module-level ``sentiment`` pipeline.

    Args:
        dataloader: Iterable of batches, each a dict with ``"ids"`` and
            ``"mask"`` tensors. Positions are counted in iteration order,
            so they only map back to the dataset if the loader does not
            shuffle.
        expected_sentiment: ``"POSITIVE"`` or ``"NEGATIVE"`` (any case).

    Returns:
        List of int positions whose generated text was labelled
        ``expected_sentiment``.

    Raises:
        ValueError: If ``expected_sentiment`` is not a valid label, or if a
            batch contains a token id outside the model's embedding table.
    """
    valid_sentiments = {"POSITIVE", "NEGATIVE"}
    expected_sentiment = expected_sentiment.upper()
    if expected_sentiment not in valid_sentiments:
        raise ValueError(f"expected_sentiment must be one of {valid_sentiments}, got '{expected_sentiment}'")

    # Rows in the input embedding table. A token id at or above this value
    # makes the CUDA embedding lookup fail with the asynchronous
    # `srcIndex < srcSelectDimSize` device-side assert, which is hard to
    # trace; checking on the host gives a precise error instead.
    vocab_size = model.get_input_embeddings().weight.shape[0]

    matching_indexes = []
    global_index = 0
    for batch in tqdm(dataloader, total=len(dataloader), desc="Filtering by sentiment"):
        ids = batch["ids"]
        if ids.numel():
            lowest, highest = int(ids.min()), int(ids.max())
            if lowest < 0 or highest >= vocab_size:
                raise ValueError(
                    f"token ids in batch starting at index {global_index} span "
                    f"[{lowest}, {highest}] but the model embedding table has "
                    f"{vocab_size} rows; the tokenizer (including its pad token) "
                    f"does not match the model"
                )

        input_ids = ids.to(device)
        attention_mask = batch["mask"].to(device)

        generated_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=50,
            # Pad finished rows with the tokenizer's pad token so that
            # skip_special_tokens strips them when decoding.
            pad_token_id=tokenizer.pad_token_id,
        )

        # generate() returns prompt + continuation; every prompt in the
        # batch has the same padded length, so one slice drops them all.
        input_len = input_ids.size(1)
        batch_texts = tokenizer.batch_decode(
            generated_ids[:, input_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

        results = sentiment(batch_texts)
        matching_indexes.extend(
            global_index + offset
            for offset, res in enumerate(results)
            if res["label"] == expected_sentiment
        )
        global_index += input_ids.size(0)

    return matching_indexes
# Positions (in ng_dl iteration order) of inputs whose generated text is
# classified NEGATIVE.
ng_ds_indxs = filter_input_by_output_sentiment(
    dataloader=ng_dl,
    expected_sentiment="NEGATIVE",
)