I am fine tuning a Longformer Encoder Decoder model for multi document text summarization. When I try to run through the forward pass, it gives me an error “index out of range in self”. The input shape seems to be correct, but the debugger points to something in torch Embedding going wrong. How do I fix this?
num_epochs = 8
num_training_steps = num_epochs * len(train_dataloader)
optimizer = Adam(MODEL.parameters(), lr=3e-5)
lr_scheduler = get_scheduler(
name="linear", optimizer=optimizer, num_warmup_steps=1, num_training_steps=num_training_steps # CHANGE LATER!!!!!!!
)
progress_bar = tqdm(range(num_training_steps))
# Training mode
MODEL.train()
for epoch in range(num_epochs):
for batch_idx, batch in enumerate(train_dataloader):
# Encode data
input_ids_all = []
for cluster in batch["document"]:
articles = cluster.split("|||||")[:-1]
for i, article in enumerate(articles):
article = article.replace("\n", " ")
article = " ".join(article.split())
articles[i] = article
input_ids = []
for article in articles:
input_ids.extend(TOKENIZER.encode(article, truncation=True, max_length=4096 // len(articles))[1:-1])
input_ids.append(DOCSEP_TOKEN_ID)
input_ids = ([TOKENIZER.bos_token_id]+input_ids+[TOKENIZER.eos_token_id])
input_ids_all.append(torch.tensor(input_ids))
input_ids = torch.nn.utils.rnn.pad_sequence(input_ids_all, batch_first=True, padding_value=PAD_TOKEN_ID)
# Forward pass
global_attention_mask = torch.zeros_like(input_ids)
global_attention_mask[:, 0] = 1
global_attention_mask[input_ids == DOCSEP_TOKEN_ID] = 1
print(input_ids.shape)
# outputs = MODEL.forward(input_ids) # <---------------------------------------------------------------------------------------------- causing a bug
outputs = MODEL.forward(input_ids=input_ids_all, global_attention_mask=global_attention_mask)
# Backprop
loss = outputs.loss
loss.backward()
# GD
optimizer.step()
lr_scheduler.step()
optimizer.zero_grad()
progress_bar.update(1)
# Decode output
generated_str = TOKENIZER.batch_decode(generated_ids.tolist(), skip_special_tokens=True)
metric.add_batch(predictions=generated_str, references=batch["summary"])
# Calculate metrics
print(f"Epoch: {epoch+1}, Batch: {batch_idx+1}:")
print(metric.compute())