Hi,
based on suggestion of the error I used reshape() instead of view(). Also, I tested contiguous().view(-1) as well, but still I face the same error. Would anyone assist in it?
this is my error: RuntimeError: view size is not compatible with input tensor’s size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(…) instead.
this is my code:
from tqdm import tqdm
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")
with open("question_answer.txt", "r") as file:
text = file.read()
questions = text.split("\n")[:-1]
answers = text.split("\n")[1:]
# Define the maximum number of lines for training
max_lines = 50
# Create a progress bar
progress_bar = tqdm(total=min(max_lines, len(questions)), desc="Processing")
batch_size = 4 # Adjust the batch size according to your memory capacity
inputs = []
target_texts = []
loss_values = [] # Store the losses for each batch
optimizer_values = [] # Store the optimizer values for each batch
for i, question in enumerate(questions[:max_lines]):
if i % batch_size == 0 and i != 0:
tokenized_inputs = tokenizer.batch_encode_plus(
inputs,
padding="longest",
truncation=True,
return_tensors="pt"
)
tokenized_targets = tokenizer.batch_encode_plus(
target_texts,
padding="longest",
truncation=True,
return_tensors="pt"
)
input_ids = tokenized_inputs["input_ids"]
attention_mask = tokenized_inputs["attention_mask"]
target_ids = tokenized_targets["input_ids"]
decoder_attention_mask = tokenized_targets["attention_mask"]
model.train()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer.zero_grad()
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask,
decoder_input_ids=target_ids[:, :-1],
decoder_attention_mask=decoder_attention_mask[:, :-1],
labels=target_ids[:, 1:]
)
lm_logits = outputs.logits
loss = loss_fn(lm_logits.reshape(-1, lm_logits.size(-1)), target_ids[:, 1:].reshape(-1))
loss.backward()
optimizer.step()
loss_values.append(loss.item())
optimizer_values.append(optimizer.param_groups[0]['lr'])
inputs = []
target_texts = []
print(f"Batch {i//batch_size}, Loss: {loss.item()}")
input_text = question.format_map({'item': data.iloc[0]})
inputs.append(input_text)
target_texts.append(answers[i])
# Update the progress bar
progress_bar.update(1)
# Process the remaining batch
if inputs:
tokenized_inputs = tokenizer.batch_encode_plus(
inputs,
padding="longest",
truncation=True,
return_tensors="pt"
)
tokenized_targets = tokenizer.batch_encode_plus(
target_texts,
padding="longest",
truncation=True,
return_tensors="pt"
)
input_ids = tokenized_inputs["input_ids"]
attention_mask = tokenized_inputs["attention_mask"]
target_ids = tokenized_targets["input_ids"]
decoder_attention_mask = tokenized_targets["attention_mask"]
model.train()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer.zero_grad()
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask,
decoder_input_ids=target_ids[:, :-1],
decoder_attention_mask=decoder_attention_mask[:, :-1],
labels=target_ids[:, 1:]
)
lm_logits = outputs.logits
loss = loss_fn(lm_logits.reshape(-1, lm_logits.size(-1)), target_ids[:, 1:].reshape(-1))
loss_values.append(loss.item())
optimizer_values.append(optimizer.param_groups[0]['lr'])