RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead

Hi,

Following the suggestion in the error message, I switched from view() to reshape(). I also tried contiguous().view(-1), but I still get the same error. Could anyone help me with this?

This is my error: RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
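For reference, this is my understanding of what the error means and of the fixes I tried; in a toy case like the one below they work, which is why I'm confused that my code still fails:

import torch

x = torch.arange(24).reshape(2, 3, 4)
y = x.transpose(0, 1)         # non-contiguous: strides no longer describe one flat block
# y.view(-1)                  # would raise the same RuntimeError
z = y.reshape(-1)             # works: reshape copies the data when a view is impossible
w = y.contiguous().view(-1)   # works: contiguous() makes a flat copy first
print(y.is_contiguous(), z.shape, w.shape)  # False torch.Size([24]) torch.Size([24])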

This is my code:

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from tqdm import tqdm

tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")
with open("question_answer.txt", "r") as file:
    text = file.read()
lines = text.split("\n")
questions = lines[:-1]  # every line is treated as a question
answers = lines[1:]     # whose answer is the line that follows it

# Define the maximum number of lines for training
max_lines = 50

# Create a progress bar
progress_bar = tqdm(total=min(max_lines, len(questions)), desc="Processing")

batch_size = 4  # Adjust the batch size according to your memory capacity

inputs = []
target_texts = []

loss_values = []  # Store the losses for each batch
optimizer_values = []  # Store the optimizer values for each batch

for i, question in enumerate(questions[:max_lines]):
    if i % batch_size == 0 and i != 0:
        tokenized_inputs = tokenizer.batch_encode_plus(
            inputs,
            padding="longest",
            truncation=True,
            return_tensors="pt"
        )
        tokenized_targets = tokenizer.batch_encode_plus(
            target_texts,
            padding="longest",
            truncation=True,
            return_tensors="pt"
        )

        input_ids = tokenized_inputs["input_ids"]
        attention_mask = tokenized_inputs["attention_mask"]
        target_ids = tokenized_targets["input_ids"]
        decoder_attention_mask = tokenized_targets["attention_mask"]

        model.train()
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
        loss_fn = torch.nn.CrossEntropyLoss()

        optimizer.zero_grad()
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=target_ids[:, :-1],  # decoder input shifted right
            decoder_attention_mask=decoder_attention_mask[:, :-1],
            labels=target_ids[:, 1:]  # labels shifted left: predict the next token
        )

        lm_logits = outputs.logits
        loss = loss_fn(lm_logits.reshape(-1, lm_logits.size(-1)), target_ids[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()

        loss_values.append(loss.item())
        optimizer_values.append(optimizer.param_groups[0]['lr'])

        inputs = []
        target_texts = []

        print(f"Batch {i//batch_size}, Loss: {loss.item()}")

    input_text = question.format_map({'item': data.iloc[0]})
    inputs.append(input_text)
    target_texts.append(answers[i])

    # Update the progress bar
    progress_bar.update(1)

# Process the remaining batch
if inputs:
    tokenized_inputs = tokenizer.batch_encode_plus(
        inputs,
        padding="longest",
        truncation=True,
        return_tensors="pt"
    )
    tokenized_targets = tokenizer.batch_encode_plus(
        target_texts,
        padding="longest",
        truncation=True,
        return_tensors="pt"
    )

    input_ids = tokenized_inputs["input_ids"]
    attention_mask = tokenized_inputs["attention_mask"]
    target_ids = tokenized_targets["input_ids"]
    decoder_attention_mask = tokenized_targets["attention_mask"]

    model.train()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    loss_fn = torch.nn.CrossEntropyLoss()

    optimizer.zero_grad()
    outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        decoder_input_ids=target_ids[:, :-1],
        decoder_attention_mask=decoder_attention_mask[:, :-1],
        labels=target_ids[:, 1:]
    )

    lm_logits = outputs.logits
    loss = loss_fn(lm_logits.reshape(-1, lm_logits.size(-1)), target_ids[:, 1:].reshape(-1))
    loss.backward()
    optimizer.step()

    loss_values.append(loss.item())
    optimizer_values.append(optimizer.param_groups[0]['lr'])

Could you post the .size() and .stride() of the failing tensor, please?
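For example (guessing at the tensors involved), something like this just before the lines that raise:

# before the forward pass
labels = target_ids[:, 1:]
print(labels.size(), labels.stride(), labels.is_contiguous())
# after the forward pass, before the manual loss
print(lm_logits.size(), lm_logits.stride(), lm_logits.is_contiguous())

That would help narrow down which tensor the error is complaining about.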