Hi @googlebot @ToddChavezz
I am facing a similar issue
def train(epoch):
tr_loss, tr_accuracy = 0, 0
nb_tr_examples, nb_tr_steps = 0, 0
tr_preds, tr_labels = ,
# put model in training mode
model.train()
for idx, batch in enumerate(training_loader):
ids = batch['ids']
ids = ids.transpose(0, 1)
ids=ids.to(device)
mask = batch['mask']
mask=mask.transpose(0, 1)
mask=mask.to(device)
targets = batch['targets']
targets=targets.transpose(0, 1)
targets=targets.to(device)
optimizer.zero_grad()
print(ids.shape)
print(mask.shape)
outputs= model(input_ids=ids, attention_mask=mask, labels=targets)
loss, tr_logits = outputs.loss, outputs.logits
tr_loss += loss.item()
nb_tr_steps += 1
nb_tr_examples += targets.size(0)
if idx % 100==0:
loss_step = tr_loss/nb_tr_steps
print(f"Training loss per 100 training steps: {loss_step}")
# compute training accuracy
flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
# now, use mask to determine where we should compare predictions with targets (includes [CLS] and [SEP] token predictions)
active_accuracy = mask.view(-1) == 1 # active accuracy is also of shape (batch_size * seq_len,)
targets = torch.masked_select(flattened_targets, active_accuracy)
predictions = torch.masked_select(flattened_predictions, active_accuracy)
tr_preds.extend(predictions)
tr_labels.extend(targets)
tmp_tr_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
tr_accuracy += tmp_tr_accuracy
# gradient clipping
torch.nn.utils.clip_grad_norm_(
parameters=model.parameters(), max_norm=MAX_GRAD_NORM
)
# backward pass
loss.backward()
optimizer.step()
epoch_loss = tr_loss / nb_tr_steps
tr_accuracy = tr_accuracy / nb_tr_steps
print(f"Training loss epoch: {epoch_loss}")
print(f"Training accuracy epoch: {tr_accuracy}")
Training epoch: 1
torch.Size([512, 4])
torch.Size([512, 4])
RuntimeError Traceback (most recent call last)
in <cell line: 1>()
1 for epoch in range(EPOCHS):
2 print(f"Training epoch: {epoch + 1}")
----> 3 train(epoch)
3 frames
/usr/local/lib/python3.10/dist-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1230 elif self.config.problem_type == “single_label_classification”:
1231 loss_fct = CrossEntropyLoss()
→ 1232 loss = loss_fct(logits.contiguous().view(-1, self.num_labels), labels.contiguous().view(-1))
1233 elif self.config.problem_type == “multi_label_classification”:
1234 loss_fct = BCEWithLogitsLoss()
RuntimeError: view size is not compatible with input tensor’s size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(…) instead.