I have a training function like so:
def training():
model.train()
train_mae = []
progress = tqdm(train_dataloader, desc='Training')
for batch_index, batch in enumerate(progress):
x = batch['x'].to(device)
x_lengths = batch['x_lengths'].to(device)
y = batch['y'].to(device)
y_type = batch['y_type'].to(device)
y_valid_indices = batch['y_valid_indices'].to(device)
# Zero Gradients
optimizer.zero_grad()
# Forward pass
y_first, y_second = model(x)
losses = []
for j in range(len(x_lengths)):
x_length = x_lengths[j].item()
if y_type[j].item() == 0:
predicted = y_first[j]
else:
predicted = y_second[j]
actual = y[j]
valid_mask = torch.zeros_like(predicted, dtype=torch.bool)
valid_mask[:x_length] = 1
# Padding of -1 is removed from y
indices_mask = y[j].ne(-1)
valid_indices = y[j][indices_mask]
valid_predicted = predicted[valid_mask]
valid_actual = actual[valid_mask]
loss = mae_fn(valid_predicted, valid_actual, valid_indices)
losses.append(loss)
# Backward pass and update
loss = torch.stack(losses).mean() # This fails due to different shapes
loss.backward()
optimizer.step()
train_mae.append(loss.detach().cpu().numpy())
progress.set_description(
f"mae: {loss.detach().cpu().numpy():.4f}"
)
# Return the average MAEs for y type
return (
np.mean(train_mae)
)
def mae_fn(output, target, indices):
clipped_target = torch.clip(target, min=0, max=1)
maes = F.l1_loss(output, clipped_target, reduction='none')
return maes[indices]
Obviously can’t stack these losses since they have different shape due to the indices. Taking mean on maes[indices]
will solve the issue, but it’s resulting in very bad test loss. What do I to calculate the loss here since indices determine the shape depending on y_type.
How do I calculate loss here?