Hi, I have this code that supports both differential and standard training. The two modes produce different loss values, but the weights update in exactly the same way, so the predictions end up identical… I'd like to know whether there is anything in this method that could cause that:
def train_fit(self,
              early: bool,
              n_epochs: int,
              batches_per_epoch: int = 16,
              min_batch_size: int = 256,
              lr_schedule: tuple = ((0.0, 1.0e-3), (0.2, 0.1), (0.6, 0.01), (0.9, 1.0e-4), (1.0, 1.0e-6)),
              best_loss: float = float('inf'),
              patience: int = 50,
              min_delta: int = 0):
    # Set up fit method
    loss_values = []
    batch_size = max(min_batch_size, self.x.size(0) // batches_per_epoch)
    lr_schedule_epochs, lr_schedule_rates = zip(*lr_schedule)
    counter = 0
    # Run epochs
    for epoch in range(n_epochs):
        learning_rate = np.interp(epoch / n_epochs, lr_schedule_epochs, lr_schedule_rates)
        batch_losses = []
        # Run data batches
        for batch_start in range(0, self.x.size(0), batch_size):
            batch_end = min(batch_start + batch_size, self.x.size(0))
            input = self.x[batch_start:batch_end].clone().detach().requires_grad_(True if self.differential else False)
            label = self.y[batch_start:batch_end].clone().detach()
            # Training itself
            if self.differential:
                dydx_batch = self.dydx[batch_start:batch_end].clone().detach()
                output = self.forward(input)
                output.mean().backward(retain_graph=True)
                diff_output = input.grad
                loss = 0.5 * self.criterion(output, label) + 0.5 * self.criterion(diff_output * self.lambda_j, dydx_batch * self.lambda_j)
            else:
                output = self.forward(input)
                loss = self.criterion(output, label)
            batch_losses.append(loss.item())
            # Reset accumulated weight grads, then finish the step by backpropagating the loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        # Store epoch losses
        loss_values.append(np.mean(batch_losses))
    return loss_values
I know the problem could be elsewhere, but I'd like to rule out this method first…
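To help narrow it down, here is a small standalone check I put together (toy two-layer net and made-up shapes, not my real model). It compares a gradient obtained the way train_fit does it (backward() on the mean output, then reading input.grad) with one obtained from torch.autograd.grad using create_graph=True, to see which of the two can itself carry gradients back to the weights:

import torch
import torch.nn as nn

torch.manual_seed(0)
net = nn.Sequential(nn.Linear(3, 8), nn.Softplus(), nn.Linear(8, 1))  # toy model, arbitrary sizes
x = torch.randn(5, 3, requires_grad=True)

# Route used in train_fit: backward() fills x.grad, but without create_graph=True
# the stored gradient has no history, so a loss built from it cannot reach the weights.
out = net(x)
out.mean().backward(retain_graph=True)
print(x.grad.grad_fn)        # None  -> detached from the graph
print(x.grad.requires_grad)  # False

# Alternative: request the input gradient explicitly with create_graph=True,
# which keeps it differentiable, so a loss on it can still update the weights.
x2 = x.detach().clone().requires_grad_(True)
out2 = net(x2)
dydx, = torch.autograd.grad(out2.sum(), x2, create_graph=True)
print(dydx.grad_fn is not None)  # True  -> still part of the graph

If the first print is None in my setup as well, then I suspect the derivative half of my loss is just a constant as far as the optimizer is concerned, and the weight gradients in differential mode would simply be a scaled copy of the standard ones, which would produce (nearly) identical updates with any optimizer that is roughly insensitive to a uniform rescaling of the gradient, such as Adam. But I'd appreciate confirmation that this reading is correct.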