I’m training a GRU model in PyTorch for time-series forecasting.
This is my model:
import numpy as np
import torch
import torch.nn as nn

class GRU(nn.Module):
    def __init__(self, args, input_dim):
        super(GRU, self).__init__()
        self.args = args
        self.hidden_dim = args.hidden_units1
        self.input_dim = input_dim
        self.output_dim = args.pred_len
        self.layer_dim = args.num_layers
        # GRU layers
        self.gru = nn.GRU(
            self.input_dim, self.hidden_dim, self.layer_dim, batch_first=True,
            dropout=args.dropout, bidirectional=True)
        # bidirectional GRU doubles the feature size fed to the linear head
        self.fc = nn.Linear(self.hidden_dim * 2, self.output_dim)

    def forward(self, x, eva=False): ....
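The forward body is omitted above; its details don’t matter for this question, but it roughly follows the standard pattern below (a sketch, assuming the last time step feeds the linear head; the eva branch is evaluation-only behavior I’ve left out):

    def forward(self, x, eva=False):
        # h0 shape: (num_layers * num_directions, batch, hidden_dim)
        h0 = torch.zeros(self.layer_dim * 2, x.size(0), self.hidden_dim,
                         device=x.device)
        out, _ = self.gru(x, h0)
        # features of the last time step go to the prediction head
        out = self.fc(out[:, -1, :])
        return out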
My trainer class (device, MSE, and save_checkpoint are defined elsewhere in my script):
class TorchTrainer:
    def __init__(self, model, loss_fn, optimizer):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.train_losses = []
        self.val_losses = []

    def train_step(self, x, y):
        self.model.train()
        yhat = self.model(x)
        loss = self.loss_fn(y, yhat)
        # backprop, update weights, then clear gradients for the next step
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()
    def train(self, train_loader, val_loader, batch_size, n_epochs, n_features, result_path, best_loss=5):
        for epoch in range(1, n_epochs + 1):
            self.optimizer.zero_grad()
            batch_losses = []
            for x_batch, y_batch in train_loader:
                x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
                y_batch = y_batch.to(device)
                loss = self.train_step(x_batch, y_batch)
                batch_losses.append(loss)
            training_loss = np.mean(batch_losses)
            self.train_losses.append(training_loss)

            with torch.no_grad():
                batch_val_losses = []
                for x_val, y_val in val_loader:
                    x_val = x_val.view([batch_size, -1, n_features]).to(device)
                    y_val = y_val.to(device)
                    self.model.eval()
                    yhat = self.model(x_val)
                    predictions = yhat.detach().cpu().numpy()
                    val_loss = self.loss_fn(y_val, yhat).item()
                    batch_val_losses.append(val_loss)
                validation_loss = np.mean(batch_val_losses)
                self.val_losses.append(validation_loss)

                # checkpoint when a new best is reached (see question below)
                mse = MSE(y_val.detach().cpu().numpy(), predictions)
                if mse <= best_loss:
                    best_loss = mse
                    save_checkpoint(self.model)

            print(
                "epoch : {} , t_loss : {} , v_loss : {}, best loss : {}".format(
                    epoch, training_loss, validation_loss, best_loss))
    def evaluate(self, test_loader, batch_size=1, n_features=2):
        with torch.no_grad():
            preds = []
            trues = []
            for x_test, y_test in test_loader:
                x_test = x_test.view([batch_size, -1, n_features]).to(device)
                y_test = y_test.to(device)
                self.model.eval()
                yhat = self.model(x_test, True)
                preds.append(yhat.cpu().numpy())
                trues.append(y_test.cpu().numpy())
            preds = np.array(preds)
            trues = np.array(trues)
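For context, this is roughly how I wire everything up (a sketch; input_dim=2 matches the n_features default above, and the loss, learning rate, and args fields like args.batch_size / args.epochs are illustrative):

model = GRU(args, input_dim=2).to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
trainer = TorchTrainer(model, loss_fn, optimizer)
trainer.train(train_loader, val_loader, batch_size=args.batch_size,
              n_epochs=args.epochs, n_features=2,
              result_path=result_path)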
I want to save a model checkpoint every time the model achieves a new best performance, so that I keep the best-performing weights even if training is interrupted or the model starts overfitting later in the run.
To do so, I added these lines:
mse = MSE(y_val.detach().cpu().numpy(), predictions)
if mse <= best_loss:
    best_loss = mse
    save_checkpoint(self.model)
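where save_checkpoint is a thin wrapper along these lines (simplified sketch; the actual path handling via result_path is omitted):

def save_checkpoint(model, path="best_model.pt"):
    # persist only the weights; restore later with
    # model.load_state_dict(torch.load(path))
    torch.save(model.state_dict(), path)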
But I’m not sure I’m doing this correctly, because the best-loss values I get in the training output are not stable.