I tried to remove the unnecessary parts, hope I didn’t make it more confusing than it has to be.

For the following functions, namely using `F.mse_loss()`

for both, and using exactly the same data, I get the following output:

```
def train_multistep_single_block(training_dataset_dict, testing_dataset_dict, optimizer, network, epochs, scaler):
# Setting up parameters
block_size = 10
training_losses = []
validation_losses = []
horizon = 5
network.train()
for epoch in range(epochs):
dataset_losses = []
for d_idx, dataset1 in training_dataset_dict.items():
# note : dataset related initialization omitted
block_losses = []
for block in range(1, horizon-1):
# note: input pre-processing omitted
block_input = (torch.tensor(current_relative)).view(60)
control_input = (torch.tensor(current_relative[:,3:])).view(30)
pred_out = torch.zeros(30)
real_out = torch.zeros(30)
optimizer.zero_grad()
net_hidden = network.initHidden()
# Initializing the recurrent inputs
rec_in = torch.tensor(block_input)
rec_ffo = torch.tensor(control_input)
for bl_ind in range(1, horizon):
rec_in = rec_in.view(60)
out = network(torch.tensor(rec_in).float(), rec_ffo.float(), net_hidden)
pred_out = torch.cat((pred_out,out))
# note: pre-processing of the target value is omitted
target = torch.tensor(next_relative[:,:3]).float()
real_out = torch.cat((real_out,target.view(30)))
# Reconstructing inputs based on the previous prediction
rec_in = torch.cat((out.view(10,3), torch.tensor(next_relative[:,3:]).float()), 1) # 9
rec_ffo = torch.tensor(next_relative[:,3:]).view(30)
loss = F.mse_loss(torch.tensor(real_out),torch.tensor(pred_out))
# loss = custom_loss.mse(torch.tensor(real_out),torch.tensor(pred_out))
block_losses.append(loss.item())
loss.backward()
optimizer.step()
data_past = current_block
d_loss = sum(block_losses)/len(block_losses)
print('(%d, %f)' %(d_idx,d_loss))
dataset_losses.append(d_loss)
epoch_loss = sum(dataset_losses)/len(dataset_losses)
training_losses.append(epoch_loss)
print('epoch loss',epoch, epoch_loss)
if ((epoch ) % 10 == 0):
print("----------------------------------------------------")
print("\t Validation round.")
print("----------------------------------------------------")
validation_losses.append(testing_multi_step_prediction(testing_dataset_dict, network, scaler))
return training_losses, validation_losses
```

```
def testing_multi_step_prediction(testing_dataset_dict, network, scaler):
network.eval()
validation_loss = []
dataset_losses = []
block_size = 10
horizon = 5
with torch.no_grad():
for d_idx, dataset1 in testing_dataset_dict.items():
# note : dataset related initialization omitted
block_losses = []
for block in range(1, tot_blocks-horizon-1):
# note: input pre-processing omitted
block_input = (torch.tensor(current_relative)).view(60)
control_input = (torch.tensor(current_relative[:,3:])).view(30)
pred_out = torch.zeros(30)
real_out = torch.zeros(30)
net_hidden = network.initHidden()
# Initializing the recurrent inputs
rec_in = torch.tensor(block_input)
rec_ffo = torch.tensor(actual_forces)
for bl_ind in range(1, horizon):
rec_in = rec_in.view(60)
out = network(torch.tensor(rec_in).float(), rec_ffo.float(), net_hidden)
pred_out = torch.cat((pred_out,out))
# note: pre-processing of the target value is omitted
target = torch.tensor(next_relative[:,:3]).float()
real_out = torch.cat((real_out,target.view(30)))
# Reconstructing inputs based on the previous predictions
rec_in = torch.cat((out.view(10,3), torch.tensor(next_relative[:,3:]).float()), 1)
rec_ffo = torch.tensor(next_relative[:,3:]).view(30)
loss = F.mse_loss(torch.tensor(real_out),torch.tensor(pred_out))
block_losses.append(loss.item())
data_past = current_block
d_loss = sum(block_losses)/len(block_losses)
print('(%d, %f)' %(d_idx,d_loss))
dataset_losses.append(d_loss)
validation_loss = sum(dataset_losses)/len(dataset_losses)
print("Mean %d-step prediction error is : " %(horizon))
print(validation_loss)
return validation_loss
```

*output* :

```
(0, 53.842871)
(1, 5.945963)
(2, 7.037170)
(3, 6.868914)
(4, 3.127133)
(5, 13.591754)
(6, 10.479313)
(7, 3.686508)
(8, 4.541882)
(9, 3.013874)
(10, 36.150578)
(11, 26.826874)
(12, 20.820959)
(13, 8.622623)
(14, 14.105398)
('epoch loss', 0, 14.57745425715238)
----------------------------------------------------
Validation round.
----------------------------------------------------
(0, 0.626295)
(1, 0.123750)
(2, 0.321406)
(3, 0.196078)
(4, 0.119878)
(5, 0.228515)
(6, 0.172134)
(7, 0.085186)
(8, 0.151922)
(9, 0.049054)
(10, 0.337698)
(11, 0.206890)
(12, 0.219769)
(13, 0.109976)
(14, 0.109200)
Mean 5-step prediction error is : 0.249765358297
```

Where the output is of the form (dataset_id, dataset_loss).

When I am using this function instead:

```
def mse(prediction, target):
loss = (prediction - target)**2
ls = (loss.sum() / len(target))
return ls
```

The losses are in the same range.