I have corrected the code to save a dict containing the state of multiple LSTM cells and to load them individually, like below,
# Save the checkpoint.
# Build the per-cell entries programmatically so the number of saved
# 'state_dictN' keys always matches num_cells (the original hard-coded
# indices 0..2, which silently breaks if number_cells != 3).
checkpoint_state = {
    'num_epochs': epoch,
    'num_hidden': number_hidden,
    'num_cells': number_cells,
    'device': device,
    'state_linear': model.state_dict(),
}
# One entry per LSTM cell, keyed 'state_dict0', 'state_dict1', ...
for i, cell in enumerate(model.cell_list):
    checkpoint_state['state_dict{}'.format(i)] = cell.state_dict()
save_checkpoints(checkpoint_state, file_name)
def save_checkpoints(state, file_name):
    """Serialize a checkpoint dict to disk via ``torch.save``.

    :param state: dict of model/training state to persist
                  (e.g. epoch counters, ``state_dict()`` mappings)
    :param file_name: destination path for the checkpoint file
    :return: None
    """
    torch.save(state, file_name)
And when I now load the same checkpoint file multiple times like below, I get the correct results every time.
# Load the checkpoint and rebuild the first model instance.
checkpoint = torch.load(_ckpt_files[0])
_epochs = checkpoint['num_epochs']
num_hidden = checkpoint['num_hidden']
num_cells = checkpoint['num_cells']
# Use the device recorded in the checkpoint; fall back to CPU when absent.
# BUG FIX: the old branch mapped any non-None value to the string "gpu",
# which (a) discards the actual saved device and (b) is not a valid torch
# device string — torch expects "cuda"/"cuda:N"/"cpu".
dev = checkpoint['device']
if dev is None:
    dev = "cpu"
seq1 = Seq2seq(num_hidden=num_hidden, num_cells=num_cells, device=dev)
seq1.load_state_dict(checkpoint['state_linear'])
# Restore each LSTM cell from its own saved entry; iterating over
# num_cells keeps this in sync with however many cells were saved
# (the original hard-coded indices 0..2).
for i in range(num_cells):
    seq1.cell_list[i].load_state_dict(checkpoint['state_dict{}'.format(i)])
seq1.to(seq1.device)
seq1.double()
seq1.eval()
_, _ = test(csv_data=current_data[0], train_size=train_size, test_size=test_size,
data_col=_data_col_list[0], time_col=_timestamp_col_list[0], seq=seq1,
result_file=None, show=0)
# Load the same checkpoint again into a fresh model instance (seq2).
checkpoint = torch.load(_ckpt_files[0])
_epochs = checkpoint['num_epochs']
num_hidden = checkpoint['num_hidden']
num_cells = checkpoint['num_cells']
# Use the device recorded in the checkpoint; fall back to CPU when absent.
# BUG FIX: the old branch mapped any non-None value to the invalid torch
# device string "gpu" instead of using the saved device.
dev = checkpoint['device']
if dev is None:
    dev = "cpu"
seq2 = Seq2seq(num_hidden=num_hidden, num_cells=num_cells, device=dev)
seq2.load_state_dict(checkpoint['state_linear'])
# Restore each LSTM cell from its own saved 'state_dictN' entry.
for i in range(num_cells):
    seq2.cell_list[i].load_state_dict(checkpoint['state_dict{}'.format(i)])
# BUG FIX: was seq2.to(seq1.device) — a copy-paste error that only worked
# because seq1 happened to exist; seq2 must move to its OWN device.
seq2.to(seq2.device)
seq2.double()
seq2.eval()
_, _ = test(csv_data=current_data[0], train_size=train_size, test_size=test_size,
data_col=_data_col_list[0], time_col=_timestamp_col_list[0], seq=seq2,
result_file=None, show=0)
# Load the same checkpoint a third time into a fresh model instance (seq3).
checkpoint = torch.load(_ckpt_files[0])
_epochs = checkpoint['num_epochs']
num_hidden = checkpoint['num_hidden']
num_cells = checkpoint['num_cells']
# Use the device recorded in the checkpoint; fall back to CPU when absent.
# BUG FIX: the old branch mapped any non-None value to the invalid torch
# device string "gpu" instead of using the saved device.
dev = checkpoint['device']
if dev is None:
    dev = "cpu"
seq3 = Seq2seq(num_hidden=num_hidden, num_cells=num_cells, device=dev)
seq3.load_state_dict(checkpoint['state_linear'])
# Restore each LSTM cell from its own saved 'state_dictN' entry.
for i in range(num_cells):
    seq3.cell_list[i].load_state_dict(checkpoint['state_dict{}'.format(i)])
# BUG FIX: was seq3.to(seq1.device) — copy-paste error; seq3 must move to
# its OWN device, independent of any other instance.
seq3.to(seq3.device)
seq3.double()
seq3.eval()
_, _ = test(csv_data=current_data[0], train_size=train_size, test_size=test_size,
data_col=_data_col_list[0], time_col=_timestamp_col_list[0], seq=seq3,
result_file=None, show=0)
Here are the results:
test loss: 122.80924618395184
Weighted mean absolute error is : 25.6365589979712
input tensor([[ 31.0200, 31.0400, 31.0500, 31.0800, 31.0900, 31.1200,
31.1400, 31.1600, 31.1800, 31.2000, 31.2300, 31.2500,
31.2700, 31.2900, 31.3100, 31.3300, 31.3500, 31.3700,
31.3900, 31.5400, 31.5600, 31.6900, 31.7100, 31.7300,
31.7600, 31.7800, 31.7900, 31.8200, 31.8400, 31.8600,
31.8900, 31.9000, 31.9200, 31.9400, 31.9600, 31.9800,
32.0000, 32.0300, 32.0600, 32.0900]], dtype=torch.float64, device='cuda:0')
forecast tensor([ 39.3661, 38.2005, 36.8705, 36.3623, 36.6181, 37.3330,
38.2346, 39.1504, 39.9915, 40.7231, 41.3416, 41.8541,
42.2743, 42.6168, 42.8953, 43.1216, 43.3058, 43.4561,
43.5792, 43.7020, 43.8089, 43.9155, 44.0072, 44.0820,
44.1438, 44.1943, 44.2343, 44.2692, 44.2998, 44.3269,
44.3529, 44.3754, 44.3959, 44.4151, 44.4335, 44.4513,
44.4686, 44.4871, 44.5071, 44.5282], dtype=torch.float64, device='cuda:0')
test loss: 122.80924618395184
Weighted mean absolute error is : 25.6365589979712
input tensor([[ 31.0200, 31.0400, 31.0500, 31.0800, 31.0900, 31.1200,
31.1400, 31.1600, 31.1800, 31.2000, 31.2300, 31.2500,
31.2700, 31.2900, 31.3100, 31.3300, 31.3500, 31.3700,
31.3900, 31.5400, 31.5600, 31.6900, 31.7100, 31.7300,
31.7600, 31.7800, 31.7900, 31.8200, 31.8400, 31.8600,
31.8900, 31.9000, 31.9200, 31.9400, 31.9600, 31.9800,
32.0000, 32.0300, 32.0600, 32.0900]], dtype=torch.float64, device='cuda:0')
forecast tensor([ 39.3661, 38.2005, 36.8705, 36.3623, 36.6181, 37.3330,
38.2346, 39.1504, 39.9915, 40.7231, 41.3416, 41.8541,
42.2743, 42.6168, 42.8953, 43.1216, 43.3058, 43.4561,
43.5792, 43.7020, 43.8089, 43.9155, 44.0072, 44.0820,
44.1438, 44.1943, 44.2343, 44.2692, 44.2998, 44.3269,
44.3529, 44.3754, 44.3959, 44.4151, 44.4335, 44.4513,
44.4686, 44.4871, 44.5071, 44.5282], dtype=torch.float64, device='cuda:0')
test loss: 122.80924618395184
Weighted mean absolute error is : 25.6365589979712
input tensor([[ 31.0200, 31.0400, 31.0500, 31.0800, 31.0900, 31.1200,
31.1400, 31.1600, 31.1800, 31.2000, 31.2300, 31.2500,
31.2700, 31.2900, 31.3100, 31.3300, 31.3500, 31.3700,
31.3900, 31.5400, 31.5600, 31.6900, 31.7100, 31.7300,
31.7600, 31.7800, 31.7900, 31.8200, 31.8400, 31.8600,
31.8900, 31.9000, 31.9200, 31.9400, 31.9600, 31.9800,
32.0000, 32.0300, 32.0600, 32.0900]], dtype=torch.float64, device='cuda:0')
forecast tensor([ 39.3661, 38.2005, 36.8705, 36.3623, 36.6181, 37.3330,
38.2346, 39.1504, 39.9915, 40.7231, 41.3416, 41.8541,
42.2743, 42.6168, 42.8953, 43.1216, 43.3058, 43.4561,
43.5792, 43.7020, 43.8089, 43.9155, 44.0072, 44.0820,
44.1438, 44.1943, 44.2343, 44.2692, 44.2998, 44.3269,
44.3529, 44.3754, 44.3959, 44.4151, 44.4335, 44.4513,
44.4686, 44.4871, 44.5071, 44.5282], dtype=torch.float64, device='cuda:0')
Process finished with exit code 0
So I think we need to save each of the multiple LSTM cells' state dicts separately.