I am currently trying to train a many-to-one LSTM model to predict the quality of an episode. However during the training the loss is decreasing well but the predictions from the model aren’t changing. I was thinking that this could be a result of only using a single dataset, or maybe it could be that my model isn’t calculating the gradients properly because the grad attributes for the forward passes are None. In the end, I have been looking into how to fix this for a while now and I was curious if anyone could help?
Here are all the necessary snippets of the code:
Also for reference the shapes of the data should be well described in the architecture file (lowest code-snippet), however if you have any more questions please let me know!
Here is the main file that does the training
import torch
import copy
import torch.optim as optim
from tqdm import tqdm
import lstm
import numpy as np
from torch.nn.utils.rnn import pad_sequence
DEVICE = torch.device(
'cuda' if torch.cuda.is_available() else 'cpu')
def load_dataset(velocity, gap_size):
"""
Loads the rollouts for each parameter setting (where velocity and gap size are the variable parameters)
"""
rollout_data = np.load(
f"exp_data/exp_vel_{velocity}_gapSize_{gap_size}.npy", allow_pickle=True)
# Iterate over the field names and copy values from the original structured array to the new array
X = []
for episode_idx, x in enumerate(rollout_data):
# stores all the states for each timestep in the episode
chosen_metrics = []
for timestep_idx, full_state_arr in enumerate(x):
# convert each tuple in the structured array
full_state_dict = {}
for field_name in full_state_arr.dtype.names:
full_state_dict[field_name] = torch.squeeze(
torch.from_numpy(full_state_arr[field_name]))
# parse the desired metrics (to be fed into the model) from the full state dictionary
torch_tensor_x = torch.Tensor(size=(1, 2), device=DEVICE)
torch_tensor_x[0][0] = full_state_dict['base_positions'][0]
torch_tensor_x[0][1] = full_state_dict['base_linear_velocities'][0]
chosen_metrics.append(torch_tensor_x)
X.append(copy.deepcopy(chosen_metrics))
# Pad the sequences and stack them into a tensor
max_len = max([len(t) for t_list in X for t in t_list])
padded_tensor_list = []
for t_list in X:
padded_t_list = []
for t in t_list:
padded_t = torch.nn.functional.pad(
t, (0, max_len - len(t)), mode='constant', value=0)
padded_t_list.append(padded_t)
padded_tensor_list.append(padded_t_list)
padded_tensor = pad_sequence(
[torch.stack(t_list) for t_list in padded_tensor_list], batch_first=True)
return padded_tensor, len(X)
def main():
velocity, gap_size = 0.25, 3
num_metrics, num_episodes = 2, 5
# dictionary where the first key is the gap size and the value is a list which holds the indices corresponding to which episodes were preferred
# Picking 2 bad, 2 good, and 2 mediocre episodes to fine tune on
handpicked_episodes = {3: [[], [], [0], [4], [], [], [1], [], [], [1], [], []],
4: [[], [], [], [], [], [4], [], [], [], [], [], []],
5: [[], [], [], [], [], [], [], [], [], [], [], []],
6: [[], [], [], [], [], [], [], [1], [], [], [], []]}
# Gather all the tensor data for each parameter setting into one dataset
X, num_episodes = [], 0
max_timestep_len = 0
while gap_size <= 6:
while velocity <= 3.00:
# TODO change the data organization to be cleaner later if you have time
temp_tensor, temp_num_episodes = load_dataset(
velocity=velocity, gap_size=gap_size)
# After squeezing X is 3D-tensor | shape: (num episodes, max num of trajectories (length of episode, padded as necessary), input_dim (num of metrics chosen from full_state_trajectory))
temp_tensor.squeeze_(2)
max_timestep_len = max(max_timestep_len, temp_tensor.shape[1])
X.append(temp_tensor)
num_episodes += temp_num_episodes
velocity += 0.25
gap_size += 1
velocity = 0.25
# pad the tensors in X then concatenate
padded_tensors = []
for tensor in X:
pad_size = max_timestep_len - tensor.size(1)
padded_tensor = torch.nn.functional.pad(tensor, (0, 0, 0, pad_size))
padded_tensors.append(padded_tensor)
# Concatenate the padded tensors along the first dimension
concatenated_tensors = torch.cat(padded_tensors, dim=0)
# parse the preferred episodes from the dataset
num_parameter_settings, gap_size_key, parameter_settings_count = len(
padded_tensors), 3, 0
X = []
save_plots, handpick = True, True
for i in range(concatenated_tensors.shape[0]):
# check multiple of temp_num_episodes * 12 b/c there are temp_num_episodes in each parameter setting and there are 12
# parameter settings for each gap size
if i % (temp_num_episodes * 12) == 0 and i != 0:
gap_size_key += 1
# for every parameter setting we want to parse the preferred episode
if i % temp_num_episodes == 0:
# check if we ran into an empty list
if handpick:
if handpicked_episodes[gap_size_key][parameter_settings_count % 12]:
preferred_episode_idx = preferred_episodes[gap_size_key][parameter_settings_count % 12][0] + (
parameter_settings_count * temp_num_episodes)
X.append(concatenated_tensors[preferred_episode_idx])
parameter_settings_count += 1
else:
if preferred_episodes[gap_size_key][parameter_settings_count % 12]:
preferred_episode_idx = preferred_episodes[gap_size_key][parameter_settings_count % 12][0] + (
parameter_settings_count * temp_num_episodes)
X.append(concatenated_tensors[preferred_episode_idx])
parameter_settings_count += 1
# convert X into tensor
X = torch.stack(X, dim=0)
X.requires_grad_(True)
batch_size = X.shape[0]
reward_model = lstm.Reward_LSTM(num_metrics, 50, 2, batch_size).to(DEVICE)
optimizer = optim.Adam(reward_model.parameters())
P = np.random.rand(X.shape[0])
# Get the reward for each episode in X
num_epochs = 1000
intermediate_vals, k = torch.ones(
(num_epochs), device=DEVICE, requires_grad=True), 0
for epoch_idx in tqdm(range(num_epochs)):
r = reward_model(X)
r.requires_grad_(True)
with torch.no_grad():
for i in range(batch_size):
for j in range(batch_size):
# compute the loss
if i != j:
k += 1
if P[i] > P[j]:
intermediate_vals[epoch_idx] = -torch.log(
torch.sigmoid((r[i] - r[j])))
else:
intermediate_vals[epoch_idx] = -torch.log(
torch.sigmoid((r[j] - r[i])))
# calculate gradient and update parameters of reward model
optimizer.zero_grad()
loss = torch.mean(intermediate_vals)
loss.requires_grad_(True)
loss.backward(retain_graph=True)
optimizer.step()
if __name__ == "__main__":
main()
Here is the NN architecture file (imported as lstm.py in the training code)
class Reward_LSTM(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes) -> None:
super(Reward_LSTM, self).__init__()
self.num_layers = num_layers
self.hidden_size = hidden_size
self.lstm = nn.LSTM(input_size, hidden_size,
num_layers, batch_first=True)
self.fc = nn.Linear(hidden_size, num_classes)
def forward(self, x):
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
out, _ = self.lstm(x, (h0, c0))
# out: batch_size, seq_length, hidden_size
out = out[:, -1, :]
out = self.fc(out)
# remove the 2nd dimension from the output
out = out[0]
return out
Thank you in advance!