Hi,
I’m trying to implement a basic LSTM model for a multivariate time series problem. The problem is that the model always predicts the same number, and when training, the loss is almost constant through all epochs. Now I’m questioning if my model definition and training loop are correct.
I would appreciate it if anyone could give me a hint in case there is something wrong with my code.
To provide a bit more context, the input to the network has the shape [batch_size, seq_length, num_features]
and the target has the shape [batch_size, 1].
The target values are in descending order, so, given an input sequence, I would like to predict a number that should be the smallest number in the target array.
Here is my model definition:
class RUL_Estimator(nn.Module):
    """LSTM regressor that maps a multivariate sequence to ``output_dim`` values.

    The network is a (possibly stacked) ``nn.LSTM`` followed by a single
    ``nn.Linear`` layer applied to the LSTM output of the last time step.

    Args:
        n_features: Number of features per time step (LSTM ``input_size``).
        hidden_dim: Size of the LSTM hidden state.
        seq_length: Stored on the instance; not used by the layers themselves.
        num_layers: Number of stacked LSTM layers.
        output_dim: Number of values predicted per sequence.
    """

    def __init__(self, n_features, hidden_dim, seq_length, num_layers=2, output_dim=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.seq_length = seq_length
        self.num_layers = num_layers

        # nn.LSTM applies dropout only *between* stacked layers. With a single
        # layer it has no effect and PyTorch emits a warning, so disable it.
        inter_layer_dropout = 0.2 if self.num_layers > 1 else 0.0

        # Define the LSTM layers
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.linear = nn.Linear(in_features=self.hidden_dim, out_features=output_dim)

    def forward(self, input):
        """Predict from a batch shaped ``(batch, seq, n_features)``.

        Returns a tensor shaped ``(batch, output_dim)``.
        """
        # NOTE: the parameter name ``input`` shadows the builtin; it is kept so
        # that existing keyword callers keep working.
        lstm_out, _ = self.lstm(input)
        # batch_first=True, so index -1 on dim 1 selects the last time step of
        # every sequence in the batch.
        last_step = lstm_out[:, -1, :]
        return self.linear(last_step)
Here is my training function:
# Training Function
def train_model(model, loss_function, optimizer, num_epochs=25):
    """Train ``model`` and keep the weights with the lowest validation loss.

    Every epoch runs a ``'train'`` phase followed by a ``'val'`` phase over the
    module-level ``dataloaders`` dict, moving each batch to the module-level
    ``device``. Per-epoch mean batch losses are recorded for both phases, and a
    combined summary line is printed every 5 epochs.

    Args:
        model: Module to train; called as ``model(inputs.float())``.
        loss_function: Callable ``loss_function(outputs, targets)`` returning
            a scalar tensor.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of epochs to run.

    Returns:
        ``(model, loss_history)`` where ``model`` holds the best validation
        weights and ``loss_history`` is ``{'train': [...], 'val': [...]}``.
    """
    import os

    checkpoint_path = 'models/LSTM_v0.pth'

    since = time.time()
    best_loss = 1e10
    best_model_wts = copy.deepcopy(model.state_dict())
    loss_history = {'train': [], 'val': []}

    for epoch in range(1, num_epochs + 1):
        print('\nEpoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)

        # Each epoch has a training phase followed by a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # Set model to training or evaluation mode
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients are only accumulated while training, so only
                # clear them there.
                if is_train:
                    optimizer.zero_grad()

                # Track history only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs.float())
                    targets = labels.float()
                    # A [batch] target against [batch, 1] outputs would be
                    # silently broadcast to [batch, batch] by elementwise
                    # losses, pushing the model toward a constant prediction.
                    # Align the shapes when the element counts agree.
                    if (targets.shape != outputs.shape
                            and targets.numel() == outputs.numel()):
                        targets = targets.reshape(outputs.shape)
                    loss = loss_function(outputs, targets)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item()

            # Mean of the per-batch losses for this phase.
            epoch_loss = running_loss / len(dataloaders[phase])
            loss_history[phase].append(epoch_loss)

            if epoch % 5 == 0:
                if is_train:
                    # Held until the val phase so both print on one line.
                    train_stats = '{} ==> Loss: {:.4f}'.format(phase.upper(), epoch_loss)
                else:
                    print('\n'+train_stats+' -- {} ==> Loss: {:.4f}'.format(phase.upper(), epoch_loss))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # load best model weights
    model.load_state_dict(best_model_wts)

    # Save only after the best weights are restored so the file on disk holds
    # the best model. As posted (indentation lost), the save appeared to run
    # before the restore, which would persist the last epoch's weights.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    torch.save(model.state_dict(), checkpoint_path)

    return model, loss_history
Thank you very much for your help.