When I use an LSTM model for regression, the training and testing losses print as NaN. The DataFrame I pass into the model contains no NaN values, so I believe the issue lies in my model or in my training/testing loop functions. Any help in this regard would be greatly appreciated.
Model Class:
class PKLSTM(nn.Module):
    """Stacked LSTM followed by a linear head that emits one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_units: Size of the hidden state of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_units, num_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_units = hidden_units
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=num_layers,
        )
        self.linear = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Predict one scalar per sequence.

        Args:
            x: Batch-first input of shape ``(batch, seq_len, input_size)``.

        Returns:
            1-D tensor with one prediction per sequence in the batch.
        """
        # Omitting (h0, c0) lets nn.LSTM create zero initial states itself,
        # on the same device and with the same dtype as ``x``. Hand-built
        # CPU/float32 states break as soon as the model or data live elsewhere.
        _, (hn, _) = self.lstm(x)
        # hn is indexed by layer; the regression head must read the LAST
        # layer. hn[0] is the first layer, which leaves every upper layer
        # without influence on the output (and without useful gradients)
        # whenever num_layers > 1.
        return self.linear(hn[-1]).flatten()
Training and testing loops:
def train_model(data_loader, model, loss_function, optimizer):
    """Run one training epoch and report the mean per-batch loss.

    Args:
        data_loader: Sized iterable yielding ``(X, y)`` batches.
        model: Callable module mapping ``X`` to predictions; it is switched
            to training mode.
        loss_function: Callable returning a scalar loss tensor from
            ``(output, y)``.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        The loss averaged over the batches (the same value that is printed).

    Raises:
        ZeroDivisionError: If ``data_loader`` contains no batches.
    """
    import math
    import warnings

    num_batches = len(data_loader)
    total_loss = 0
    warned = False
    model.train()

    for batch_idx, (X, y) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(X)
        loss = loss_function(output, y)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        # A single NaN/inf batch poisons the running sum and, after the
        # optimizer step, the weights. Point at the first offender instead
        # of letting the epoch average silently turn into NaN.
        if not warned and not math.isfinite(batch_loss):
            warnings.warn(
                f"Non-finite training loss ({batch_loss}) first seen at batch "
                f"{batch_idx}; check inputs/targets for NaN or inf values "
                "and consider a lower learning rate or gradient clipping.",
                RuntimeWarning,
                stacklevel=2,
            )
            warned = True
        total_loss += batch_loss

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")
    return avg_loss
def test_model(data_loader, model, loss_function):
num_batches = len(data_loader)
# print('Num of batches', num_batches)
total_loss = 0
model.eval()
with torch.no_grad():
for X, y in data_loader:
# print('input: ', X)
output = model(X)
# print('output: ', output)
total_loss += loss_function(output, y).item()
avg_loss = total_loss / num_batches
print(f"Test loss: {avg_loss}")