I have trained an LSTM model and am now trying to use it in a script to make actual predictions. For some reason, if I call model.eval() after loading the model, every prediction is identical regardless of the input data (not just similar, but exactly the same bits). If I do not call model.eval(), I get results in line with the validation loss I was seeing during training. My understanding is that leaving eval() out means dropout is still active during prediction, which is not what I want.
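To spell out my understanding of eval(): as far as I know, train()/eval() just flip the .training flag on a module and all of its submodules, and nn.Dropout checks that flag at forward time. A minimal, self-contained sketch of that behaviour (not my actual model):

import torch.nn as nn

# eval()/train() toggle the .training flag on the module and every child;
# nn.Dropout becomes a no-op whenever .training is False.
net = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
net.eval()
print([m.training for m in net.modules()])   # [False, False, False] -> dropout disabled
net.train()
print([m.training for m in net.modules()])   # [True, True, True]   -> dropout active

What follows is my model class: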
import torch
import torch.nn as nn
import torch.nn.functional as F


class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, lstm_dropout, fc_dropout,
                 fc1_size=32, fc2_size=16, sequence_length=199):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=lstm_dropout
        )
        self.sequence_length = sequence_length
        # Learnable additive position embeddings, broadcast over the batch in forward().
        self.position_embeddings = nn.Parameter(torch.zeros(1, sequence_length, input_size))
        # Scores each timestep of the LSTM output for the attention pooling below.
        self.attention_layer = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(fc_dropout)
        self.fc1 = nn.Linear(hidden_size, fc1_size)
        self.bn1 = nn.LayerNorm(fc1_size)
        self.bn2 = nn.LayerNorm(fc2_size)
        self.fc2 = nn.Linear(fc1_size, fc2_size)
        self.fc3 = nn.Linear(fc2_size, 1)
        self.init_weights()

    def forward(self, x):
        # x: (batch, sequence_length, input_size)
        batch_size = x.size(0)
        position_embeddings = self.position_embeddings.repeat(batch_size, 1, 1)
        x = x + position_embeddings

        # out: (batch, sequence_length, hidden_size)
        out, _ = self.lstm(x)

        # Attention over timesteps, with a fixed bias that favours later positions.
        attn_scores = self.attention_layer(out).squeeze(-1)           # (batch, sequence_length)
        device = x.device
        position_ids = torch.arange(1, self.sequence_length + 1, dtype=torch.float, device=device)
        bias = (position_ids / position_ids.sum()).unsqueeze(0)       # (1, sequence_length)
        attn_scores = attn_scores + bias
        attn_weights = F.softmax(attn_scores, dim=1)

        # Weighted sum of the LSTM outputs -> context vector of shape (batch, hidden_size).
        context = torch.bmm(attn_weights.unsqueeze(1), out).squeeze(1)

        # Fully connected head: Linear -> LayerNorm -> LeakyReLU -> Dropout, twice, then the output layer.
        out = self.fc1(context)
        out = self.bn1(out)
        out = F.leaky_relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.bn2(out)
        out = F.leaky_relu(out)
        out = self.dropout(out)
        out = self.fc3(out)
        return out.squeeze(-1)                                         # (batch,)

    def init_weights(self):
        with torch.no_grad():
            for name, param in self.lstm.named_parameters():
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(param)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(param)
                elif 'bias' in name:
                    param.fill_(0)
            nn.init.xavier_uniform_(self.attention_layer.weight)
            # Note: these two lines compute a positional bias but are not used anywhere in this method.
            position_ids = torch.arange(1, self.sequence_length + 1, dtype=torch.float)
            bias = position_ids / position_ids.sum()
            nn.init.xavier_uniform_(self.fc1.weight)
            nn.init.xavier_uniform_(self.fc2.weight)
            nn.init.xavier_uniform_(self.fc3.weight)
            self.fc1.bias.fill_(0)
            self.fc2.bias.fill_(0)
            self.fc3.bias.fill_(0)
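For reference, the model takes a (batch, sequence_length, input_size) tensor and returns one value per sample. A quick smoke test on a freshly constructed model (the hyperparameters here are placeholders, not the values the checkpoint was trained with):

import torch

# Placeholder hyperparameters, purely for a shape check.
model = LSTMModel(input_size=8, hidden_size=64, num_layers=2,
                  lstm_dropout=0.3, fc_dropout=0.3)
dummy = torch.randn(4, 199, 8)        # (batch, sequence_length=199, input_size)
with torch.no_grad():
    out = model(dummy)
print(out.shape)                      # torch.Size([4]) -- one prediction per sample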
The following is the code I am using to get the predictions:
model_to_load = 'ch_ver-181_ep-3_sub-0.pth'
model: LSTMModel = load_model(model_to_load)
model.to(torch.device('cuda'))
model.eval()

with torch.no_grad():
    predictions = []
    dataloader = DataLoader(DatasetWrapper(feature_list), batch_size=2)
    for input in dataloader:
        input = input.to(torch.device('cuda'))   # move the batch to the same device as the model
        output = model(input)
        # Undo the target scaling so the predictions are back in the original units.
        unscaled = target_scaler.inverse_transform(output.cpu().numpy().reshape(-1, 1))
        for result in unscaled:
            predictions.append(result[0])
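The collapse shows up with a check along these lines (a sketch, reusing the predictions list built above):

import numpy as np

# With model.eval() active this prints 1: every entry of `predictions` is the
# same float. With eval() commented out it prints many distinct values.
print(len(np.unique(np.asarray(predictions))))
print(predictions[:5])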
As is, every single prediction is identical. If I comment out model.eval(), I get predictions as expected. What could cause model.eval() to collapse every output to the same value, and what is the correct way to disable dropout for inference here?