I did the same method for saving the best model, but I have a weird issue!
When I load the saved model for later prediction (inference), the model output is actually random! I checked the weight of the best_model and the loaded_model manually and it seems everything is ok but the predictions by loaded_model are totally incorrect while the one in the Jupyter Notebook
works fine.
This is my model architecture:
class Char2Vec(nn.Module):
def __init__(self, vocab_size, embed_dim, out_ch1= CFG.out_ch1, out_ch2= CFG.out_ch2):
super().__init__()
self.out_ch1, self.out_ch2 = out_ch1, out_ch2
self.embeds = nn.Embedding(vocab_size, embed_dim, padding_idx=0) # first embedding layer for characters
self.conv1 = nn.Sequential(
nn.Conv1d(in_channels=embed_dim, out_channels=out_ch1, kernel_size=3),
nn.ReLU(),
nn.Dropout(.1),
)
self.convs2 = nn.ModuleList(
[
nn.Sequential(
nn.Conv1d(out_ch1, out_ch2//3, kernel_size=k),
nn.ReLU(),
)
for k in [3, 4, 5]
]
)
self.linear = nn.Sequential(
nn.Linear(out_ch2, out_ch2),
nn.ReLU(),
)
def forward(self, word):
embeds = self.embeds(word).transpose(-2,-1)
batch, sent, emb, seq = embeds.shape
conv1 = self.conv1(embeds.view(-1, emb, seq))
tmp = [cnn(conv1).max(dim=-1)[0].squeeze() for cnn in self.convs2]
conv2 = torch.cat(tmp, dim=1)
lin = self.linear(conv2)
return (lin+conv2).view(batch, sent, -1)
class BiLSTMtagger(nn.Module):
def __init__(self, embedding_dim, hidden_dim, tagset_size):
super().__init__()
self.hidden_dim = hidden_dim
self.word_embeddings = Char2Vec(Data.char_vocab_size, Data.d)
self.lstm = nn.LSTM(
input_size = embedding_dim,
hidden_size = hidden_dim,
num_layers = 2,
batch_first = True,
bidirectional = True,
dropout = 0.3
)
self.hidden2tag = nn.Linear(hidden_dim*2, tagset_size)
def forward(self, sentence):
embeds = self.word_embeddings(sentence)
lstm_out, _ = self.lstm(embeds)
tag_space = self.hidden2tag(lstm_out)
return F.log_softmax(tag_space, dim=1)
I save it like the following:
torch.save(best_model.state_dict(), 'bestmodel.pt')
and then for prediction, I do something like the below:
model = BiLSTMtagger(EMBEDDING_DIM, HIDDEN_DIM, TAGSET_SIZE)
state = torch.load('bestmodel.pt')
model.load_state_dict(state)
model.eval()
out = model(x).argmax(dim=-1)[0].tolist()
print(out)
I am so confused, and Iāve struggled with it the whole day without any success. I appreciate any help.