If I remove the two lines `h0 = torch.zeros(...)` and `c0 = torch.zeros(...)`,
and also remove `batch_first=True`,
my network stops learning.
I thought that `nn.LSTM` uses a zero initial hidden state by default if you don't pass one in.
class ModelLSTMFSM(nn.Module):
    """Stacked LSTM whose last-step output is projected to a 4-D tensor.

    The input is reshaped to ``(batch, INPUT_SIZE, input_size)``, run through
    a batch-first ``nn.LSTM``, and the output at the final time step is fed to
    a linear layer producing ``states_size * states_size * 2`` values per
    sample.

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        states_size: Side length of the square output grid; the final output
            has shape ``(batch, states_size, states_size, 2)``.
            NOTE(review): presumably an FSM transition table -- confirm
            against the training code.
    """

    def __init__(self, input_size=MAX_STRING_SIZE, hidden_size=256, num_layers=2, states_size=MAX_STATES_SIZE):
        super(ModelLSTMFSM, self).__init__()
        # Kept so forward() reshapes with the size the LSTM was actually
        # built with, rather than a module-level constant.
        self.input_size = input_size
        self.states_size = states_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True makes the LSTM read its input as
        # (batch, seq, feature). forward() relies on this layout both in its
        # reshape and in the out[:, -1, :] indexing; dropping the flag without
        # changing those would make dim 0 be treated as the time axis.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, self.states_size * self.states_size * 2)

    def forward(self, x):
        """Run the model on ``x``.

        Args:
            x: Tensor with a number of elements divisible by
                ``INPUT_SIZE * input_size``; it is reshaped to
                ``(batch, INPUT_SIZE, input_size)``.

        Returns:
            Tensor of shape ``(batch, states_size, states_size, 2)``.
        """
        # INPUT_SIZE is used as the sequence-length dimension here.
        x = x.reshape(-1, INPUT_SIZE, self.input_size)

        # No initial state is passed: nn.LSTM then starts from zero hidden and
        # cell states created on the input's own device and dtype, so no
        # global `device` is needed.
        out, _ = self.lstm(x)  # out: (batch, seq_length, hidden_size)

        # Decode only the output of the last time step.
        out = self.fc(out[:, -1, :])
        return out.reshape(-1, self.states_size, self.states_size, 2)