Why is input_size not stored as an attribute when initializing this LSTM model?

Sjk_Jae · March 30, 2021, 12:33pm

class RNN(nn.Module):
    """Embedding -> multi-layer LSTM -> linear head, run one time step per call.

    ``hidden_size`` and ``num_layers`` are kept as attributes because
    ``init_hidden`` needs them later to size the state tensors.
    ``input_size`` and ``output_size`` are consumed here only to build the
    embedding table and the output layer, so they are not stored.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Build the layers.

        Args:
            input_size: number of rows in the embedding table.
            hidden_size: embedding width, and LSTM input/hidden width.
            num_layers: number of stacked LSTM layers.
            output_size: number of output features of the final linear layer.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Advance the LSTM by a single time step.

        Args:
            x: index tensor fed to the embedding; presumably shape (batch,),
                since a length-1 sequence axis is inserted below.
            hidden: LSTM hidden state, as produced by ``init_hidden`` or by a
                previous call.
            cell: LSTM cell state, same layout as ``hidden``.

        Returns:
            ``(out, (hidden, cell))`` where ``out`` is the linear layer's
            output for this step and the tuple is the updated LSTM state.
        """
        out = self.embed(x)
        # The LSTM is batch_first, so insert a sequence axis of length 1.
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        # Fold the sequence axis back into the feature axis before the head.
        out = self.fc(out.reshape(out.shape[0], -1))
        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size`` sequences.

        The tensors are created on the same device and with the same dtype as
        the module's own parameters, so they stay compatible with the LSTM
        wherever the model has been moved, without relying on a global
        ``device`` variable.
        """
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_size)
        # Allocate directly on the target device instead of creating on CPU
        # and copying afterwards.
        hidden = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        cell = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return hidden, cell

self.hidden_size = hidden_size
self.num_layers = num_layers
Both hidden_size and num_layers are stored as attributes on self. Why is input_size not stored in the same way?

omarfoq · March 30, 2021, 12:48pm

Hi,

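You don’t need input_size anywhere else in your class: it is only used once, inside __init__, to build the nn.Embedding layer. In contrast, num_layers and hidden_size are needed later in init_hidden, so they have to be saved on self.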

Sjk_Jae · March 31, 2021, 1:51am

Got it, thank you