How to initialize hidden state in GRU

I set the initial hidden state in the forward function. Does it work?

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class MV_GRU(torch.nn.Module):
    def __init__(self, n_features, seq_length, num_hiddens, hidden_layers):
        super(MV_GRU, self).__init__()
        self.n_features = n_features
        self.seq_len = seq_length
        self.n_hidden = num_hiddens  # size of the hidden state
        self.n_layers = hidden_layers  # number of stacked GRU layers

        self.l_gru = torch.nn.GRU(input_size=n_features,
                                  hidden_size=self.n_hidden,
                                  num_layers=self.n_layers,
                                  batch_first=True)
        self.l_linear = torch.nn.Linear(self.n_hidden * self.seq_len, 1)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        # fresh zero hidden state for every forward pass
        self.hidden = torch.zeros(self.n_layers, batch_size, self.n_hidden).to(device)
        gru_out, self.hidden = self.l_gru(x, self.hidden)
        x = gru_out.contiguous().view(batch_size, -1)
        return self.l_linear(x)

Yes, you can initialize the hidden state in the forward method. Note, however, that by storing the state in self.hidden you keep a reference that still carries the computation graph of the current forward pass. If that stored state is ever reused as the initial state of a later iteration, the backward call might try to backpropagate through multiple iterations. This could be a valid use case, but you should check whether it fits yours or whether you want to detach() the hidden state in each forward pass.
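
If you do want to carry the hidden state across batches (stateful training) while only backpropagating through the current batch, the usual pattern is to pass the state in explicitly, detach it, and return it. Below is a minimal sketch, assuming the same MV_GRU class and device as above; the optional hidden argument and the training loop (loader) are illustrative, not part of your code:

class MV_GRU(torch.nn.Module):
    # __init__ as above

    def forward(self, x, hidden=None):
        batch_size, seq_len, _ = x.size()
        if hidden is None:
            # start from a zero state, e.g. for the first batch of a sequence
            hidden = torch.zeros(self.n_layers, batch_size, self.n_hidden,
                                 device=x.device)
        gru_out, hidden = self.l_gru(x, hidden)
        x = gru_out.contiguous().view(batch_size, -1)
        # detach() cuts the graph so backward() only covers the current batch
        return self.l_linear(x), hidden.detach()

# hypothetical usage with an assumed DataLoader called `loader`
model = MV_GRU(n_features=3, seq_length=10, num_hiddens=32, hidden_layers=2).to(device)
hidden = None
for x_batch, y_batch in loader:
    out, hidden = model(x_batch.to(device), hidden)

Passing the state explicitly instead of storing it in self.hidden also makes it obvious at the call site whether the state is carried over or reset.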