How does one make a bidirectional RNN if one is processing a sequence token by token?

How does one make a bidirectional RNN when processing a sequence token by token? Do I have to hardcode a for loop over the reversed sequence myself, or is the bidirectional flag of nn.LSTM essentially useless in that case?
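
For reference, this is the bidirectional flag I mean. A minimal sketch, assuming the whole sequence is passed to nn.LSTM in a single call rather than token by token (the names here are only for illustration):

import torch
import torch.nn as nn

bi_lstm = nn.LSTM(input_size=3, hidden_size=6, bidirectional=True)
seq = torch.randn(5, 1, 3)  # (seq_len, batch, input_size)
h0 = torch.randn(2, 1, 6)   # first dim = num_layers * num_directions = 2
c0 = torch.randn(2, 1, 6)
out, (h_n, c_n) = bi_lstm(seq, (h0, c0))
print(out.size())  # torch.Size([5, 1, 12]), i.e. 2 * hidden_size (both directions concatenated)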

Do things change if my RNN is somewhat custom (but still uses LSTM/GRU modules internally)?


Here is some example code of how I am currently processing one direction:

import torch
import torch.nn as nn

torch.manual_seed(1)

def step_by_step(net, sequence, hidden):
    '''
    An example of an LSTM processing a whole sequence one token (one time step) at a time.
    '''
    ## process sequence one element at a time
    print()
    print('start processing sequence')
    for i, token in enumerate(sequence):
        print(f'-- i = {i}')
        #print(f'token.size() = {token.size()}')
        ## add fake seq_len and batch_size dims so the token has shape (seq_len=1, batch=1, input_size)
        h_n, c_n = hidden # hidden state, cell state
        processed_token = token.view(1, 1, -1) # torch.Size([1, 1, 3])
        print(f'processed_token.size() = {processed_token.size()}')
        print(f'h_n.size() = {h_n.size()}')
        #print(f'processed_token = {processed_token}')
        #print(f'h_n = {h_n}')
        # after each step, hidden contains the updated hidden state
        out, hidden = net(processed_token, hidden)  # use the net argument, not the global lstm
    ## print results
    print()
    print(out)
    print(hidden)

if __name__ == '__main__':
    ## model params
    hidden_size = 6
    input_size = 3
    lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
    ## make a sequence of length Tx (list of Tx tensors)
    Tx = 5
    sequence = [torch.randn(1, input_size) for _ in range(Tx)]  # list of Tx tensors, each of shape (1, input_size)
    ## initialize the hidden state.
    hidden = (torch.randn(1, 1, hidden_size), torch.randn(1, 1, hidden_size))
    step_by_step(lstm, sequence, hidden)
    print('DONE \a')
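
For comparison, this is roughly what I imagine the hand-rolled bidirectional version would look like: two separate LSTMs, one stepping over the tokens in order and one over the reversed tokens, with the per-step outputs concatenated at the end. This is only a sketch of what I mean (lstm_fwd, lstm_bwd and run_direction are just illustrative names):

import torch
import torch.nn as nn

torch.manual_seed(1)

input_size, hidden_size, Tx = 3, 6, 5
lstm_fwd = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
lstm_bwd = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
sequence = [torch.randn(1, input_size) for _ in range(Tx)]

def run_direction(rnn, tokens):
    ## step one LSTM over the tokens one time step at a time, collecting every output
    hidden = (torch.zeros(1, 1, hidden_size), torch.zeros(1, 1, hidden_size))
    outs = []
    for token in tokens:
        out, hidden = rnn(token.view(1, 1, -1), hidden)
        outs.append(out)
    return torch.cat(outs, dim=0)  # (Tx, 1, hidden_size)

out_fwd = run_direction(lstm_fwd, sequence)
out_bwd = run_direction(lstm_bwd, list(reversed(sequence)))
out_bwd = torch.flip(out_bwd, dims=[0])  # re-align the backward outputs with the forward time steps
out_bidir = torch.cat([out_fwd, out_bwd], dim=2)  # (Tx, 1, 2*hidden_size)
print(out_bidir.size())

Is this essentially what bidirectional=True computes internally when the full sequence is passed at once, or is there a way to get the same thing without writing the loop twice?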