How does one make a bidirectional RNN when processing a sequence token by token? Do I have to write a hardcoded for loop, or is the bidirectional flag essentially useless in that setting?
Do things change if my RNN is to some degree custom (but has an LSTM/GRU in it)?
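For context, here is how I understand the flag behaving when the whole sequence is fed in at once: the outputs and final hidden states simply gain a num_directions = 2 dimension. (This snippet is just my own illustration of that, with made-up sizes.)

import torch
import torch.nn as nn

# bidirectional=True works when the full sequence is passed in one call
bi_lstm = nn.LSTM(input_size=3, hidden_size=6, bidirectional=True)

Tx = 5
sequence = torch.randn(Tx, 1, 3)  # (seq_len, batch, input_size)
h0 = torch.zeros(2, 1, 6)         # (num_layers * num_directions, batch, hidden_size)
c0 = torch.zeros(2, 1, 6)

out, (h_n, c_n) = bi_lstm(sequence, (h0, c0))
print(out.size())  # torch.Size([5, 1, 12]): both directions' outputs concatenated per step
print(h_n.size())  # torch.Size([2, 1, 6]): one final hidden state per direction

But this only works because the backward direction can see the end of the sequence up front; I don't see how to drive it one token at a time.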
Some example code of how I am doing one direction:
import torch
import torch.nn as nn

torch.manual_seed(1)

def step_by_step(net, sequence, hidden):
    '''
    An example of an LSTM processing a sequence one token at a time
    (one time step at a time).
    '''
    ## process the sequence one element at a time
    print()
    print('start processing sequence')
    for i, token in enumerate(sequence):
        print(f'-- i = {i}')
        ## add a fake seq_len and a fake batch_size dimension
        h_n, c_n = hidden  # hidden state, cell state
        processed_token = token.view(1, 1, -1)  # torch.Size([1, 1, 3])
        print(f'processed_token.size() = {processed_token.size()}')
        print(f'h_n.size() = {h_n.size()}')
        # after each step, hidden contains the latest (hidden state, cell state)
        out, hidden = net(processed_token, hidden)
    ## print the final results
    print()
    print(out)
    print(hidden)

if __name__ == '__main__':
    ## model params
    hidden_size = 6
    input_size = 3
    lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
    ## make a sequence of length Tx (a list of Tx tensors)
    Tx = 5
    sequence = [torch.randn(1, input_size) for _ in range(Tx)]
    ## initialize the hidden state
    hidden = (torch.randn(1, 1, hidden_size), torch.randn(1, 1, hidden_size))
    step_by_step(lstm, sequence, hidden)
    print('DONE \a')
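The only approach I can think of for the token-by-token case is to hardcode two unidirectional LSTMs and two loops, running the second over the reversed sequence and concatenating the outputs afterwards. A sketch of what I mean (lstm_fwd, lstm_bwd and the zero initial states are my own made-up setup, not a library API):

import torch
import torch.nn as nn

lstm_fwd = nn.LSTM(input_size=3, hidden_size=6)  # left-to-right direction
lstm_bwd = nn.LSTM(input_size=3, hidden_size=6)  # right-to-left direction

sequence = [torch.randn(1, 3) for _ in range(5)]
hidden_fwd = (torch.zeros(1, 1, 6), torch.zeros(1, 1, 6))
hidden_bwd = (torch.zeros(1, 1, 6), torch.zeros(1, 1, 6))

outs_fwd, outs_bwd = [], []
for token in sequence:            # forward pass, token by token
    out, hidden_fwd = lstm_fwd(token.view(1, 1, -1), hidden_fwd)
    outs_fwd.append(out)
for token in reversed(sequence):  # backward pass over the reversed sequence
    out, hidden_bwd = lstm_bwd(token.view(1, 1, -1), hidden_bwd)
    outs_bwd.append(out)
outs_bwd.reverse()                # re-align with forward time order

# concatenate per-step outputs, mimicking what bidirectional=True produces
outs = [torch.cat([f, b], dim=2) for f, b in zip(outs_fwd, outs_bwd)]
print(outs[0].size())  # torch.Size([1, 1, 12])

Is this double loop really the intended way, or is there something built in that I am missing?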