Hi all!
Is it possible to stack bidirectional GRUs with different hidden sizes and also add a residual connection from the output of the layer two levels below (the 'L-2 layer') without losing time coherence?
For example:
import torch
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, input_size_encoder, hidden_size, bidirectional):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.input_size_encoder = input_size_encoder
        self.bidirectional = bidirectional
        # input size of layer N (iLN) | hidden size of layer N (oLN)
        self.iL1 = self.input_size_encoder
        self.oL1 = self.hidden_size
        self.iL2 = self.iL1 * 2 + self.oL1 * 2                  # bidirectional: x + L1, both directions
        self.oL2 = self.iL2
        self.iL3 = self.oL2 * 2 + self.oL1 * 2 + self.iL1 * 2   # bidirectional: x + L1 + L2, both directions
        self.oL3 = self.iL3
        self.gru1 = nn.GRU(self.iL1, self.oL1, batch_first=True, bidirectional=self.bidirectional)
        self.gru2 = nn.GRU(self.iL2, self.oL2, batch_first=True, bidirectional=self.bidirectional)
        self.gru3 = nn.GRU(self.iL3, self.oL3, batch_first=True, bidirectional=self.bidirectional)

    def forward(self, x):
        output1, h_n1 = self.gru1(x)
        # concatenate x to the forward & backward halves of output1
        fw1_res = torch.cat((x, output1[:, :, self.oL1:]), dim=2)   # x, L1
        bw1_res = torch.cat((output1[:, :, :self.oL1], x), dim=2)   # L1, x
        output1_residual = torch.cat((fw1_res, bw1_res), dim=2)
        output2, h_n2 = self.gru2(output1_residual)
        # concatenate x & output1 to the forward & backward halves of output2
        fw2_res = torch.cat((x, output1[:, :, self.oL1:], output2[:, :, self.oL2:]), dim=2)   # x, L1, L2
        bw2_res = torch.cat((output2[:, :, :self.oL2], output1[:, :, :self.oL1], x), dim=2)   # L2, L1, x
        output2_residual = torch.cat((fw2_res, bw2_res), dim=2)
        output3, h_n3 = self.gru3(output2_residual)
        return output3, [h_n1, h_n2, h_n3]
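
For reference, here is a quick shape check on the module above; the sizes (10 input features, hidden size 16, batch 4, sequence length 25) are arbitrary and only for illustration:

# quick shape check with arbitrary sizes, just for illustration
enc = Encoder(input_size_encoder=10, hidden_size=16, bidirectional=True)
x = torch.randn(4, 25, 10)   # (batch, seq_len, features), since batch_first=True
out, hiddens = enc(x)
print(out.shape)             # expected: (4, 25, 2 * enc.oL3) = (4, 25, 312)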
How can I tell nn.GRU() that its input comes from bidirectional outputs?
Thank you!