Implementation of a TensorFlow LSTM model for time series in PyTorch

Hi, I found the following LSTM architecture for time series prediction in a Coursera course (in TensorFlow) and was wondering how to implement it in PyTorch.

The model works on a sliding window: each sequence (of length window_size) is fed into the model, the model predicts over the entire sequence, and the last value is taken as the next-step prediction.
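To make the windowing concrete, here is a minimal sketch of how such windows can be built in PyTorch (the toy series and variable names below are my own illustration, not from the course):

import torch

window_size = 30
series = torch.arange(100, dtype = torch.float32)  # toy series for illustration

# unfold(dim, size, step) produces overlapping windows; using window_size + 1
# makes the first window_size values the input and the final value the target.
windows = series.unfold(0, window_size + 1, 1)     # shape (70, 31)
inputs, targets = windows[:, :-1], windows[:, -1]  # shapes (70, 30) and (70,)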

I believe I have got the first couple of layers correct (I may be wrong). However, I am not sure how to pass the output from the LSTM layer to the fully connected layer.

Here, for this example, I have initialised a tensor x, which can be thought of as a single sequence of data (batch = 1).

Any idea how I can pass the values from the second LSTM layer to the fully connected layer? It would also be nice to know whether what I have implemented so far is correct.

Thanks

TensorFlow Model

from tensorflow.keras.layers import Conv1D, LSTM, Dense, Lambda
from tensorflow.keras.models import Sequential

(window_size, batch_size, shuffle_buffer_size) = (30, 32, 1000)

model = Sequential([
    Conv1D(filters = 64, kernel_size = 3, strides = 1, activation = "relu", padding = "causal", input_shape = [window_size, 1]),
    LSTM(64, return_sequences = True),
    LSTM(64),
    Dense(30, activation = "relu"),
    Dense(30, activation = "relu"),
    Dense(1),
    Lambda(lambda x: x * 400)
])
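One detail worth flagging for the port: in Keras, padding = "causal" pads only on the left by kernel_size - 1, so no output depends on future timesteps, while nn.Conv1d pads on both sides. A minimal sketch of one common equivalent (my assumption, not necessarily how the course handles it) is to pad by kernel_size - 1 and trim the trailing steps:

import torch
import torch.nn as nn

conv = nn.Conv1d(in_channels = 1, out_channels = 64, kernel_size = 3, padding = 2)
x = torch.rand(1, 1, 30)    # (batch, channels, sequence), matching input_shape = [30, 1]
y = conv(x)[:, :, :-2]      # drop the 2 trailing steps to mimic causal padding
print(y.shape)              # torch.Size([1, 64, 30])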

Implementation in PyTorch

import torch
import torch.nn as nn
import torch.nn.functional as F

# input data: a single sequence with batch = 1, shape (1, 32, 30) after unsqueeze
x = torch.rand((32, 30), dtype = torch.float32).unsqueeze(0)

class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        # nn.Conv1d expects (batch, channels, sequence) and returns (batch, channels, sequence)
        # in shape (1, 32, 30) ---> out shape (1, 64, 32)
        self.conv = nn.Conv1d(in_channels = 32, out_channels = 64, kernel_size = 3, stride = 1, padding = 2)

        # nn.LSTM with batch_first = True expects (batch, sequence, input_size) and returns (batch, sequence, hidden_size)
        # in shape (1, 32, 64) after the permute in forward ---> out shape (1, 32, 64)
        self.lstm1 = nn.LSTM(input_size = 64, hidden_size = 64, batch_first = True)

        # nn.LSTM with batch_first = True expects (batch, sequence, input_size) and returns (batch, sequence, hidden_size)
        # in shape (1, 32, 64) ---> out shape (1, 32, 64)
        self.lstm2 = nn.LSTM(input_size = 64, hidden_size = 64, batch_first = True)

        # ---------------------------------------------------- ??? ------------------------------------------------
        # nn.Linear : in (*, in_features) ---> out (*, out_features)
        # in shape (1, 32, 64) ---> out shape (1, ???) - NOT SURE
        # NOT SURE what to use as in_features here
        self.fc1 = nn.Linear(in_features = 32 * 64, out_features = 30)
        # ---------------------------------------------------- --- -----------------------------------------------
        
        #in : (*, in_features) ---> out : (*, out_features)
        self.fc2 = nn.Linear(in_features = 30, out_features = 30)

        #in : (*, in_features) ---> out : (*, out_features)
        self.fc3 = nn.Linear(in_features = 30, out_features = 1)

    def forward(self,x):
        out = self.conv(x)
        out, ht1 = self.lstm1(out.permute(0, 2, 1))
        out, ht2 = self.lstm2(out)
        # ---------------------------------------------- ??? --------------------------------------------------------
        # AGAIN, THIS IS WHERE I AM NOT SURE, BUT GENERALLY YOU UNROLL THE TENSOR BEFORE FEEDING IT INTO THE FC LAYER
        out = out.reshape(out.size(0), -1)   # flatten to (1, 32 * 64) to match fc1's in_features
        out = F.relu(self.fc1(out))
        # -------------------------------------------------------------------------------------------------------
        # ...
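For reference, since the second Keras LSTM has return_sequences = False (it returns only the last timestep), one option I have been considering (a sketch of my own assumption, not a confirmed solution) is to keep only the last timestep of the second LSTM's output, so that fc1 takes 64 features instead of 32 * 64:

import torch
import torch.nn as nn
import torch.nn.functional as F

lstm2_out = torch.rand(1, 32, 64)  # stand-in for the second LSTM's output
fc1 = nn.Linear(in_features = 64, out_features = 30)

last_step = lstm2_out[:, -1, :]    # keep only the last timestep -> (1, 64)
out = F.relu(fc1(last_step))       # (1, 30)
print(out.shape)                   # torch.Size([1, 30])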