LSTM shape problem for time series feature extraction

Hi,
I am trying to implement an LSTM network as a feature extractor.
The original architecture of my network was:

FeatureExtractorNetworkLSTM(
  (fenet): ModuleList(
    (0): LSTM(18, 256)
    (1): Dropout(p=0.3, inplace=False)
    (2): LSTM(256, 256)
    (3): Dropout(p=0.3, inplace=False)
    (4): LSTM(256, 256)
    (5): Dropout(p=0.3, inplace=False)
    (6): LSTM(256, 256)
    (7): Dropout(p=0.3, inplace=False)
    (8): LSTM(256, 256)
    (9): Dropout(p=0.3, inplace=False)
    (10): LSTM(256, 256)
  )
)


I then changed the architecture to:

FeatureExtractorNetworkLSTM(
  (fenet): ModuleList(
    (0): LSTM(18, 18)
    (1): Dropout(p=0.3, inplace=False)
    (2): LSTM(256, 256)
    (3): Dropout(p=0.3, inplace=False)
    (4): LSTM(256, 256)
    (5): Dropout(p=0.3, inplace=False)
    (6): LSTM(256, 256)
    (7): Dropout(p=0.3, inplace=False)
    (8): LSTM(256, 256)
    (9): Dropout(p=0.3, inplace=False)
    (10): LSTM(256, 256)
  )
)

but the same error is raised again:

RuntimeError: input.size(-1) must be equal to input_size. Expected 18, got 1

I would appreciate any help on how I should implement this network.
Note that batching is not a necessity.

The code of the network:

import os

import torch
import torch.nn as nn


class FeatureExtractorNetworkLSTM(nn.Module):
    def __init__(self, 
                input_size=256, hidden_size=256, output_size=64,
                alpha=0.001,
                n_layers=3, dropout_prob=0,
                optimizer='Adam', chkpt_dir='runs/ppo'):
        super(FeatureExtractorNetworkLSTM, self).__init__()

        self.checkpoint_file = os.path.join(chkpt_dir, 'fe_LSTM_ppo')
        self.fenet = nn.ModuleList()
        for n in range(n_layers):
            if n == 0:
                self.fenet.append(nn.LSTM(input_size, input_size, 1))

            #input_size = input_size if n == 0 else hidden_size
            elif n == 1:
                self.fenet.append(nn.LSTM(input_size, hidden_size, 1))

                if dropout_prob > 0:
                    self.fenet.append(nn.Dropout(dropout_prob))

            else:
                self.fenet.append(nn.LSTM(hidden_size, hidden_size, 1))

                if dropout_prob > 0:
                    self.fenet.append(nn.Dropout(dropout_prob))
        
        self.fenet.append(nn.LSTM(hidden_size, output_size, 1))
        
        #self.fenet.append(nn.Linear(2*hidden_size, 1))
        
        if optimizer == 'Adam':
            self.optimizer = torch.optim.Adam(self.parameters(), lr=alpha)
        if optimizer == 'Adagrad':
            self.optimizer = torch.optim.Adagrad(self.parameters(), lr=alpha)
        
        if optimizer == 'RMSprop':
            self.optimizer = torch.optim.RMSprop(self.parameters(), lr=alpha)
        
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        #dist = self.actor(state)
        for i, l in enumerate(self.fenet):
            f_state = l(state)
        #dist = torch.distributions.categorical.Categorical(dist)

        return f_state

Sample of input tensor:

tensor([[ 1.0658e+00,  1.0675e+00,  1.0637e+00,  1.0652e+00,  1.4717e+04,
          6.2318e+01,  6.0329e+01,  5.8142e+01,  1.0639e+00,  1.0592e+00,
          1.0549e+00,  6.2538e+01,  2.6001e-03,  2.6311e-04,  2.3370e-03,
          1.0681e+00,  1.0558e+00,  1.0619e+00],
        [ 1.0652e+00,  1.0667e+00,  1.0647e+00,  1.0665e+00,  1.1889e+04,
          6.5088e+01,  6.2078e+01,  5.9319e+01,  1.0643e+00,  1.0595e+00,
          1.0550e+00,  6.3402e+01,  2.6151e-03,  2.2250e-04,  2.3926e-03,
          1.0684e+00,  1.0564e+00,  1.0624e+00],
        [ 1.0665e+00,  1.0668e+00,  1.0645e+00,  1.0648e+00,  7.0910e+03,
          5.8982e+01,  5.8555e+01,  5.7163e+01,  1.0644e+00,  1.0597e+00,
          1.0551e+00,  5.8190e+01,  2.4615e-03,  5.5073e-05,  2.4064e-03,
          1.0685e+00,  1.0569e+00,  1.0627e+00],
        [ 1.0648e+00,  1.0651e+00,  1.0630e+00,  1.0641e+00,  7.8530e+03,
          5.6691e+01,  5.7200e+01,  5.6324e+01,  1.0643e+00,  1.0599e+00,
          1.0552e+00,  5.7425e+01,  2.2588e-03, -1.1809e-04,  2.3769e-03,
          1.0685e+00,  1.0576e+00,  1.0630e+00],
        [ 1.0641e+00,  1.0642e+00,  1.0621e+00,  1.0626e+00,  6.8300e+03,
          5.1931e+01,  5.4325e+01,  5.4523e+01,  1.0641e+00,  1.0600e+00,
          1.0553e+00,  5.3252e+01,  1.9554e-03, -3.3719e-04,  2.2926e-03,
          1.0684e+00,  1.0580e+00,  1.0632e+00],
        [ 1.0626e+00,  1.0628e+00,  1.0610e+00,  1.0623e+00,  6.1390e+03,
          5.0850e+01,  5.3664e+01,  5.4107e+01,  1.0638e+00,  1.0601e+00,
          1.0553e+00,  4.8368e+01,  1.6675e-03, -5.0007e-04,  2.1676e-03,
          1.0681e+00,  1.0587e+00,  1.0634e+00]], device='cuda:0')

and the current input tensor shape is:

torch.Size([6, 18])
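
For reference, a minimal sketch of the 3-D layout nn.LSTM expects with the default batch_first=False; I am assuming here that the 6 rows are time steps and the batch size is 1, so that the 18 features stay in the last dimension (which is exactly what the error message checks):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=18, hidden_size=256)   # batch_first=False by default

x = torch.randn(6, 18)          # (seq_len, input_size), same shape as above
x = x.unsqueeze(1)              # -> (seq_len=6, batch=1, input_size=18)
output, (h_n, c_n) = lstm(x)    # nn.LSTM returns a tuple, not a single tensor
print(output.shape)             # torch.Size([6, 1, 256])
print(h_n.shape)                # torch.Size([1, 1, 256])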

Complete traceback:

RuntimeError                              Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4592/1425274335.py in <module>
      7     for t in range(len(env.data)):  # while not env.done
      8         # Select and perform an action
----> 9         action, probs, value = agent.choose_action(state)
     10         reward, done, _ = env.step(action)
     11 

c:\analytics_ai\analytics_ai\trading_bots\fe_ppo_agent.py in choose_action(self, state)
    146             self.actor.device)
    147 
--> 148         fe_states = self.fe_net(state)
    149 
    150         dist = self.actor(fe_states)

C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

c:\analytics_ai\analytics_ai\trading_bots\models.py in forward(self, state)
    418         #dist = self.actor(state)
    419         for i, l in enumerate(self.fenet):
--> 420             f_state = l(state)
    421         #dist = torch.distributions.categorical.Categorical(dist)
    422 

C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
    687             hx = self.permute_hidden(hx, sorted_indices)
    688 
--> 689         self.check_forward_args(input, hx, batch_sizes)
    690         if batch_sizes is None:
    691             result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,

C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\rnn.py in check_forward_args(self, input, hidden, batch_sizes)
    630                            batch_sizes: Optional[Tensor],
    631                            ):
--> 632         self.check_input(input, batch_sizes)
    633         self.check_hidden_size(hidden[0], self.get_expected_hidden_size(input, batch_sizes),
    634                                'Expected hidden[0] size {}, got {}')

C:\Anaconda3\envs\torchgpu\lib\site-packages\torch\nn\modules\rnn.py in check_input(self, input, batch_sizes)
    203                     expected_input_dim, input.dim()))
    204         if self.input_size != input.size(-1):
--> 205             raise RuntimeError(
    206                 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
    207                     self.input_size, input.size(-1)))

RuntimeError: input.size(-1) must be equal to input_size. Expected 18, got 1

I also checked it with torch.unsqueeze(0), which converts the shape to:

torch.Size([1, 6, 18])

but that did not work either.
Thanks

@ptrblck Can you please help me?

Based on your code, it seems you are iterating over the modules in self.fenet, feeding the same input to every layer and only keeping the very last output. This discards the outputs of all layers except the last one, so are you sure this is what you want?
If you want to pass the output of the previous layer to the next one, I would guess that the mismatch between the output features of the first layer and the expected input features of the second (and following) layers is what raises the shape error.
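
For illustration, a minimal sketch of how the loop could chain the outputs instead, assuming the layer sizes of the first architecture above; nn.LSTM returns a tuple (output, (h_n, c_n)), so the output has to be unpacked before it is passed to the next layer:

import torch
import torch.nn as nn

# layer sizes follow the first architecture in the post
layers = nn.ModuleList([
    nn.LSTM(18, 256),
    nn.Dropout(0.3),
    nn.LSTM(256, 256),
])

x = torch.randn(6, 1, 18)               # (seq_len, batch, input_size)
out = x
for layer in layers:
    if isinstance(layer, nn.LSTM):
        out, (h_n, c_n) = layer(out)    # unpack the tuple, pass the sequence output on
    else:
        out = layer(out)                # Dropout acts element-wise on the sequence
print(out.shape)                        # torch.Size([6, 1, 256])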

Thanks, I changed the whole LSTM module and used the default nn.LSTM directly, without the custom nn.Module wrapper. That solved the problem.
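
For anyone running into the same issue, a minimal sketch of that kind of setup, assuming a single stacked nn.LSTM with built-in inter-layer dropout (the exact depth and sizes here are assumptions, not the actual final code):

import torch
import torch.nn as nn

# one stacked LSTM replaces the ModuleList; dropout is applied between the stacked layers
lstm = nn.LSTM(input_size=18, hidden_size=256, num_layers=3, dropout=0.3)

x = torch.randn(6, 1, 18)       # (seq_len, batch=1, input_size)
output, (h_n, c_n) = lstm(x)
print(output.shape)             # torch.Size([6, 1, 256]): one feature vector per time step
print(h_n.shape)                # torch.Size([3, 1, 256]): final hidden state of each layer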