I am facing the following error in PyTorch, but if I use the same model structure in TensorFlow, the model trains fine.

import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvDLSTM(nn.Module):
    def __init__(self):
        super(ConvDLSTM, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3, padding=1)
        self.maxpool1 = nn.MaxPool1d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.2)

        self.conv2 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3)
        self.maxpool2 = nn.MaxPool1d(kernel_size=2)
        self.dropout2 = nn.Dropout(0.2)

        self.lstm1 = nn.LSTM(input_size=128, hidden_size=256, batch_first=True)
        self.dropout3 = nn.Dropout(0.2)

        self.lstm2 = nn.LSTM(input_size=256, hidden_size=32, batch_first=True)
        self.dropout4 = nn.Dropout(0.2)

        self.flatten = nn.Flatten()

        self.dense1 = nn.Linear(in_features=32, out_features=128)
        self.dense2 = nn.Linear(in_features=128, out_features=4)

    def forward(self, inputs):
        out = self.conv1(inputs)
        out = F.relu(out)
        out = self.maxpool1(out)
        out = self.dropout1(out)

        out = self.conv2(out)
        out = F.relu(out)
        out = self.maxpool2(out)
        out = self.dropout2(out)

        out, _ = self.lstm1(out)
        out = self.dropout3(out)

        out, _ = self.lstm2(out)
        out = self.dropout4(out)

        out = self.flatten(out)

        out = self.dense1(out)
        out = F.relu(out)

        out = self.dense2(out)
        out = F.softmax(out, dim=1)

        return out

RuntimeError                              Traceback (most recent call last)
Cell In[40], line 1
----> 1 training_val(train_loader=train_loader, val_loader=val_loader, epochs=200, model=model_lstm, criterion=criterion, optimizer=optimizer, model_name="CONV1d_LSTM")
      2 print("END \n\n\n\n\n\n\n")
      3 print('Start of new model \n\n\n\n\n\n\n')

Cell In[32], line 17, in training_val(train_loader, val_loader, epochs, model, criterion, optimizer, model_name)
     14 batch_Y = batch_Y.to('cuda')
     15 print(batch_X.shape)
---> 17 outputs = model(batch_X)
     18 loss = criterion(outputs, batch_Y)
     19 loss.backward()

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[37], line 27, in ConvDLSTM.forward(self, inputs)
     25 out = self.conv1(inputs)
     26 out = F.relu(out)
---> 27 out = self.maxpool1(out)
     28 out = self.dropout1(out)
     30 out = self.conv2(out)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/pooling.py:92, in MaxPool1d.forward(self, input)
     91 def forward(self, input: Tensor):
---> 92     return F.max_pool1d(input, self.kernel_size, self.stride,
     93                         self.padding, self.dilation, ceil_mode=self.ceil_mode,
     94                         return_indices=self.return_indices)

File /opt/conda/lib/python3.10/site-packages/torch/_jit_internal.py:484, in boolean_dispatch.<locals>.fn(*args, **kwargs)
    482     return if_true(*args, **kwargs)
    483 else:
--> 484     return if_false(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/torch/nn/functional.py:696, in _max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices)
    694 if stride is None:
    695     stride = torch.jit.annotate(List[int], [])
--> 696 return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)

RuntimeError: Given input size: (128x1x1). Calculated output size: (128x1x0). Output size is too small

Your input shape is most likely not the same in both frameworks, since the intermediate activation is too small for the failing pooling layer (assuming you have verified that the same model architecture is used).
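You can narrow it down by pushing one batch (or a dummy tensor) through the layers one at a time and printing the shapes, e.g. (just a sketch; the (64, 256, 1) shape is an assumption based on the (128x1x1) activation in the error message):

import torch
import torch.nn.functional as F

model = ConvDLSTM()                  # the PyTorch model defined above
x = torch.randn(64, 256, 1)          # hypothetical batch; (128x1x1) in the error implies a length-1 last dim

out = F.relu(model.conv1(x))         # nn.Conv1d reads dim 1 as channels and dim 2 as length -> (64, 128, 1)
print(out.shape)                     # torch.Size([64, 128, 1])

model.maxpool1(out)                  # MaxPool1d(kernel_size=2) cannot pool a length-1 dimension,
                                     # which reproduces "Calculated output size: (128x1x0)"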

I used the same dataset for both (PyTorch and TensorFlow).

My TensorFlow code:

import tensorflow as tf

class ConvDLSTM(tf.keras.Model):
    def __init__(self):
        super(ConvDLSTM, self).__init__()

        self.conv1 = tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu')
        self.maxpool1 = tf.keras.layers.MaxPooling1D(pool_size=2)
        self.dropout1 = tf.keras.layers.Dropout(0.2)

        self.conv2 = tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu')
        self.maxpool2 = tf.keras.layers.MaxPooling1D(pool_size=2)
        self.dropout2 = tf.keras.layers.Dropout(0.2)

        self.lstm1 = tf.keras.layers.LSTM(units=256, return_sequences=True, return_state=True)
        self.dropout3 = tf.keras.layers.Dropout(0.2)

        self.lstm2 = tf.keras.layers.LSTM(units=32, return_sequences=True, return_state=True)
        self.dropout4 = tf.keras.layers.Dropout(0.2)

        self.flatten = tf.keras.layers.Flatten()

        self.dense1 = tf.keras.layers.Dense(units=128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(units=4, activation='softmax')

    def call(self, inputs):
        out = self.conv1(inputs)
        out = self.maxpool1(out)
        out = self.dropout1(out)

        out = self.conv2(out)
        out = self.maxpool2(out)
        out = self.dropout2(out)

        out, _, _ = self.lstm1(out)
        out = self.dropout3(out)

        out, _, _ = self.lstm2(out)
        out = self.dropout4(out)

        out = self.flatten(out)

        out = self.dense1(out)
        out = self.dense2(out)

        return out

model = ConvDLSTM()
model.build((None, 256, 1))  # Input shape (batch_size, sequence_length, features)
model.summary()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_data, train_label, epochs=10, batch_size=64, validation_data=(val_data, val_label))

PyTorch dataset:

from torch.utils.data import TensorDataset, DataLoader

batch_size = 64

train_dataset = TensorDataset(train_data, train_label)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = TensorDataset(val_data, val_label)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

And the model architecture and the shape of the training dataset are based on a research paper.

Hello.
Conv1d expects its input to be in NCW format (in your case, batch x channel x time), while LSTM expects NTC (batch x time x channel). So you need to change the order of the dimensions before passing the tensor to the LSTM, like this:
x = torch.permute(x, [0, 2, 1])
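For example, in your forward, right before the first LSTM (a sketch of just that part):

    out = self.dropout2(self.maxpool2(out))   # conv/pool output: (batch, 128, time)

    out = torch.permute(out, [0, 2, 1])       # (batch, channels, time) -> (batch, time, channels)
    out, _ = self.lstm1(out)                  # nn.LSTM(batch_first=True) wants (batch, time, features)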

It is based on the research paper; they wrote it in this manner only… My actual dataset dimension was 1280 x 8064 x 14, but the paper said to reshape and preprocess it into the shape 555520 x 256 x 1 and use that dataset to train.

The thing is, TensorFlow/Keras by default expects the data to be in channels-last format, while PyTorch convolutions expect channels-first:
tensorflow: batch x time x channel
pytorch: batch x channel x time
In both frameworks, the LSTM expects the data to be in channels-last format,
batch x time x channel
or, when batch_first=False,
time x batch x channel
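Concretely, if your batches are in the Keras layout (batch, 256, 1) = batch x time x channel, then for the PyTorch convolutions you would do something like this (a sketch, assuming that layout):

# batch_X as prepared for Keras: (batch, time=256, channels=1)
batch_X = batch_X.permute(0, 2, 1)   # -> (batch, channels=1, time=256), the layout nn.Conv1d expects

# Note: the Conv1d layers would then need in_channels=1 (not 256) to mirror
# Keras Conv1D(filters=128) applied to a (None, 256, 1) input, and the conv
# output would be permuted back to (batch, time, channels) before the first LSTM.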

I didn't understand; can you elaborate on it a bit…