This is my first time using PyTorch and would like to train a CNN -> BiLSTM -> CTC output.
Below is my code for the model, is it implemented correctly? I’m getting the following error.
RuntimeError: input must have 3 dimensions, got 2 for the line
out, _ = self.lstm1(out)
class ConvNetToBiLSTM(nn.Module): def __init__(self): super(ConvNetToBiLSTM, self).__init__() # 32 by 256 self.layer1 = nn.Sequential( nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2)) # 16 by 128 self.layer2 = nn.Sequential( nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2)) self.drop_out = nn.Dropout() # 8 by 64 self.fc1 = nn.Linear(8 * 64 * 64, 1024) self.layer_norm = nn.LayerNorm(1024) self.lstm1 = nn.LSTM(input_size=1024, hidden_size=100, num_layers=1, bidirectional=True) self.dropout = nn.Dropout() def forward(self, x): out = self.layer1(x) out = self.layer2(out) out = out.reshape(out.size(0), -1) out = self.drop_out(out) out = self.fc1(out) out = self.layer_norm(out) out = F.gelu(out) out, _ = self.lstm1(out) out = self.dropout(out) return out