Hi all,
This is my first time using PyTorch and would like to train a CNN -> BiLSTM -> CTC output.
Below is my code for the model, is it implemented correctly? Iām getting the following error.
RuntimeError: input must have 3 dimensions, got 2
for the line out, _ = self.lstm1(out)
Cheers
class ConvNetToBiLSTM(nn.Module):
def __init__(self):
super(ConvNetToBiLSTM, self).__init__()
# 32 by 256
self.layer1 = nn.Sequential(
nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2))
# 16 by 128
self.layer2 = nn.Sequential(
nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2))
self.drop_out = nn.Dropout()
# 8 by 64
self.fc1 = nn.Linear(8 * 64 * 64, 1024)
self.layer_norm = nn.LayerNorm(1024)
self.lstm1 = nn.LSTM(input_size=1024, hidden_size=100, num_layers=1, bidirectional=True)
self.dropout = nn.Dropout()
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = out.reshape(out.size(0), -1)
out = self.drop_out(out)
out = self.fc1(out)
out = self.layer_norm(out)
out = F.gelu(out)
out, _ = self.lstm1(out)
out = self.dropout(out)
return out