I use CRNN in https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py. The training process is normal.
But for tensorrt deployment, when I change the batch_first=True in LSTM,the loss does not decrease.
WHY?
class CRNN(nn.Module):
def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
super(CRNN, self).__init__()
assert imgH % 16 == 0, 'imgH has to be a multiple of 16'
# ks = [3, 3, 3, 3, 3, 3, 2]
# ps = [1, 1, 1, 1, 1, 1, 0]
# ss = [1, 1, 1, 1, 1, 1, 1]
ks = [3, 3, 3, 3, 3]
ps = [1, 1, 1, 1, 0]
ss = [1, 1, 1, 1, 1]
# nm = [64, 128, 256, 256, 512, 512, 512]
nm = [64, 128, 128, 256, 256]
cnn = nn.Sequential()
def convRelu(i, batchNormalization=False):
nIn = nc if i == 0 else nm[i - 1]
nOut = nm[i]
cnn.add_module('conv{0}'.format(i),
nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
if batchNormalization:
cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
if leakyRelu:
cnn.add_module('relu{0}'.format(i),
nn.LeakyReLU(0.2, inplace=True))
else:
cnn.add_module('relu{0}'.format(i), nn.ReLU(True))
convRelu(0)
cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64
# convRelu(1)
# cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32
convRelu(1, True)
convRelu(2)
cnn.add_module('pooling{0}'.format(2),
nn.MaxPool2d((3, 2), (2, 1), (0, 1))) # 256x4x16
convRelu(3, True)
# convRelu(5)
cnn.add_module('pooling{0}'.format(3),
nn.MaxPool2d((3, 2), (2, 1), (0, 1))) # 512x2x16
convRelu(4, True) # 512x1x16
self.cnn = cnn
# self.rnn = BidirectionalLSTM(256, nh, nclass)
self.rnn = nn.LSTM(256, nh,2,bidirectional=True, batch_first=True)
self.linear = nn.Linear(nh * 2, nclass, bias=True)
'''
self.rnn = nn.Sequential(
BidirectionalLSTM(512, nh, nh),
BidirectionalLSTM(nh, nh, nclass))
'''
def forward(self, input):
# conv features
cnn = self.cnn(input)
# print(cnn.size(),cnn._trt.shape)
b, c, h, w = cnn.size()
# assert h == 1, "the height of conv must be 1"
# conv = conv.squeeze(2)
conv = cnn.view(b,c,w)
conv = conv.permute(0, 2, 1) # [w, b, c]
self.rnn.flatten_parameters()
rnn, (h0, c0) = self.rnn(conv)
b, T, h = rnn.size()
rec = rnn.reshape(b*T,h)
e = self.linear(rec) # [T * b, nOut]
output = e.view(T, b,-1)
return output