LSTM with batch_first=True: loss does not decrease

I am using the CRNN from https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py, and training works normally. For TensorRT deployment, however, I changed the LSTM to batch_first=True, and now the loss does not decrease. Why does this happen?

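For context, here is my understanding of the shape contracts involved. This is only a minimal sketch with made-up sizes (b, T, nh are hypothetical), assuming the criterion is a CTC loss as in the original repo, which wants time-major inputs:

import torch
import torch.nn as nn

b, T, nh = 4, 16, 256  # hypothetical batch size, sequence length, hidden size

# The default LSTM is time-major: input and output are [T, b, features].
out_tf, _ = nn.LSTM(256, nh, 2, bidirectional=True)(torch.randn(T, b, 256))
print(out_tf.shape)  # torch.Size([16, 4, 512])

# With batch_first=True, input and output become [b, T, features].
out_bf, _ = nn.LSTM(256, nh, 2, bidirectional=True, batch_first=True)(torch.randn(b, T, 256))
print(out_bf.shape)  # torch.Size([4, 16, 512])

# CTC loss (warp-ctc or nn.CTCLoss) expects log-probs of shape [T, b, nclass],
# i.e. time-major, so a batch-first output has to be rearranged before the loss.

Here is my full modified model:
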
import torch.nn as nn


class CRNN(nn.Module):

    def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        # ks = [3, 3, 3, 3, 3, 3, 2]
        # ps = [1, 1, 1, 1, 1, 1, 0]
        # ss = [1, 1, 1, 1, 1, 1, 1]
        ks = [3, 3, 3, 3, 3]
        ps = [1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1]

        # nm = [64, 128, 256, 256, 512, 512, 512]
        nm = [64, 128, 128, 256, 256]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        # convRelu(1)
        # cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(1, True)
        convRelu(2)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((3, 2), (2, 1), (0, 1)))  # 128x7x65
        convRelu(3, True)
        # convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((3, 2), (2, 1), (0, 1)))  # 256x3x66
        convRelu(4, True)  # 256x1x64

        self.cnn = cnn
        # self.rnn = BidirectionalLSTM(256, nh, nclass)
        self.rnn = nn.LSTM(256, nh, 2, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(nh * 2, nclass, bias=True)
        '''
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))
        '''

    def forward(self, input):
        # conv features
        cnn = self.cnn(input)
        # print(cnn.size(),cnn._trt.shape)
        b, c, h, w = cnn.size()
        # assert h == 1, "the height of conv must be 1"
        # conv = conv.squeeze(2)
        conv = cnn.view(b, c, w)
        conv = conv.permute(0, 2, 1)  # [b, w, c] since batch_first=True
        self.rnn.flatten_parameters()
        rnn, (h_n, c_n) = self.rnn(conv)  # rnn: [b, T, nh * 2]

        b, T, h = rnn.size()
        rec = rnn.reshape(b * T, h)  # flattens batch-major: [b * T, h]

        e = self.linear(rec)  # [b * T, nclass]
        output = e.view(T, b, -1)
        return output

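Update: my current suspicion is the tail of forward(). With batch_first=True the LSTM output rnn is laid out [b, T, h], so rec = rnn.reshape(b * T, h) flattens it batch-major, but e.view(T, b, -1) then reinterprets the same buffer as time-major. That mixes timesteps of different samples, which would explain why a CTC loss cannot learn. A minimal sketch of the difference (sizes are made up; it assumes the criterion wants [T, b, nclass]):

import torch
import torch.nn as nn

b, T, nh, nclass = 4, 16, 256, 37  # hypothetical sizes
linear = nn.Linear(nh * 2, nclass)

rnn_out = torch.randn(b, T, nh * 2)  # what nn.LSTM(batch_first=True) returns

# What my forward() does now: flatten batch-major, then view time-major.
wrong = linear(rnn_out.reshape(b * T, -1)).view(T, b, -1)

# What I believe it should do: apply the linear on the last dim, then permute.
right = linear(rnn_out).permute(1, 0, 2)  # [T, b, nclass]

print(torch.allclose(wrong, right))  # False: the two orderings disagree when b > 1
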
Can anyone help me figure out what is going wrong?