I have trained a model that contains a GRU. When I convert it to TorchScript with `torch.jit.trace`, I find that the outputs of the original model and the JIT model are different. The problem can be reproduced with the code below. Strangely, other operations (such as fc/conv) are fine: for them, the output of `print(model(y) - jit_model(y))` is a zero matrix, while for the GRU it is not.
import torch
from torch import nn
class gruModel(nn.Module):
    """Single-layer bidirectional GRU applied to a batch-first sequence.

    Args:
        input_size: Number of features per time step (defaults to ``256 * 5``).
        hidden_size: Hidden units per direction (defaults to ``100``).
    """

    def __init__(self, input_size=256 * 5, hidden_size=100):
        super().__init__()
        self.biGRU = nn.GRU(
            input_size,
            hidden_size,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
            bias=True,
        )
        # self.fc = nn.Linear(input_size, 200)

    def forward(self, x):
        """Run the GRU from an all-zero initial hidden state.

        Args:
            x: Input tensor laid out as ``(batch, seq_len, input_size)``
                (the GRU is constructed with ``batch_first=True``).

        Returns:
            The per-time-step GRU output; the final hidden state is discarded.
        """
        # x = self.fc(x)
        # One state slab per layer and direction, sized from the GRU itself so
        # the hidden size is never duplicated as a magic number.
        num_directions = 2 if self.biGRU.bidirectional else 1
        num_states = self.biGRU.num_layers * num_directions
        # new_zeros inherits dtype and device from x, so the state still
        # matches the input when the model runs in half/double precision.
        h0 = x.new_zeros(num_states, x.size(0), self.biGRU.hidden_size)
        x, _ = self.biGRU(x, h0)
        return x
if __name__ == '__main__':
    # Reproduction script: trace the GRU model, round-trip it through disk,
    # and compare the eager output against the TorchScript output.

    # Fall back to CPU so the script also runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    y = torch.rand([1, 256, 1280]).to(device)

    model = gruModel()
    model = torch.nn.DataParallel(model).to(device)
    model.eval()

    # Inference only: no autograd graph is needed to trace or to compare.
    with torch.no_grad():
        # Trace the wrapped module, not the DataParallel container.
        traced_script = torch.jit.trace(model.module, y)
        traced_script.eval()
        traced_script.save("gru_jit.pt")
        jit_model = torch.jit.load("gru_jit.pt")

        eager_out = model(y)
        jit_out = jit_model(y)

    diff = eager_out - jit_out
    print(diff)
    # NOTE(review): a non-zero diff may be plain floating-point rounding
    # between the eager and TorchScript execution paths rather than a real
    # mismatch - confirm by checking the magnitude reported below.
    print("max abs diff:", diff.abs().max().item())
    print("allclose:", torch.allclose(eager_out, jit_out))