I’m comparing inference in a traced model using (1) python and (2) c++. The outputs are different. Has anyone successfully run GPT-2 inference with c++?
xposted here: https://github.com/pytorch/pytorch/issues/36891
python version: 3.7.4
torch version: 1.4.0
transformers version: 2.6.0
Creating the model in python
from transformers import GPT2LMHeadModel
model = GPT2LMHeadModel.from_pretrained("distilgpt2",
pad_token_id=tokenizer.eos_token_id,
torchscript=True)
test_text = 'The bear went over the mountain to see'
tokens_tensor = torch.tensor([tokenizer.encode(test_text)])
traced_model = torch.jit.trace(model, [tokens_tensor])
torch.jit.save(traced_model,"traced_distilgpt2.pt")
Inference in pytorch
loaded_model = torch.jit.load("traced_distilgpt2.pt")
outputs = loaded_model(torch.tensor([[464, 6842, 1816, 625, 262, 8598, 284, 766]]))
torch.max(outputs[0],2)
>> torch.return_types.max(values=tensor([[-26.7050, -53.3268, -66.3666, -50.1084, -54.8050, -72.4056, -55.2586,-63.5215]], grad_fn=<MaxBackward0>),
indices=tensor([[ 383, 373, 319, 262, 1353, 290, 262, 262]]))
torch.min(outputs[0][:, :, :],2)
>>torch.return_types.min(
values=tensor([[ -47.3794, -77.6341, -95.8365, -75.3026, -81.6899, -103.6701, -83.0335, -93.0259]], grad_fn=<MinBackward0>),
indices=tensor([[ 154, 7134, 31204, 22997, 10298, 31204, 22997, 31573]]))
Inference in c
int main(int argc, const char* argv[]) {
torch::jit::script::Module module;
module = torch::jit::load(argv[1]);
std::cout << "Model loaded.\n";
module.eval(); // just in case?
torch::Tensor x = torch::tensor({464, 6842, 1816, 625, 262, 8598, 284, 766},
torch::dtype(torch::kInt64)).reshape({8, 1});
std::vector<torch::jit::IValue> inputs;
inputs.push_back(x);
// Execute the model and turn its output into a tensor (all_encoder_layers).
torch::Tensor out = module.forward(inputs).toTuple()->elements()[0].toTensor();
auto z = torch::max(out, /*dim=*/2);
std::cout << std::get<0>(z) << '\n';
std::cout << std::get<1>(z) << '\n';
z = torch::min(out, /*dim=*/2);
std::cout << std::get<0>(z) << '\n';
std::cout << std::get<1>(z) << '\n';
return 0;
}
Output from running c exe (with argv[1] = “traced_distilgpt2.pt”) :
[ Variable[CPULongType]{8,1} ]
-26.7050
-28.0251
-29.0539
-28.2608
-26.3226
-28.0652
-26.8328
-28.5087
[ Variable[CPUFloatType]{8,1} ]
383
383
383
383
383
383
383
383
[ Variable[CPULongType]{8,1} ]
-47.3794
-48.2470
-49.5565
-49.0826
-46.3104
-48.5934
-47.3013
-49.1308
[ Variable[CPUFloatType]{8,1} ]
154
154
154
154
154
154
154
154
[ Variable[CPULongType]{8,1} ]
Observation:
The models agree for the first token (position=0) but for positions 1-7 the encoder outputs disagree.