Hi all! I'm using torch.jit to prepare a C++ inference program for a fine-tuned BERT model that was trained in Python with PyTorch.
When I use torch.jit.trace to export the model, I get several warnings caused by the Dropout modules. Do these matter when the traced model is used from C++?
traced_sc = torch.jit.trace(md, (tokens, seg, mask))
/home/james/anaconda3/lib/python3.6/site-packages/torch/jit/__init__.py:642: TracerWarning: Trace had nondeterministic nodes. Nodes:
%input.4 : Float(1, 10, 768) = aten::dropout(%input.3, %266, %267), scope: BertForSequenceClassification/BertModel[bert]/BERTEmbeddings[embeddings]/Dropout[dropout]
%attention_probs.1 : Float(1, 12, 10, 10) = aten::dropout(%input.6, %354, %355), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfAttention[self]/Dropout[dropout]
%hidden_states.1 : Float(1, 10, 768) = aten::dropout(%input.8, %386, %387), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfOutput[output]/Dropout[dropout]
%hidden_states.2 : Float(1, 10, 768) = aten::dropout(%input.11, %430, %431), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTOutput[output]/Dropout[dropout]
%attention_probs.2 : Float(1, 12, 10, 10) = aten::dropout(%input.14, %542, %543), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfAttention[self]/Dropout[dropout]
%hidden_states.3 : Float(1, 10, 768) = aten::dropout(%input.16, %574, %575), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfOutput[output]/Dropout[dropout]
%hidden_states.4 : Float(1, 10, 768) = aten::dropout(%input.19, %618, %619), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTOutput[output]/Dropout[dropout]
%attention_probs.3 : Float(1, 12, 10, 10) = aten::dropout(%input.22, %730, %731), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfAttention[self]/Dropout[dropout]
%hidden_states.5 : Float(1, 10, 768) = aten::dropout(%input.24, %762, %763), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfOutput[output]/Dropout[dropout]
%hidden_states.6 : Float(1, 10, 768) = aten::dropout(%input.27, %806, %807), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTOutput[output]/Dropout[dropout]
%attention_probs.4 : Float(1, 12, 10, 10) = aten::dropout(%input.30, %918, %919), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfAttention[self]/Dropout[dropout]
%hidden_states.7 : Float(1, 10, 768) = aten::dropout(%input.32, %950, %951), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfOutput[output]/Dropout[dropout]
%hidden_states.8 : Float(1, 10, 768) = aten::dropout(%input.35, %994, %995), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTOutput[output]/Dropout[dropout]
%attention_probs.5 : Float(1, 12, 10, 10) = aten::dropout(%input.38, %1106, %1107), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfAttention[self]/Dropout[dropout]
%hidden_states.9 : Float(1, 10, 768) = aten::dropout(%input.40, %1138, %1139), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfOutput[output]/Dropout[dropout]
%hidden_states.10 : Float(1, 10, 768) = aten::dropout(%input.43, %1182, %1183), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTOutput[output]/Dropout[dropout]
%attention_probs.6 : Float(1, 12, 10, 10) = aten::dropout(%input.46, %1294, %1295), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfAttention[self]/Dropout[dropout]
%hidden_states.11 : Float(1, 10, 768) = aten::dropout(%input.48, %1326, %1327), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfOutput[output]/Dropout[dropout]
%hidden_states.12 : Float(1, 10, 768) = aten::dropout(%input.51, %1370, %1371), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTOutput[output]/Dropout[dropout]
%attention_probs.7 : Float(1, 12, 10, 10) = aten::dropout(%input.54, %1482, %1483), scope: BertForSequenceClassification/BertModel[bert]/BERTEncoder[encoder]/BERTLayer/BERTAttention[attention]/BERTSelfAttention[self]/Dropout[dropout]
This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace()
_check_trace([example_inputs], func, executor_options, module, check_tolerance, _force_outplace)
/home/james/anaconda3/lib/python3.6/site-packages/torch/jit/__init__.py:642: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error:
Not within tolerance rtol=1e-05 atol=1e-05 at input[0, 28] (4.9810566902160645 vs. -0.7324681282043457) and 310 other locations (100.00%)
_check_trace([example_inputs], func, executor_options, module, check_tolerance, _force_outplace)
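From what I understand, the nondeterministic nodes are just the Dropout layers still being active during tracing. Would calling md.eval() before tracing make the trace deterministic and silence these warnings? A minimal sketch of what I have in mind (same names as in my snippet above):

md.eval()  # my assumption: eval mode disables dropout, so the trace should be deterministic
with torch.no_grad():
    traced_sc = torch.jit.trace(md, (tokens, seg, mask))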
For now these are only warnings, so I ignored them, saved the traced module, and continued with my C++ code.
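For completeness, this is how I serialize the traced module (the filename is just an example):

traced_sc.save("bert_sc.pt")  # this file is what torch::jit::load reads on the C++ side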
#include <torch/script.h> // One-stop header for loading TorchScript modules.

#include <iostream>
#include <memory>
#include <vector>

int main(int argc, const char* argv[]) {
  if (argc != 2) {
    std::cerr << "usage: " << argv[0] << " <path-to-traced-model>\n";
    return 1;
  }

  // Deserialize the module produced by torch.jit.trace in Python.
  std::shared_ptr<torch::jit::script::Module> module =
      torch::jit::load(argv[1]);
  std::cout << "load model ok\n";

  // Token ids, segment (token type) ids, and attention mask for one
  // sequence of length 10. from_blob does not copy, so the arrays must
  // outlive the tensors built on top of them.
  int64_t data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  int64_t data1[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  int64_t data2[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(
      torch::from_blob(data, {1, 10}, torch::dtype(torch::kInt64)));
  inputs.push_back(
      torch::from_blob(data1, {1, 10}, torch::dtype(torch::kInt64)));
  inputs.push_back(
      torch::from_blob(data2, {1, 10}, torch::dtype(torch::kInt64)));

  auto output = module->forward(inputs).toTensor();
  std::cout << "Logits: \n" << output << std::endl;

  // max over dim 1 returns (values, indices); the indices are int64,
  // so read the argmax back as an integer, not a float.
  auto max_result = output.max(1, true);
  int64_t max_index = std::get<1>(max_result).item<int64_t>();
  std::cout << max_index << std::endl;
  return 0;
}
But it crashes with a core dump when calling forward:
terminate called after throwing an instance of 'torch::jit::script::ErrorReport'
what():
attribute lookup is not defined on builtin:
def forward(self,
input_ids: Tensor,
input_2: Tensor,
attention_mask_1: Tensor) -> Tensor:
extended_attention_mask = torch.unsqueeze(torch.unsqueeze(attention_mask_1, 1), 2)
self = torch.to(extended_attention_mask, 6, False, False)
attention_mask = torch.mul(torch.rsub(self, 1., 1), CONSTANTS.c0)
position_ids = torch.arange(10, dtype=4, layout=0, device=torch.device("cpu"))
input_1 = torch.expand_as(torch.unsqueeze(position_ids, 0), input_ids)
words_embeddings = torch.embedding(self.bert.embeddings.word_embeddings.weight, input_ids, -1, False, False)
~~~~~~~~~~~~~~~~~~~~ <--- HERE
position_embeddings = torch.embedding(self.bert.embeddings.position_embeddings.weight, input_1, -1, False, False)
token_type_embeddings = torch.embedding(self.bert.embeddings.token_type_embeddings.weight, input_2, -1, False, False)
_0 = torch.add(words_embeddings, position_embeddings, alpha=1)
x_1 = torch.add(_0, token_type_embeddings, alpha=1)
u_1 = torch.mean(x_1, [-1], True)
_1 = torch.pow(torch.sub(x_1, u_1, alpha=1), 2)
s_1 = torch.mean(_1, [-1], True)
_2 = torch.sqrt(torch.add(s_1, CONSTANTS.c1, alpha=1))
x_2 = torch.div(torch.sub(x_1, u_1, alpha=1), _2)
Aborted (core dumped)
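Before digging into the C++ side, is the right first step to check whether the saved module even runs in Python after reloading? Something like (filename as above):

loaded = torch.jit.load("bert_sc.pt")
print(loaded(tokens, seg, mask))  # should match the original model's logits if the export is sound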
Thanks in advance for any info!