I got the same Error in first forward call.
Code to reproduce:
# pip install torch==1.11.0+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
# pip install transformers==4.22
import torch
from transformers import BertForSequenceClassification, AutoTokenizer
options = {
'max_length': 512,
'padding': 'max_length',
'truncation': True,
'add_special_tokens': True,
'return_tensors': 'pt',
}
args = {
'pretrained_model_name_or_path': "dbmdz/bert-base-german-uncased",
'do_lower_case': True,
'local_files_only': False,
'use_fast': False,
}
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(**args)
# load BERT with classification head
device = 'cuda'
bert = BertForSequenceClassification.from_pretrained(
pretrained_model_name_or_path="dbmdz/bert-base-german-uncased",
local_files_only=False,
num_labels=10,
torchscript=True,
return_dict=False,
)
bert = bert.to(device)
# SAMPLE INFERENCE
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, **options)
encoded_input = {k: encoded_input[k].to(device) for k in encoded_input}
output = bert(**encoded_input)
output
# Creating the trace
bert.eval()
traced_model = torch.jit.trace(bert, [encoded_input[k] for k in encoded_input.keys()])
# TEST DATASET
text_1 = "Text one."
tokenized_text_1 = tokenizer(text_1, **options)
text_2 = "Text two!"
tokenized_text_2 = tokenizer(text_2, **options)
# Using a traced model for BATCH inference
inference_input_1 = {k:tokenized_text_1[k].unsqueeze(0).to(device) for k in tokenized_text_1.keys()}
inference_input_2 = {k:tokenized_text_2[k].unsqueeze(0).to(device) for k in tokenized_text_2.keys()}
print('FIRST PASS')
traced_model(**inference_input_1)
print('SECOND PASS')
traced_model(**inference_input_2)
Packages:
torch==1.11.0+cu113
transformers==4.22.0
huggingface-hub==0.12.0
tokenizers==0.12.1
Output of python -m torch.utils.collect_env
:
Collecting environment information...
PyTorch version: 1.11.0+cu113
Is debug build: False
CUDA used to build PyTorch: 11.3
ROCM used to build PyTorch: N/A
OS: Ubuntu 20.04.5 LTS (x86_64)
GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
Clang version: 10.0.0-4ubuntu1
CMake version: version 3.22.6
Libc version: glibc-2.31
Python version: 3.8.10 (default, Nov 14 2022, 12:59:47) [GCC 9.4.0] (64-bit runtime)
Python platform: Linux-5.10.147+-x86_64-with-glibc2.29
Is CUDA available: True
CUDA runtime version: 11.2.152
GPU models and configuration: GPU 0: Tesla T4
Nvidia driver version: 510.47.03
cuDNN version: Probably one of the following:
/usr/lib/x86_64-linux-gnu/libcudnn.so.8.1.1
/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.1.1
/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.1.1
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.1.1
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.1.1
/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.1.1
/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.1.1
HIP runtime version: N/A
MIOpen runtime version: N/A
Versions of relevant libraries:
[pip3] numpy==1.21.6
[pip3] torch==1.11.0+cu113
[pip3] torchaudio==0.13.1+cu116
[pip3] torchsummary==1.5.1
[pip3] torchtext==0.14.1
[pip3] torchvision==0.14.1+cu116
[conda] Could not collect
Code Output:
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
Moving 0 files to the new cache system
0/0 [00:00<?, ?it/s]
Downloading (…)okenizer_config.json: 100%
59.0/59.0 [00:00<00:00, 1.96kB/s]
Downloading (…)lve/main/config.json: 100%
433/433 [00:00<00:00, 16.2kB/s]
Downloading (…)solve/main/vocab.txt: 100%
247k/247k [00:00<00:00, 8.81MB/s]
Downloading (…)"pytorch_model.bin";: 100%
442M/442M [00:11<00:00, 30.4MB/s]
Some weights of the model checkpoint at dbmdz/bert-base-german-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-german-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
FIRST PASS
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-4-8e13bd40136b> in <module>
53
54 print('FIRST PASS')
---> 55 traced_model(**inference_input_1)
56
57 print('SECOND PASS')
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
RuntimeError: default_program(22): error: extra text after expected end of number
default_program(25): error: extra text after expected end of number
2 errors detected in the compilation of "default_program".
nvrtc compilation failed:
#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)
template<typename T>
__device__ T maximum(T a, T b) {
return isnan(a) ? a : (a > b ? a : b);
}
template<typename T>
__device__ T minimum(T a, T b) {
return isnan(a) ? a : (a < b ? a : b);
}
extern "C" __global__
void fused_mul_div_add(float* tattention_scores_1, float* tv_, float* aten_add, float* aten_mul) {
{
if (blockIdx.x<1ll ? 1 : 0) {
float v = __ldg(tv_ + (long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x));
aten_mul[(long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x)] = v * -3.402823466385289e+38.f;
} float v_1 = __ldg(tattention_scores_1 + (long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x));
float v_2 = __ldg(tv_ + ((long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x)) % 512ll);
aten_add[(long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x)] = v_1 / 8.f + v_2 * -3.402823466385289e+38.f;
}
}