I’m trying to export an ONNX model for Llama v2 using torch.onnx.export. The script testLlame_om.py runs to completion, but after it finishes no llama_opt.onnx file has been generated, and no error message is shown.
from transformers import LlamaForCausalLM, AutoTokenizer
import torch
import torch_npu
import time
# Print tensors in full (no summarisation) whenever they are displayed.
torch.set_printoptions(profile="full")
# torch.npu.set_compile_mode(jit_compile=True)
# Device string used for model placement and for the export input tensor.
device="npu:1"
# Path of the already-downloaded Hugging Face model.
hf_model_path = 'models--daryl149--llama-2-7b-hf'
# Load the causal-LM from hf_model_path, placed according to device_map=device.
Model = LlamaForCausalLM.from_pretrained(hf_model_path, device_map=device)
# Tokenizer loaded from the same path; not referenced elsewhere in the visible code.
tokenizer = AutoTokenizer.from_pretrained(hf_model_path)
# print(Model)
class SumModule(torch.nn.Module):
    """Thin ``nn.Module`` wrapper that runs ``Model.generate`` in ``forward``.

    The wrapper exists so that the module-level ``Model`` can be handed to an
    API that expects an ``nn.Module`` whose ``forward`` takes a single tensor.
    """

    def forward(self, x):
        """Run generation for the prompt ids ``x`` and return the result.

        Args:
            x: Prompt token ids, forwarded unchanged to ``Model.generate``
                (presumably shaped (batch, seq_len) -- confirm against the
                caller).

        Returns:
            The value returned by ``Model.generate(x, max_length=512)``.
        """
        # The previous version computed the generated ids but never returned
        # them, so forward() always yielded None and a caller had no output
        # tensor to work with. The unused time_start/time_end locals were
        # dropped because their values were never read.
        #
        # NOTE(review): generate() drives a Python-level decoding loop; a
        # tracing-based exporter may not capture it as a dynamic loop. If a
        # reusable graph is needed, consider wrapping a single forward pass
        # instead -- confirm against the exporter documentation.
        return Model.generate(x, max_length=512)
def export():
    """Export ``SumModule`` to ``llama_opt.onnx`` using a fixed example prompt.

    Builds a hard-coded tensor of prompt token ids, adds a batch dimension,
    moves it to the module-level ``device`` and passes it to
    ``torch.onnx.export`` as the example input.

    Raises:
        RuntimeError: If ``torch.onnx.export`` returns but the output file
            does not exist, so a silent export failure becomes visible.
    """
    import os  # local import: only needed for the post-export file check

    output_path = "llama_opt.onnx"
    module = SumModule()
    # Fixed example prompt (token ids) used as the export input.
    input_ids = torch.tensor([1, 3923, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30, 3639, 596, 5655, 6975, 30])
    # Move to the target device, then add the leading batch dimension.
    input_ids_npu = input_ids.to(device).unsqueeze(0)
    module.eval()  # set infer mode
    torch.onnx.export(
        module,
        # A real 1-tuple of positional inputs; "(t)" without the trailing
        # comma is just a parenthesised tensor.
        (input_ids_npu,),
        output_path,
        opset_version=11,
        # With export_params=False the parameters (weights) are not written
        # into the ONNX file.
        export_params=False,
        verbose=False,
        input_names=["x"],
        output_names=["z"],
    )
    # Fail loudly if the exporter returned without producing the file.
    # A stale file left over from an earlier run also satisfies this check.
    if not os.path.isfile(output_path):
        raise RuntimeError(
            f"torch.onnx.export returned without creating {output_path!r}"
        )
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    export()