from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from langchain_community.document_loaders import TextLoader # 文本加载器
from langchain.text_splitter import CharacterTextSplitter # 文本分块器
import weaviate # 向量数据库
from weaviate.embedded import EmbeddedOptions # 向量嵌入选项
from langchain.prompts import ChatPromptTemplate # 聊天提示模板
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser # 输出解析器
from langchain_community.vectorstores import Chroma
import requests
# 加载文件
loader = TextLoader('/home/david/survey/encrypt_rag.txt')
documents = loader.load()
print("Text loaded.")
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(documents)
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
model_name = "moka-ai/m3e-base"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
embedding = HuggingFaceBgeEmbeddings(model_name=model_name,model_kwargs=model_kwargs,encode_kwargs=encode_kwargs)
persist_directory = 'db'
db = Chroma.from_documents(documents, embedding, persist_directory=persist_directory)
print("vector stored.")
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")
# 检索器
retriever = db.as_retriever()
model_path = 'LLM4Binary/llm4decompile-1.3b-v2' # V2 Model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16).cuda()
llm = model
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
# 创建 RAGChain
rag_chain = (
{"context": configurable_retriever, "question": RunnablePassthrough()} # 上下文信息
| prompt
| llm
| StrOutputParser()
)
with open(fileName +'_' + OPT[0] +'.pseudo','r') as f:#optimization level O0
pseudo_func = f.read()
inputs_rag = rag_chain.invoke(pseudo_func)
inputs = tokenizer(inputs_rag, return_tensors="pt").to(model.device)
Here is my code. “inputs_rag = rag_chain.invoke(pseudo_func)” triggers an error:
TypeError: embedding(): argument ‘indices’ (position 2) must be Tensor, not ChatPromptValue
Below is the full error:
Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}
Traceback (most recent call last):
File "/home/david/LLM4Decompile/ghidra/dec_rag.py", line 169, in <module>
inputs_rag = rag_chain.invoke(pseudo_func)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 2875, in invoke
input = step.invoke(input, config)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 4441, in invoke
return self._call_with_config(
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 1784, in _call_with_config
context.run(
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/config.py", line 404, in call_func_with_variable_args
return func(input, **kwargs) # type: ignore[call-arg]
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 4297, in _invoke
output = call_func_with_variable_args(
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/config.py", line 404, in call_func_with_variable_args
return func(input, **kwargs) # type: ignore[call-arg]
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 1141, in forward
outputs = self.model(
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 893, in forward
inputs_embeds = self.embed_tokens(input_ids)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/sparse.py", line 163, in forward
return F.embedding(
File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/functional.py", line 2264, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not ChatPromptValue
I wonder how I can solve it. Thank you so much!