I'm trying to build up a rag_chain, but encountering this error——TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not ChatPromptValue

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from langchain_community.document_loaders import TextLoader # 文本加载器
from langchain.text_splitter import CharacterTextSplitter # 文本分块器
import weaviate # 向量数据库
from weaviate.embedded import EmbeddedOptions # 向量嵌入选项
from langchain.prompts import ChatPromptTemplate # 聊天提示模板
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser # 输出解析器
from langchain_community.vectorstores import Chroma
import requests

# 加载文件
loader = TextLoader('/home/david/survey/encrypt_rag.txt')
documents = loader.load()

print("Text loaded.")

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(documents)

from langchain_community.embeddings import HuggingFaceBgeEmbeddings 
model_name = "moka-ai/m3e-base"   
model_kwargs = {'device': 'cpu'}   
encode_kwargs = {'normalize_embeddings': True}   
embedding = HuggingFaceBgeEmbeddings(model_name=model_name,model_kwargs=model_kwargs,encode_kwargs=encode_kwargs)   
persist_directory = 'db'   
db = Chroma.from_documents(documents, embedding, persist_directory=persist_directory)   

print("vector stored.")

from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

# 检索器
retriever = db.as_retriever()

model_path = 'LLM4Binary/llm4decompile-1.3b-v2' # V2 Model

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16).cuda()

llm = model

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# 创建 RAGChain
rag_chain = (
    {"context": configurable_retriever, "question": RunnablePassthrough()}  # 上下文信息
    | prompt
    | llm
    | StrOutputParser()
)

with open(fileName +'_' + OPT[0] +'.pseudo','r') as f:#optimization level O0
    pseudo_func = f.read()

inputs_rag = rag_chain.invoke(pseudo_func)

inputs = tokenizer(inputs_rag, return_tensors="pt").to(model.device)

Here is my code. “inputs_rag = rag_chain.invoke(pseudo_func)” triggers an error:
TypeError: embedding(): argument ‘indices’ (position 2) must be Tensor, not ChatPromptValue

Below is the full error:

Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}
Traceback (most recent call last):
  File "/home/david/LLM4Decompile/ghidra/dec_rag.py", line 169, in <module>
    inputs_rag = rag_chain.invoke(pseudo_func)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 2875, in invoke
    input = step.invoke(input, config)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 4441, in invoke
    return self._call_with_config(
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 1784, in _call_with_config
    context.run(
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/config.py", line 404, in call_func_with_variable_args
    return func(input, **kwargs)  # type: ignore[call-arg]
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 4297, in _invoke
    output = call_func_with_variable_args(
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/langchain_core/runnables/config.py", line 404, in call_func_with_variable_args
    return func(input, **kwargs)  # type: ignore[call-arg]
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 1141, in forward
    outputs = self.model(
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 893, in forward
    inputs_embeds = self.embed_tokens(input_ids)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/sparse.py", line 163, in forward
    return F.embedding(
  File "/home/david/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/functional.py", line 2264, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not ChatPromptValue

I wonder how I can solve it. Thank you so much!

ChatPromptValue seems to be an object defined in LangChain. I don’t know what it contains internally but you would need to transform it to a PyTorch tensor before passing it to the embedding layer.