Background
A vector store, also known as a vector database, is a database purpose-built for efficiently storing and indexing the vector embeddings produced by AI models. These embeddings are high-dimensional vectors that represent data points in a multi-dimensional space and capture complex semantic relationships. Vector databases excel at handling large volumes of high-dimensional embedding data, which is especially useful in the context of large language models (LLMs) such as GPT, Bard, Claude, and LLaMA.
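To make "semantic similarity between embeddings" concrete, here is a minimal sketch. The three-dimensional vectors below are made-up toy values (real embeddings have hundreds or thousands of dimensions); the point is simply that texts with similar meanings end up with a high cosine similarity, which is what similarity search in a vector database exploits.

import numpy as np

def cosine_similarity(a, b):
    # Cosine similarity: 1.0 means same direction, 0 means unrelated
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

cat = np.array([0.80, 0.10, 0.20])     # hypothetical embedding of "cat"
kitten = np.array([0.75, 0.15, 0.20])  # hypothetical embedding of "kitten"
car = np.array([0.10, 0.90, 0.30])     # hypothetical embedding of "car"

print(cosine_similarity(cat, kitten))  # close to 1.0 -> similar meaning
print(cosine_similarity(cat, car))     # noticeably lower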
Installing Dependencies
pip install chromadb  # pip install faiss-cpu works much the same way; both are vector databases
Writing the Code
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import Chroma

# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader('./state_of_the_union.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, OpenAIEmbeddings())

# Similarity search by query string
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query)
print(docs[0].page_content)

# Similarity search by vector
embedding_vector = OpenAIEmbeddings().embed_query(query)
docs = db.similarity_search_by_vector(embedding_vector)
print(docs[0].page_content)
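Since the install note above mentions that faiss-cpu is largely interchangeable, here is a hedged sketch of the same flow with FAISS through LangChain. It reuses the documents, query, and OpenAIEmbeddings from the snippet above and assumes faiss-cpu has been installed; the vector-store interface is nearly identical.

from langchain_community.vectorstores import FAISS

# Same documents and embeddings as above, different backing index
db = FAISS.from_documents(documents, OpenAIEmbeddings())
docs = db.similarity_search(query)
print(docs[0].page_content)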
A Practical Example
Suppose there is a construction specification for a system, similar in nature to a requirements document, running to roughly 100,000 Chinese characters.
I want to ask questions about its contents. For example, while developing the system, I might ask: "Give me an introduction to such-and-such feature."
The answer should come from the text of the original requirements document, implemented by combining retrieval with augmented generation from a large language model.
The implementation covers the following:
Load the Word document with a DocumentLoader
Vectorize the data with OpenAI embeddings or the open-source text2vec-base-chinese model (see the sanity-check sketch after this list)
Persist the vectorized content
Develop the overall functionality with LangChain
Use ChatOpenAI, with ChatGLM3 also configured as an alternative (locally deployed, secure, and free)
A simple Flask service exposing a GET endpoint, so the functionality is easy to call as an API and returns results
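Before wiring everything together, it can help to sanity-check the embedding model on its own. A minimal sketch, assuming the text2vec-base-chinese weights have been downloaded into a local directory (for example from the shibing624/text2vec-base-chinese repository on Hugging Face):

from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name='./text2vec-base-chinese')
vector = embeddings.embed_query("全息智能感知")
print(len(vector))  # expect 768 for this BERT-base-sized model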
from operator import itemgetter

from flask import Flask
from langchain.chains import RetrievalQA  # used by the commented-out one-shot QA alternative
from langchain.memory import ConversationBufferMemory
from langchain.prompts.chat import ChatPromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import Docx2txtLoader, UnstructuredWordDocumentLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import OpenAI  # used by the commented-out one-shot QA alternative
from langchain_community.llms.chatglm3 import ChatGLM3  # used by the commented-out local-LLM alternative
from langchain_community.vectorstores import Chroma
from langchain_core.messages import get_buffer_string
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import format_document
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

need_embedding = False
persist_directory = 'chroma'

if need_embedding:
    # Load the Word document and extract its text
    # loader = UnstructuredWordDocumentLoader("./short.docx")
    loader = Docx2txtLoader("./short.docx")
    documents = loader.load()

    # Split the text into chunks
    text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
    texts = text_splitter.split_documents(documents)

    # Initialize the embeddings and the vector store
    # embeddings = OpenAIEmbeddings()
    embeddings = HuggingFaceEmbeddings(model_name='./text2vec-base-chinese')
    db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)

    # Persist the vector store to disk
    db.persist()
else:
    # Load the previously persisted vector store
    # embeddings = OpenAIEmbeddings()
    embeddings = HuggingFaceEmbeddings(model_name='./text2vec-base-chinese')
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

# Define the retriever
retriever = db.as_retriever()

# A simpler one-shot alternative without conversation memory:
# qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever)
#
# # Handle a user query
# query = "全息智能感知"
# result = qa.run(query)
# print(result)

# =====================================
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

template = """Answer the question based only on the following context, 请用中文回复:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def llm():
    result = ChatOpenAI(temperature=0.8)
    # Alternative: a locally deployed ChatGLM3 endpoint (secure and free)
    # endpoint_url = "http://10.10.7.160:8000/v1/chat/completions"
    # result = ChatGLM3(
    #     endpoint_url=endpoint_url,
    #     max_tokens=2048,
    # )
    return result


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    # Format the retrieved documents and join them into one context string
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)


# An alternative formulation of the standalone-question step
# (unused below; final_chain builds the same step inline)
_inputs = RunnableParallel(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: get_buffer_string(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | llm()
    | StrOutputParser(),
)

memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)

# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | llm()
    | StrOutputParser(),
}

# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}

# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm(),
    "docs": itemgetter("docs"),
}

# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

# Flask service: a GET endpoint for easy API access
app = Flask(__name__)


@app.route("/get/<question>")
def get(question):
    inputs = {"question": f"{question}"}
    result = final_chain.invoke(inputs)
    print(f"result1: {result}")
    return str(result['answer'])


app.run(host='0.0.0.0', port=8888, debug=True)
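With the service running, it can be exercised with a plain GET request. A minimal sketch using the requests library; the sample question reuses the query from the commented-out RetrievalQA block above, and the port matches the app.run call:

import requests

# requests URL-encodes the Chinese path segment automatically
resp = requests.get("http://localhost:8888/get/全息智能感知")
print(resp.text)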