From 9bab6300592d182c8be26ce7e2a675331610ee6b Mon Sep 17 00:00:00 2001 From: 90houlaoheshang <907333918@qq.com> Date: Tue, 5 Nov 2024 15:13:48 +0800 Subject: [PATCH 1/7] =?UTF-8?q?fastapi=E6=8E=A5=E6=94=B6=E7=8E=AF=E5=A2=83?= =?UTF-8?q?=E5=8F=98=E9=87=8FEMBEDDING=5FMODEL=E3=80=81LLM=5FMODEL?= =?UTF-8?q?=E3=80=81OPENAI=5FAPI=5FKEY=E3=80=81OPENAI=5FBASE=5FURL?= =?UTF-8?q?=E4=BB=A5=E8=87=AA=E5=AE=9A=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../lightrag_api_openai_compatible_demo.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py index 2cd262bb..7e1f608a 100644 --- a/examples/lightrag_api_openai_compatible_demo.py +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -21,19 +21,18 @@ print(f"WORKING_DIR: {WORKING_DIR}") if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) + # LLM model function async def llm_model_func( - prompt, system_prompt=None, history_messages=[], **kwargs + prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: return await openai_complete_if_cache( - "gpt-4o-mini", + os.environ.get("LLM_MODEL", "gpt-4o-mini"), prompt, system_prompt=system_prompt, history_messages=history_messages, - api_key="YOUR_API_KEY", - base_url="YourURL/v1", **kwargs, ) @@ -44,21 +43,28 @@ async def llm_model_func( async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embedding( texts, - model="text-embedding-3-large", - api_key="YOUR_API_KEY", - base_url="YourURL/v1", + model=os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large"), ) +async def get_embedding_dim(): + test_text = ["This is a test sentence."] + embedding = await embedding_func(test_text) + embedding_dim = embedding.shape[1] + print(f"{embedding_dim=}") + return embedding_dim + + # Initialize RAG instance rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=3072, max_token_size=8192, func=embedding_func - ), + embedding_func=EmbeddingFunc(embedding_dim=asyncio.run(get_embedding_dim()), + max_token_size=8192, + func=embedding_func), ) + # Data models From 78a1b8ed7bbc198a1b5089168aa5ebec6b811604 Mon Sep 17 00:00:00 2001 From: 90houlaoheshang <907333918@qq.com> Date: Tue, 5 Nov 2024 16:14:45 +0800 Subject: [PATCH 2/7] =?UTF-8?q?docs(README):=20=E6=B7=BB=E5=8A=A0=E7=8E=AF?= =?UTF-8?q?=E5=A2=83=E5=8F=98=E9=87=8F=E9=85=8D=E7=BD=AE=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 README.md 文件中补充了环境变量配置步骤 - 新增 OPENAI_BASE_URL、OPENAI_API_KEY、LLM_MODEL 和 EMBEDDING_MODEL 环境变量的说明 - 为使用 OpenAI API 和自定义模型提供了更详细的配置指南 --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 6602f1d3..3e3a1761 100644 --- a/README.md +++ b/README.md @@ -498,6 +498,10 @@ pip install fastapi uvicorn pydantic 2. Set up your environment variables: ```bash export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default" +export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1" +export OPENAI_API_KEY="Your OpenAI API key" # Required +export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini" +export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large" ``` 3. Run the API server: From 3cc448fc268cb13b03c95926bd392a58a7d4ead1 Mon Sep 17 00:00:00 2001 From: 90houlaoheshang <907333918@qq.com> Date: Tue, 5 Nov 2024 16:44:32 +0800 Subject: [PATCH 3/7] =?UTF-8?q?API=E4=B8=AD=E5=8F=AF=E4=BB=A5=E9=80=9A?= =?UTF-8?q?=E8=BF=87POST=E5=8F=82=E6=95=B0=E6=8E=A7=E5=88=B6=E6=9F=A5?= =?UTF-8?q?=E8=AF=A2=E6=98=AF=E5=90=A6only=5Fneed=5Fcontext?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/lightrag_api_openai_compatible_demo.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py index 7e1f608a..8a7286b7 100644 --- a/examples/lightrag_api_openai_compatible_demo.py +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -71,6 +71,7 @@ rag = LightRAG( class QueryRequest(BaseModel): query: str mode: str = "hybrid" + only_need_context: bool = False class InsertRequest(BaseModel): @@ -95,7 +96,8 @@ async def query_endpoint(request: QueryRequest): try: loop = asyncio.get_event_loop() result = await loop.run_in_executor( - None, lambda: rag.query(request.query, param=QueryParam(mode=request.mode)) + None, lambda: rag.query(request.query, + param=QueryParam(mode=request.mode, only_need_context=request.only_need_context)) ) return Response(status="success", data=result) except Exception as e: From 11b791c72b651eaf5f5244c6d0888a3da525fef1 Mon Sep 17 00:00:00 2001 From: 90houlaoheshang <907333918@qq.com> Date: Tue, 5 Nov 2024 16:48:29 +0800 Subject: [PATCH 4/7] README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3e3a1761..33d36ae1 100644 --- a/README.md +++ b/README.md @@ -526,7 +526,8 @@ The API server provides the following endpoints: ```json { "query": "Your question here", - "mode": "hybrid" // Can be "naive", "local", "global", or "hybrid" + "mode": "hybrid", // Can be "naive", "local", "global", or "hybrid" + "only_need_context": true // Optional: Defaults to false, if true, only the referenced context will be returned, otherwise the llm answer will be returned } ``` - **Example:** From a140f744a3a7933e247f4dcb1d54fbb79a41b4ee Mon Sep 17 00:00:00 2001 From: 90houlaoheshang <907333918@qq.com> Date: Tue, 5 Nov 2024 17:22:04 +0800 Subject: [PATCH 5/7] =?UTF-8?q?refactor(lightrag):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=8A=E4=BC=A0=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除 InsertFileRequest 模型,改用 FastAPI 的 File 和 UploadFile - 修改 insert_file 函数,以适应新的文件上传方式 - 更新函数参数和逻辑,支持直接上传文件 - 优化错误处理和响应消息 --- .../lightrag_api_openai_compatible_demo.py | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py index 8a7286b7..94475199 100644 --- a/examples/lightrag_api_openai_compatible_demo.py +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -1,4 +1,4 @@ -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, File, UploadFile from pydantic import BaseModel import os from lightrag import LightRAG, QueryParam @@ -78,10 +78,6 @@ class InsertRequest(BaseModel): text: str -class InsertFileRequest(BaseModel): - file_path: str - - class Response(BaseModel): status: str data: Optional[str] = None @@ -115,30 +111,22 @@ async def insert_endpoint(request: InsertRequest): @app.post("/insert_file", response_model=Response) -async def insert_file(request: InsertFileRequest): +async def insert_file(file: UploadFile = File(...)): try: - # Check if file exists - if not os.path.exists(request.file_path): - raise HTTPException( - status_code=404, detail=f"File not found: {request.file_path}" - ) - + file_content = await file.read() # Read file content try: - with open(request.file_path, "r", encoding="utf-8") as f: - content = f.read() + content = file_content.decode("utf-8") except UnicodeDecodeError: # If UTF-8 decoding fails, try other encodings - with open(request.file_path, "r", encoding="gbk") as f: - content = f.read() - + content = file_content.decode("gbk") # Insert file content loop = asyncio.get_event_loop() await loop.run_in_executor(None, lambda: rag.insert(content)) return Response( status="success", - message=f"File content from {request.file_path} inserted successfully", + message=f"File content from {file.filename} inserted successfully", ) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) From deca6305a953fc74290452b769354f848af536a2 Mon Sep 17 00:00:00 2001 From: 90houlaoheshang <907333918@qq.com> Date: Wed, 6 Nov 2024 10:48:59 +0800 Subject: [PATCH 6/7] =?UTF-8?q?feat(lightrag):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F=E6=8E=A7=E5=88=B6=E5=B5=8C?= =?UTF-8?q?=E5=85=A5=E5=B1=82=E6=9C=80=E5=A4=A7=20token=20=E6=95=B0?= =?UTF-8?q?=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 lightrag_api_openai_compatible_demo.py 中,使用环境变量 EMBEDDING_MAX_TOKEN_SIZE 来设置嵌入层的最大 token 数量,默认值为 8192 --- examples/lightrag_api_openai_compatible_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py index 94475199..bc56ac59 100644 --- a/examples/lightrag_api_openai_compatible_demo.py +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -60,7 +60,7 @@ rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=EmbeddingFunc(embedding_dim=asyncio.run(get_embedding_dim()), - max_token_size=8192, + max_token_size=os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192), func=embedding_func), ) From 846937195e8b93bfa3a2ce59b542fdaee17eb5d7 Mon Sep 17 00:00:00 2001 From: 90houlaoheshang <907333918@qq.com> Date: Wed, 6 Nov 2024 11:13:37 +0800 Subject: [PATCH 7/7] =?UTF-8?q?=E9=9B=86=E4=B8=AD=E5=A4=84=E7=90=86?= =?UTF-8?q?=E7=8E=AF=E5=A2=83=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/lightrag_api_openai_compatible_demo.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py index bc56ac59..20a05a5f 100644 --- a/examples/lightrag_api_openai_compatible_demo.py +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -18,6 +18,13 @@ app = FastAPI(title="LightRAG API", description="API for RAG operations") # Configure working directory WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}") print(f"WORKING_DIR: {WORKING_DIR}") +LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini") +print(f"LLM_MODEL: {LLM_MODEL}") +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large") +print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}") +EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192)) +print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}") + if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) @@ -29,7 +36,7 @@ async def llm_model_func( prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: return await openai_complete_if_cache( - os.environ.get("LLM_MODEL", "gpt-4o-mini"), + LLM_MODEL, prompt, system_prompt=system_prompt, history_messages=history_messages, @@ -43,7 +50,7 @@ async def llm_model_func( async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embedding( texts, - model=os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large"), + model=EMBEDDING_MODEL, ) @@ -60,7 +67,7 @@ rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=EmbeddingFunc(embedding_dim=asyncio.run(get_embedding_dim()), - max_token_size=os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192), + max_token_size=EMBEDDING_MAX_TOKEN_SIZE, func=embedding_func), )