diff --git a/examples/lightrag_api_ollama_demo.py b/examples/lightrag_api_ollama_demo.py
index f1b68795..dad2a2e0 100644
--- a/examples/lightrag_api_ollama_demo.py
+++ b/examples/lightrag_api_ollama_demo.py
@@ -36,7 +36,10 @@ async def init():
         llm_model_name="gemma2:9b",
         llm_model_max_async=4,
         llm_model_max_token_size=8192,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 8192}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 8192},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -64,6 +67,8 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     title="LightRAG API", description="API for RAG operations", lifespan=lifespan
 )
+
+
 # Data models
 class QueryRequest(BaseModel):
     query: str
diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py
index 2206f40d..312be872 100644
--- a/examples/lightrag_api_openai_compatible_demo.py
+++ b/examples/lightrag_api_openai_compatible_demo.py
@@ -75,7 +75,7 @@ async def get_embedding_dim():
 # Initialize RAG instance
 async def init():
     embedding_dimension = await get_embedding_dim()
-
+
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
@@ -88,9 +88,10 @@ async def init():
 
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     global rag
diff --git a/examples/lightrag_bedrock_demo.py b/examples/lightrag_bedrock_demo.py
index 700b4391..68e9f962 100644
--- a/examples/lightrag_bedrock_demo.py
+++ b/examples/lightrag_bedrock_demo.py
@@ -21,6 +21,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -33,9 +34,10 @@ async def initialize_rag():
 
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 def main():
     rag = asyncio.run(initialize_rag())
 
@@ -47,5 +49,7 @@ def main():
         print(f"| {mode.capitalize()} |")
         print("+-" + "-" * len(mode) + "-+\n")
         print(
-            rag.query("What are the top themes in this story?", param=QueryParam(mode=mode))
+            rag.query(
+                "What are the top themes in this story?", param=QueryParam(mode=mode)
+            )
         )
diff --git a/examples/lightrag_gemini_demo.py b/examples/lightrag_gemini_demo.py
index d20c0d0d..cd2bb579 100644
--- a/examples/lightrag_gemini_demo.py
+++ b/examples/lightrag_gemini_demo.py
@@ -12,6 +12,7 @@ from lightrag.kg.shared_storage import initialize_pipeline_status
 import asyncio
 import nest_asyncio
 
+
 # Apply nest_asyncio to solve event loop issues
 nest_asyncio.apply()
 
@@ -79,9 +80,10 @@ async def initialize_rag():
 
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -98,5 +100,6 @@ def main():
 
     print(response)
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_hf_demo.py b/examples/lightrag_hf_demo.py
index 5cd214fd..f2abbb2f 100644
--- a/examples/lightrag_hf_demo.py
+++ b/examples/lightrag_hf_demo.py
@@ -16,6 +16,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -41,6 +42,7 @@ async def initialize_rag():
 
     return rag
 
+
 def main():
     rag = asyncio.run(initialize_rag())
 
@@ -49,23 +51,32 @@ def main():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_llamaindex_direct_demo.py b/examples/lightrag_llamaindex_direct_demo.py
index d1e68233..d5e3f617 100644
--- a/examples/lightrag_llamaindex_direct_demo.py
+++ b/examples/lightrag_llamaindex_direct_demo.py
@@ -83,7 +83,7 @@ async def get_embedding_dim():
 
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
-
+
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
@@ -96,7 +96,7 @@ async def initialize_rag():
 
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
 
@@ -111,23 +111,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_llamaindex_litellm_demo.py b/examples/lightrag_llamaindex_litellm_demo.py
index d7c609b4..6e738628 100644
--- a/examples/lightrag_llamaindex_litellm_demo.py
+++ b/examples/lightrag_llamaindex_litellm_demo.py
@@ -86,7 +86,7 @@ async def get_embedding_dim():
 
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
-
+
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
@@ -99,7 +99,7 @@ async def initialize_rag():
 
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
 
@@ -114,23 +114,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_lmdeploy_demo.py b/examples/lightrag_lmdeploy_demo.py
index e640a613..ba118fc9 100644
--- a/examples/lightrag_lmdeploy_demo.py
+++ b/examples/lightrag_lmdeploy_demo.py
@@ -41,6 +41,7 @@ async def lmdeploy_model_complete(
         **kwargs,
     )
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -63,9 +64,10 @@ async def initialize_rag():
 
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -77,23 +79,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/examples/lightrag_nvidia_demo.py b/examples/lightrag_nvidia_demo.py
index 9137c1b6..6de0814c 100644
--- a/examples/lightrag_nvidia_demo.py
+++ b/examples/lightrag_nvidia_demo.py
@@ -97,6 +97,7 @@ async def test_funcs():
 
 # asyncio.run(test_funcs())
 
+
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
     print(f"Detected embedding dimension: {embedding_dimension}")
@@ -117,8 +118,10 @@ async def initialize_rag():
 
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
+
+
 async def main():
     try:
         # Initialize RAG instance
diff --git a/examples/lightrag_ollama_age_demo.py b/examples/lightrag_ollama_age_demo.py
index 22e42190..b1b4607a 100644
--- a/examples/lightrag_ollama_age_demo.py
+++ b/examples/lightrag_ollama_age_demo.py
@@ -27,6 +27,7 @@ os.environ["AGE_POSTGRES_HOST"] = "localhost"
 os.environ["AGE_POSTGRES_PORT"] = "5455"
 os.environ["AGE_GRAPH_NAME"] = "dickens"
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -34,7 +35,10 @@
         llm_model_name="llama3.1:8b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -47,13 +51,15 @@
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 async def print_stream(stream):
     async for chunk in stream:
         print(chunk, end="", flush=True)
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
 
@@ -65,22 +71,30 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
     # stream response
@@ -94,5 +108,6 @@ def main():
     else:
         print(resp)
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index 6715ea72..cf43aa4a 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -17,6 +17,7 @@ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -24,7 +25,10 @@
         llm_model_name="gemma2:2b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -36,13 +40,15 @@
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 async def print_stream(stream):
     async for chunk in stream:
         print(chunk, end="", flush=True)
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
 
@@ -54,22 +60,30 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
     # stream response
@@ -83,5 +97,6 @@ def main():
     else:
         print(resp)
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_ollama_gremlin_demo.py b/examples/lightrag_ollama_gremlin_demo.py
index 4d657afa..893b5606 100644
--- a/examples/lightrag_ollama_gremlin_demo.py
+++ b/examples/lightrag_ollama_gremlin_demo.py
@@ -32,6 +32,7 @@ os.environ["GREMLIN_TRAVERSE_SOURCE"] = "g"
 os.environ["GREMLIN_USER"] = ""
 os.environ["GREMLIN_PASSWORD"] = ""
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -39,7 +40,10 @@
         llm_model_name="llama3.1:8b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://localhost:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=768,
             max_token_size=8192,
@@ -52,13 +56,15 @@
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 async def print_stream(stream):
     async for chunk in stream:
         print(chunk, end="", flush=True)
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
 
@@ -70,22 +76,30 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
     # stream response
@@ -99,5 +113,6 @@ def main():
     else:
         print(resp)
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_ollama_neo4j_milvus_mongo_demo.py b/examples/lightrag_ollama_neo4j_milvus_mongo_demo.py
index e5d4064d..b6cc931c 100644
--- a/examples/lightrag_ollama_neo4j_milvus_mongo_demo.py
+++ b/examples/lightrag_ollama_neo4j_milvus_mongo_demo.py
@@ -32,6 +32,7 @@ os.environ["MILVUS_USER"] = "root"
 os.environ["MILVUS_PASSWORD"] = "root"
 os.environ["MILVUS_DB_NAME"] = "lightrag"
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -39,7 +40,10 @@
         llm_model_name="qwen2.5:14b",
         llm_model_max_async=4,
         llm_model_max_token_size=32768,
-        llm_model_kwargs={"host": "http://127.0.0.1:11434", "options": {"num_ctx": 32768}},
+        llm_model_kwargs={
+            "host": "http://127.0.0.1:11434",
+            "options": {"num_ctx": 32768},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=1024,
             max_token_size=8192,
@@ -54,9 +58,10 @@
     await rag.initialize_storages()
     await initialize_pipeline_status()
-
+
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
 
@@ -68,23 +73,32 @@ def main():
     # Test different query modes
     print("\nNaive Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     print("\nLocal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     print("\nGlobal Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     print("\nHybrid Search:")
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py
index f4af7be6..1c4a7a92 100644
--- a/examples/lightrag_openai_compatible_demo.py
+++ b/examples/lightrag_openai_compatible_demo.py
@@ -53,6 +53,7 @@ async def test_funcs():
 
 # asyncio.run(test_funcs())
 
+
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
     print(f"Detected embedding dimension: {embedding_dimension}")
@@ -71,6 +72,8 @@ async def initialize_rag():
     await initialize_pipeline_status()
 
     return rag
+
+
 async def main():
     try:
         # Initialize RAG instance
diff --git a/examples/lightrag_openai_compatible_demo_embedding_cache.py b/examples/lightrag_openai_compatible_demo_embedding_cache.py
index fcdd1ef3..85408f3b 100644
--- a/examples/lightrag_openai_compatible_demo_embedding_cache.py
+++ b/examples/lightrag_openai_compatible_demo_embedding_cache.py
@@ -53,6 +53,7 @@ async def test_funcs():
 
 # asyncio.run(test_funcs())
 
+
 async def initialize_rag():
     embedding_dimension = await get_embedding_dim()
     print(f"Detected embedding dimension: {embedding_dimension}")
@@ -76,6 +77,7 @@ async def initialize_rag():
 
     return rag
 
+
 async def main():
     try:
         # Initialize RAG instance
diff --git a/examples/lightrag_openai_compatible_stream_demo.py b/examples/lightrag_openai_compatible_stream_demo.py
index b3f237e5..ab3e73a5 100644
--- a/examples/lightrag_openai_compatible_stream_demo.py
+++ b/examples/lightrag_openai_compatible_stream_demo.py
@@ -15,6 +15,8 @@ if not os.path.exists(WORKING_DIR):
 print(f"WorkingDir: {WORKING_DIR}")
 api_key = "empty"
 
+
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -40,11 +42,13 @@ async def initialize_rag():
 
     return rag
 
+
 async def print_stream(stream):
     async for chunk in stream:
         if chunk:
             print(chunk, end="", flush=True)
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -63,6 +67,6 @@ def main():
     else:
         print(resp)
 
+
 if __name__ == "__main__":
     main()
-
diff --git a/examples/lightrag_openai_demo.py b/examples/lightrag_openai_demo.py
index f5f47ee2..138b31a2 100644
--- a/examples/lightrag_openai_demo.py
+++ b/examples/lightrag_openai_demo.py
@@ -9,6 +9,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -22,6 +23,7 @@ async def initialize_rag():
 
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -31,24 +33,32 @@ def main():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
-
diff --git a/examples/lightrag_openai_mongodb_graph_demo.py b/examples/lightrag_openai_mongodb_graph_demo.py
index ddf5ca63..67c51892 100644
--- a/examples/lightrag_openai_mongodb_graph_demo.py
+++ b/examples/lightrag_openai_mongodb_graph_demo.py
@@ -76,23 +76,32 @@ def main():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/examples/lightrag_openai_neo4j_milvus_redis_demo.py b/examples/lightrag_openai_neo4j_milvus_redis_demo.py
index adf87691..88a61246 100644
--- a/examples/lightrag_openai_neo4j_milvus_redis_demo.py
+++ b/examples/lightrag_openai_neo4j_milvus_redis_demo.py
@@ -50,6 +50,8 @@ embedding_func = EmbeddingFunc(
         texts, embed_model="shaw/dmeta-embedding-zh", host="http://117.50.173.35:11434"
     ),
 )
+
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -79,23 +81,32 @@ def main():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_oracle_demo.py b/examples/lightrag_oracle_demo.py
index 53139220..420f1af0 100644
--- a/examples/lightrag_oracle_demo.py
+++ b/examples/lightrag_oracle_demo.py
@@ -64,6 +64,7 @@ async def get_embedding_dim():
     embedding_dim = embedding.shape[1]
     return embedding_dim
 
+
 async def initialize_rag():
     # Detect embedding dimension
     embedding_dimension = await get_embedding_dim()
@@ -102,6 +103,7 @@ async def initialize_rag():
 
     return rag
 
+
 async def main():
     try:
         # Initialize RAG instance
diff --git a/examples/lightrag_siliconcloud_demo.py b/examples/lightrag_siliconcloud_demo.py
index 1deb5a66..7a414aca 100644
--- a/examples/lightrag_siliconcloud_demo.py
+++ b/examples/lightrag_siliconcloud_demo.py
@@ -47,6 +47,7 @@ async def test_funcs():
 
 asyncio.run(test_funcs())
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -71,24 +72,32 @@ def main():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
-
diff --git a/examples/lightrag_tidb_demo.py b/examples/lightrag_tidb_demo.py
index c3f8fd19..f167e9cc 100644
--- a/examples/lightrag_tidb_demo.py
+++ b/examples/lightrag_tidb_demo.py
@@ -55,6 +55,7 @@ async def get_embedding_dim():
     embedding_dim = embedding.shape[1]
     return embedding_dim
 
+
 async def initialize_rag():
     # Detect embedding dimension
     embedding_dimension = await get_embedding_dim()
@@ -82,6 +83,7 @@ async def initialize_rag():
 
     return rag
 
+
 async def main():
     try:
         # Initialize RAG instance
diff --git a/examples/lightrag_zhipu_demo.py b/examples/lightrag_zhipu_demo.py
index 30b7316f..fdc37c9c 100644
--- a/examples/lightrag_zhipu_demo.py
+++ b/examples/lightrag_zhipu_demo.py
@@ -19,6 +19,7 @@ api_key = os.environ.get("ZHIPUAI_API_KEY")
 if api_key is None:
     raise Exception("Please set ZHIPU_API_KEY in your environment")
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -38,6 +39,7 @@ async def initialize_rag():
 
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -47,23 +49,32 @@ def main():
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/lightrag_zhipu_postgres_demo.py b/examples/lightrag_zhipu_postgres_demo.py
index 310786a5..304c5f2c 100644
--- a/examples/lightrag_zhipu_postgres_demo.py
+++ b/examples/lightrag_zhipu_postgres_demo.py
@@ -28,6 +28,7 @@ os.environ["POSTGRES_USER"] = "rag"
 os.environ["POSTGRES_PASSWORD"] = "rag"
 os.environ["POSTGRES_DATABASE"] = "rag"
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -55,8 +56,9 @@ async def initialize_rag():
 
     return rag
 
+
 async def main():
-    # Initialize RAG instance
+    # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
 
     # add embedding_func for graph database, it's deleted in commit 5661d76860436f7bf5aef2e50d9ee4a59660146c
diff --git a/examples/query_keyword_separation_example.py b/examples/query_keyword_separation_example.py
index de106de6..cbfdd930 100644
--- a/examples/query_keyword_separation_example.py
+++ b/examples/query_keyword_separation_example.py
@@ -80,6 +80,8 @@ async def test_funcs():
 asyncio.run(test_funcs())
 
 embedding_dimension = 3072
+
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -101,7 +103,7 @@ async def initialize_rag():
 
 async def run_example():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
-
+
     book1 = open("./book_1.txt", encoding="utf-8")
     book2 = open("./book_2.txt", encoding="utf-8")
diff --git a/examples/test.py b/examples/test.py
index dc186bda..f2456436 100644
--- a/examples/test.py
+++ b/examples/test.py
@@ -1,4 +1,5 @@
 import os
+import asyncio
 from lightrag import LightRAG, QueryParam
 from lightrag.llm.openai import gpt_4o_mini_complete
 from lightrag.kg.shared_storage import initialize_pipeline_status
@@ -13,6 +14,7 @@ WORKING_DIR = "./dickens"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -35,23 +37,32 @@ def main():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/examples/test_chromadb.py b/examples/test_chromadb.py
index 10d69cc1..e4e9b698 100644
--- a/examples/test_chromadb.py
+++ b/examples/test_chromadb.py
@@ -112,12 +112,13 @@ async def initialize_rag():
         },
     )
 
-
     await rag.initialize_storages()
     await initialize_pipeline_status()
 
     return rag
 
+
+def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
 
@@ -126,23 +127,32 @@ async def initialize_rag():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/test_faiss.py b/examples/test_faiss.py
index 34991f47..febdce14 100644
--- a/examples/test_faiss.py
+++ b/examples/test_faiss.py
@@ -58,6 +58,7 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
     embeddings = model.encode(texts, convert_to_numpy=True)
     return embeddings
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -78,8 +79,8 @@ async def initialize_rag():
 
     return rag
 
+
 def main():
-    # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
 
     # Insert the custom chunks into LightRAG
diff --git a/examples/test_neo4j.py b/examples/test_neo4j.py
index 2d1d527a..7f620acc 100644
--- a/examples/test_neo4j.py
+++ b/examples/test_neo4j.py
@@ -15,6 +15,7 @@ WORKING_DIR = "./local_neo4jWorkDir"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -29,6 +30,7 @@ async def initialize_rag():
 
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
@@ -38,23 +40,32 @@ def main():
 
     # Perform naive search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
     )
 
     # Perform local search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
     )
 
     # Perform global search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
     )
 
     # Perform hybrid search
     print(
-        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/test_split_by_character.ipynb b/examples/test_split_by_character.ipynb
deleted file mode 100644
index f70f9f34..00000000
--- a/examples/test_split_by_character.ipynb
+++ /dev/null
@@ -1,1313 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "4b5690db12e34685",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:40:58.307102Z",
-     "start_time": "2025-01-09T03:40:51.935233Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import logging\n",
-    "import numpy as np\n",
-    "from lightrag import LightRAG, QueryParam\n",
-    "from lightrag.llm.openai import openai_complete_if_cache, openai_embed\n",
-    "from lightrag.utils import EmbeddingFunc\n",
-    "from lightrag.kg.shared_storage import initialize_pipeline_status\n",
-    "import nest_asyncio"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "dd17956ec322b361",
-   "metadata": {},
-   "source": [
-    "#### split by character"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "8c8ee7c061bf9159",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:13.961167Z",
-     "start_time": "2025-01-09T03:41:13.958357Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "nest_asyncio.apply()\n",
-    "WORKING_DIR = \"../../llm_rag/paper_db/R000088_test1\"\n",
-    "logging.basicConfig(format=\"%(levelname)s:%(message)s\", level=logging.INFO)\n",
-    "if not os.path.exists(WORKING_DIR):\n",
-    "    os.mkdir(WORKING_DIR)\n",
-    "API = os.environ.get(\"DOUBAO_API_KEY\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "a5009d16e0851dca",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:16.862036Z",
-     "start_time": "2025-01-09T03:41:16.859306Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "async def llm_model_func(\n",
-    "    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs\n",
-    ") -> str:\n",
-    "    return await openai_complete_if_cache(\n",
-    "        \"ep-20241218114828-2tlww\",\n",
-    "        prompt,\n",
-    "        system_prompt=system_prompt,\n",
-    "        history_messages=history_messages,\n",
-    "        api_key=API,\n",
-    "        base_url=\"https://ark.cn-beijing.volces.com/api/v3\",\n",
-    "        **kwargs,\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "async def embedding_func(texts: list[str]) -> np.ndarray:\n",
-    "    return await openai_embed(\n",
-    "        texts,\n",
-    "        model=\"ep-20241231173413-pgjmk\",\n",
-    "        api_key=API,\n",
-    "        base_url=\"https://ark.cn-beijing.volces.com/api/v3\",\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "397fcad24ce4d0ed",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:24.950307Z",
-     "start_time": "2025-01-09T03:41:24.940353Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:lightrag:Logger initialized for working directory: ../../llm_rag/paper_db/R000088_test1\n",
-      "INFO:lightrag:Load KV llm_response_cache with 0 data\n",
-      "INFO:lightrag:Load KV full_docs with 0 data\n",
-      "INFO:lightrag:Load KV text_chunks with 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test1/vdb_entities.json'} 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test1/vdb_relationships.json'} 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test1/vdb_chunks.json'} 0 data\n",
-      "INFO:lightrag:Loaded document status storage with 0 records\n"
-     ]
-    }
-   ],
-   "source": [
-    "import asyncio\n",
-    "import nest_asyncio\n",
-    "\n",
-    "nest_asyncio.apply()\n",
-    "\n",
-    "async def initialize_rag():\n",
-    "    rag = LightRAG(\n",
-    "        working_dir=WORKING_DIR,\n",
-    "        llm_model_func=llm_model_func,\n",
-    "        embedding_func=EmbeddingFunc(\n",
-    "            embedding_dim=4096, max_token_size=8192, func=embedding_func\n",
-    "        ),\n",
-    "        chunk_token_size=512,\n",
-    "    )\n",
-    "    await rag.initialize_storages()\n",
-    "    await initialize_pipeline_status()\n",
-    "\n",
-    "    return rag\n",
-    "\n",
-    "rag = asyncio.run(initialize_rag())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "1dc3603677f7484d",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:37.947456Z",
-     "start_time": "2025-01-09T03:41:37.941901Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "with open(\n",
-    "    \"../../llm_rag/example/R000088/auto/R000088_full_txt.md\", \"r\", encoding=\"utf-8\"\n",
-    ") as f:\n",
-    "    content = f.read()\n",
-    "\n",
-    "\n",
-    "async def embedding_func(texts: list[str]) -> np.ndarray:\n",
-    "    return await openai_embed(\n",
-    "        texts,\n",
-    "        model=\"ep-20241231173413-pgjmk\",\n",
-    "        api_key=API,\n",
-    "        base_url=\"https://ark.cn-beijing.volces.com/api/v3\",\n",
-    "    )\n",
-    "\n",
-    "\n",
-    "async def get_embedding_dim():\n",
-    "    test_text = [\"This is a test sentence.\"]\n",
-    "    embedding = await embedding_func(test_text)\n",
-    "    embedding_dim = embedding.shape[1]\n",
-    "    return embedding_dim"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "6844202606acfbe5",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:41:39.608541Z",
-     "start_time": "2025-01-09T03:41:39.165057Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n"
-     ]
-    }
-   ],
-   "source": [
-    "embedding_dimension = await get_embedding_dim()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "d6273839d9681403",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:44:34.295345Z",
-     "start_time": "2025-01-09T03:41:48.324171Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:lightrag:Processing 1 new unique documents\n",
-      "Processing batch 1:   0%|          | 0/1 [00:00标签中,针对每个问题详细分析你的思考过程。然后在<回答>标签中给出所有问题的最终答案。\"\"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "7a6491385b050095",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:45:40.829111Z",
-     "start_time": "2025-01-09T03:45:13.530298Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
-      "INFO:lightrag:Local query uses 5 entites, 12 relations, 3 text units\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/embeddings \"HTTP/1.1 200 OK\"\n",
-      "INFO:lightrag:Global query uses 8 entites, 5 relations, 4 text units\n",
-      "INFO:httpx:HTTP Request: POST https://ark.cn-beijing.volces.com/api/v3/chat/completions \"HTTP/1.1 200 OK\"\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<分析>\n",
-      "1. **该文献主要研究的问题是什么?**\n",
-      "   - 思考过程:通过浏览论文内容,查找作者明确阐述研究目的的部分。文中多处提及“Our study was performed to explore whether folic acid treatment was associated with cancer outcomes and all-cause mortality after extended follow-up”,表明作者旨在探究叶酸治疗与癌症结局及全因死亡率之间的关系,尤其是在经过长期随访后。\n",
-      "2. **该文献采用什么方法进行分析?**\n",
-      "   - 思考过程:寻找描述研究方法和数据分析过程的段落。文中提到“Survival curves were constructed using the Kaplan-Meier method and differences in survival between groups were analyzed using the log-rank test. Estimates of hazard ratios (HRs) with 95% CIs were obtained by using Cox proportional hazards regression models stratified by trial”,可以看出作者使用了Kaplan-Meier法构建生存曲线、log-rank检验分析组间生存差异以及Cox比例风险回归模型估计风险比等方法。\n",
-      "3. **该文献的主要结论是什么?**\n",
-      "   - 思考过程:定位到论文中总结结论的部分,如“Conclusion Treatment with folic acid plus vitamin $\\mathsf{B}_{12}$ was associated with increased cancer outcomes and all-cause mortality in patients with ischemic heart disease in Norway, where there is no folic acid fortification of foods”,可知作者得出叶酸加维生素$\\mathsf{B}_{12}$治疗与癌症结局和全因死亡率增加有关的结论。\n",
-      "<回答>\n",
-      "1. 该文献主要研究的问题是:叶酸治疗与癌症结局及全因死亡率之间的关系,尤其是在经过长期随访后,叶酸治疗是否与癌症结局和全因死亡率相关。\n",
-      "2. 该文献采用的分析方法包括:使用Kaplan-Meier法构建生存曲线、log-rank检验分析组间生存差异、Cox比例风险回归模型估计风险比等。\n",
-      "3. 该文献的主要结论是:在挪威没有叶酸强化食品的情况下,叶酸加维生素$\\mathsf{B}_{12}$治疗与缺血性心脏病患者的癌症结局和全因死亡率增加有关。\n",
-      "\n",
-      "**参考文献**\n",
-      "- [VD] In2Norwegianhomocysteine-lowering trialsamongpatientswithischemicheart disease, there was a statistically nonsignificantincreaseincancerincidenceinthe groupsassignedtofolicacidtreatment.15,16 Our study was performed to explore whetherfolicacidtreatmentwasassociatedwithcanceroutcomesandall-cause mortality after extended follow-up.\n",
-      "- [VD] Survivalcurveswereconstructedusing theKaplan-Meiermethodanddifferences insurvivalbetweengroupswereanalyzed usingthelog-ranktest.Estimatesofhazard ratios (HRs) with $95\\%$ CIs were obtainedbyusingCoxproportionalhazards regressionmodelsstratifiedbytrial.\n",
-      "- [VD] Conclusion Treatment with folic acid plus vitamin $\\mathsf{B}_{12}$ was associated with increased cancer outcomes and all-cause mortality in patients with ischemic heart disease in Norway, where there is no folic acid fortification of foods.\n"
-     ]
-    }
-   ],
-   "source": [
-    "resp = rag.query(prompt1, param=QueryParam(mode=\"mix\", top_k=5))\n",
-    "print(resp)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4e5bfad24cb721a8",
-   "metadata": {},
-   "source": [
-    "#### split by character only"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "44e2992dc95f8ce0",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:47:40.988796Z",
-     "start_time": "2025-01-09T03:47:40.982648Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "WORKING_DIR = \"../../llm_rag/paper_db/R000088_test2\"\n",
-    "if not os.path.exists(WORKING_DIR):\n",
-    "    os.mkdir(WORKING_DIR)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "62c63385d2d973d5",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-09T03:51:39.951329Z",
-     "start_time": "2025-01-09T03:49:15.218976Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:lightrag:Logger initialized for working directory: ../../llm_rag/paper_db/R000088_test2\n",
-      "INFO:lightrag:Load KV llm_response_cache with 0 data\n",
-      "INFO:lightrag:Load KV full_docs with 0 data\n",
-      "INFO:lightrag:Load KV text_chunks with 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test2/vdb_entities.json'} 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test2/vdb_relationships.json'} 0 data\n",
-      "INFO:nano-vectordb:Init {'embedding_dim': 4096, 'metric': 'cosine', 'storage_file': '../../llm_rag/paper_db/R000088_test2/vdb_chunks.json'} 0 data\n",
-      "INFO:lightrag:Loaded document status storage with 0 records\n",
-      "INFO:lightrag:Processing 1 new unique documents\n",
-      "Processing batch 1:   0%|          | 0/1 [00:00\n",
-      "- **该文献主要研究的问题是什么?**\n",
-      "  - **思考过程**:通过浏览论文的标题、摘要、引言等部分,寻找关于研究目的和问题的描述。论文标题为“Cancer Incidence and Mortality After Treatment With Folic Acid and Vitamin B12”,摘要中的“Objective”部分明确指出研究目的是“To evaluate effects of treatment with B vitamins on cancer outcomes and all-cause mortality in 2 randomized controlled trials”。因此,可以确定该文献主要研究的问题是评估B族维生素治疗对两项随机对照试验中癌症结局和全因死亡率的影响。\n",
-      "- **该文献采用什么方法进行分析?**\n",
-      "  - **思考过程**:在论文的“METHODS”部分详细描述了研究方法。文中提到这是一个对两项随机、双盲、安慰剂对照临床试验(Norwegian Vitamin [NORVIT] trial和Western Norway B Vitamin Intervention Trial [WENBIT])数据的联合分析,并进行了观察性的试验后随访。具体包括对参与者进行分组干预(不同剂量的叶酸、维生素B12、维生素B6或安慰剂),收集临床信息和血样,分析循环B族维生素、同型半胱氨酸和可替宁等指标,并进行基因分型等,还涉及到多种统计分析方法,如计算预期癌症发生率、构建生存曲线、进行Cox比例风险回归模型分析等。\n",
-      "- **该文献的主要结论是什么?**\n",
-      "  - **思考过程**:在论文的“Results”和“Conclusion”部分寻找主要结论。研究结果表明,在治疗期间,接受叶酸加维生素B12治疗的参与者血清叶酸浓度显著增加,且在后续随访中,该组癌症发病率、癌症死亡率和全因死亡率均有所上升,主要是肺癌发病率增加,而维生素B6治疗未显示出显著影响。结论部分明确指出“Treatment with folic acid plus vitamin $\\mathsf{B}_{12}$ was associated with increased cancer outcomes and all-cause mortality in patients with ischemic heart disease in Norway, where there is no folic acid fortification of foods”。\n",
-      "\n",
-      "\n",
-      "<回答>\n",
-      "- **主要研究问题**:评估B族维生素治疗对两项随机对照试验中癌症结局和全因死亡率的影响。\n",
-      "- **研究方法**:采用对两项随机、双盲、安慰剂对照临床试验(Norwegian Vitamin [NORVIT] trial和Western Norway B Vitamin Intervention Trial [WENBIT])数据的联合分析,并进行观察性的试验后随访,涉及分组干预、多种指标检测以及多种统计分析方法。\n",
-      "- **主要结论**:在挪威(食品中未添加叶酸),对于缺血性心脏病患者,叶酸加维生素B12治疗与癌症结局和全因死亡率的增加有关,而维生素B6治疗未显示出显著影响。\n",
-      "\n",
-      "**参考文献**\n",
-      "- [VD] Cancer Incidence and Mortality After Treatment With Folic Acid and Vitamin B12\n",
-      "- [VD] METHODS Study Design, Participants, and Study Intervention\n",
-      "- [VD] RESULTS\n",
-      "- [VD] Conclusion\n",
-      "- [VD] Objective To evaluate effects of treatment with B vitamins on cancer outcomes and all-cause mortality in 2 randomized controlled trials.\n"
-     ]
-    }
-   ],
-   "source": [
-    "resp = rag.query(prompt1, param=QueryParam(mode=\"mix\", top_k=5))\n",
-    "print(resp)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7ba6fa79a2550d10",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/examples/vram_management_demo.py b/examples/vram_management_demo.py
index f4d46ab4..36eb5468 100644
--- a/examples/vram_management_demo.py
+++ b/examples/vram_management_demo.py
@@ -14,6 +14,7 @@ TEXT_FILES_DIR = "/llm/mt"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     # Initialize LightRAG
     rag = LightRAG(
@@ -31,6 +32,7 @@ async def initialize_rag():
 
     return rag
 
+
 # Read all .txt files from the TEXT_FILES_DIR directory
 texts = []
 for filename in os.listdir(TEXT_FILES_DIR):
@@ -82,7 +84,8 @@ def main():
     try:
         print(
             rag.query(
-                "What are the top themes in this story?", param=QueryParam(mode="global")
+                "What are the top themes in this story?",
+                param=QueryParam(mode="global"),
             )
         )
     except Exception as e:
@@ -91,18 +94,17 @@ def main():
     try:
         print(
             rag.query(
-                "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+                "What are the top themes in this story?",
+                param=QueryParam(mode="hybrid"),
             )
         )
     except Exception as e:
         print(f"Error performing hybrid search: {e}")
 
-
     # Function to clear VRAM resources
     def clear_vram():
         os.system("sudo nvidia-smi --gpu-reset")
 
-
     # Regularly clear VRAM to prevent overflow
     clear_vram_interval = 3600  # Clear once every hour
     start_time = time.time()
@@ -114,5 +116,6 @@ def main():
             start_time = current_time
         time.sleep(60)  # Check the time every minute
 
+
 if __name__ == "__main__":
     main()
diff --git a/reproduce/Step_1.py b/reproduce/Step_1.py
index 6df00b8a..c94015ad 100644
--- a/reproduce/Step_1.py
+++ b/reproduce/Step_1.py
@@ -31,6 +31,7 @@ WORKING_DIR = f"../{cls}"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(working_dir=WORKING_DIR)
 
@@ -39,6 +40,7 @@ async def initialize_rag():
 
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())
diff --git a/reproduce/Step_1_openai_compatible.py b/reproduce/Step_1_openai_compatible.py
index 7e4139b8..3b9944eb 100644
--- a/reproduce/Step_1_openai_compatible.py
+++ b/reproduce/Step_1_openai_compatible.py
@@ -62,6 +62,7 @@ WORKING_DIR = f"../{cls}"
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
+
 async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
@@ -76,6 +77,7 @@ async def initialize_rag():
 
     return rag
 
+
 def main():
     # Initialize RAG instance
     rag = asyncio.run(initialize_rag())