fix demo

2025-03-03 18:33:42 +08:00
parent 887388c317
commit 1611400854
41 changed files with 1390 additions and 1301 deletions
--- a/examples/lightrag_nvidia_demo.py
+++ b/examples/lightrag_nvidia_demo.py
@@ -1,5 +1,9 @@
 import os
 import asyncio
+import nest_asyncio
+
+nest_asyncio.apply()
+
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import (
    openai_complete_if_cache,
@@ -7,10 +11,12 @@ from lightrag.llm import (
 )
 from lightrag.utils import EmbeddingFunc
 import numpy as np
+from lightrag.kg.shared_storage import initialize_pipeline_status

 # for custom llm_model_func
 from lightrag.utils import locate_json_string_body_from_string

+
 WORKING_DIR = "./dickens"

 if not os.path.exists(WORKING_DIR):
@@ -91,42 +97,37 @@ async def test_funcs():

 # asyncio.run(test_funcs())

+async def initialize_rag():  # async factory: builds the LightRAG instance, awaits its setup calls, returns it
+    embedding_dimension = await get_embedding_dim()
+    print(f"Detected embedding dimension: {embedding_dimension}")

+    # LightRAG instance configured with the indexing embedding function
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        # llm_model_name="meta/llama3-70b-instruct",  # uncomment if needed
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dimension,
+            max_token_size=512,  # maximum token size; inputs can somehow still exceed the model's token limit,
+            # so the truncate (trunc) parameter on the embedding func handles the overflow.
+            # Future work: examine the tokenizer used in LightRAG and adjust it to fit the NVIDIA model.
+            func=indexing_embedding_func,
+        ),
+    )
+
+    await rag.initialize_storages()  # presumably prepares the storage backends; confirm against LightRAG docs
+    await initialize_pipeline_status()  # imported from lightrag.kg.shared_storage
+    
+    return rag
 async def main():
    try:
-        embedding_dimension = await get_embedding_dim()
-        print(f"Detected embedding dimension: {embedding_dimension}")
-
-        # lightRAG class during indexing
-        rag = LightRAG(
-            working_dir=WORKING_DIR,
-            llm_model_func=llm_model_func,
-            # llm_model_name="meta/llama3-70b-instruct", #un comment if
-            embedding_func=EmbeddingFunc(
-                embedding_dim=embedding_dimension,
-                max_token_size=512,  # maximum token size, somehow it's still exceed maximum number of token
-                # so truncate (trunc) parameter on embedding_func will handle it and try to examine the tokenizer used in LightRAG
-                # so you can adjust to be able to fit the NVIDIA model (future work)
-                func=indexing_embedding_func,
-            ),
-        )
+        # Initialize RAG instance
+        rag = asyncio.run(initialize_rag())

        # reading file
        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

-        # redefine rag to change embedding into query type
-        rag = LightRAG(
-            working_dir=WORKING_DIR,
-            llm_model_func=llm_model_func,
-            # llm_model_name="meta/llama3-70b-instruct", #un comment if
-            embedding_func=EmbeddingFunc(
-                embedding_dim=embedding_dimension,
-                max_token_size=512,
-                func=query_embedding_func,
-            ),
-        )
-
        # Perform naive search
        print("==============Naive===============")
        print(