commit 1611400854 (parent 887388c317)
Author: zrguo
Date:   2025-03-03 18:33:42 +08:00

41 changed files with 1390 additions and 1301 deletions

README.md (225 lines changed)

@@ -102,33 +102,47 @@ Use the below Python snippet (in a script) to initialize LightRAG and perform queries:
 ```python
 import os
+import asyncio
 from lightrag import LightRAG, QueryParam
 from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed
+from lightrag.kg.shared_storage import initialize_pipeline_status

-rag = LightRAG(
-    working_dir="your/path",
-    embedding_func=openai_embed,
-    llm_model_func=gpt_4o_mini_complete
-)
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir="your/path",
+        embedding_func=openai_embed,
+        llm_model_func=gpt_4o_mini_complete
+    )

-# Insert text
-rag.insert("Your text")
+    await rag.initialize_storages()
+    await initialize_pipeline_status()

-# Perform naive search
-mode="naive"
-# Perform local search
-mode="local"
-# Perform global search
-mode="global"
-# Perform hybrid search
-mode="hybrid"
-# Mix mode: integrates knowledge graph and vector retrieval
-mode="mix"
+    return rag

-rag.query(
-    "What are the top themes in this story?",
-    param=QueryParam(mode=mode)
-)
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+    # Insert text
+    rag.insert("Your text")
+
+    # Perform naive search
+    mode="naive"
+    # Perform local search
+    mode="local"
+    # Perform global search
+    mode="global"
+    # Perform hybrid search
+    mode="hybrid"
+    # Mix mode: integrates knowledge graph and vector retrieval
+    mode="mix"
+
+    rag.query(
+        "What are the top themes in this story?",
+        param=QueryParam(mode=mode)
+    )
+
+if __name__ == "__main__":
+    main()
 ```
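
For fully async applications, the same flow works without the `asyncio.run` wrapper. A minimal sketch, assuming LightRAG's async counterparts `ainsert` and `aquery` (mirroring `insert`/`query`) and the `initialize_rag()` helper added above:

```python
import asyncio
from lightrag import QueryParam

async def run():
    rag = await initialize_rag()    # helper defined in the snippet above
    await rag.ainsert("Your text")  # async counterpart of insert()
    print(await rag.aquery(
        "What are the top themes in this story?",
        param=QueryParam(mode="mix")  # knowledge graph + vector retrieval
    ))

asyncio.run(run())
```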
### Query Param
@@ -190,15 +204,21 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
     base_url="https://api.upstage.ai/v1/solar"
 )

-rag = LightRAG(
-    working_dir=WORKING_DIR,
-    llm_model_func=llm_model_func,
-    embedding_func=EmbeddingFunc(
-        embedding_dim=4096,
-        max_token_size=8192,
-        func=embedding_func
-    )
-)
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=4096,
+            max_token_size=8192,
+            func=embedding_func
+        )
+    )
+
+    await rag.initialize_storages()
+    await initialize_pipeline_status()
+
+    return rag
 ```
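
A minimal usage sketch for this helper, following the same pattern as the quick-start above (the query text is illustrative):

```python
import asyncio
from lightrag import QueryParam

def main():
    # Initialize the Solar-backed RAG instance defined above
    rag = asyncio.run(initialize_rag())
    rag.insert("Your text")
    print(rag.query("What are the top themes?", param=QueryParam(mode="hybrid")))

if __name__ == "__main__":
    main()
```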
</details>
@@ -210,10 +230,6 @@ rag = LightRAG(
 See `lightrag_hf_demo.py`

 ```python
-from lightrag.llm import hf_model_complete, hf_embed
-from transformers import AutoModel, AutoTokenizer
-from lightrag.utils import EmbeddingFunc
-
 # Initialize LightRAG with Hugging Face model
 rag = LightRAG(
     working_dir=WORKING_DIR,
@@ -242,9 +258,6 @@ If you want to use Ollama models, you need to pull the model you plan to use and the embedding model:
 Then you only need to set LightRAG as follows:

 ```python
-from lightrag.llm.ollama import ollama_model_complete, ollama_embed
-from lightrag.utils import EmbeddingFunc
-
 # Initialize LightRAG with Ollama model
 rag = LightRAG(
     working_dir=WORKING_DIR,
@@ -325,20 +338,58 @@ LightRAG supports integration with LlamaIndex.
 ```python
 # Using LlamaIndex with direct OpenAI access
+import asyncio
 from lightrag import LightRAG
 from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI
+from lightrag.kg.shared_storage import initialize_pipeline_status

-rag = LightRAG(
-    working_dir="your/path",
-    llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
-    embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
-        embedding_dim=1536,
-        max_token_size=8192,
-        func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
-    ),
-)
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir="your/path",
+        llm_model_func=llama_index_complete_if_cache,  # LlamaIndex-compatible completion function
+        embedding_func=EmbeddingFunc(  # LlamaIndex-compatible embedding function
+            embedding_dim=1536,
+            max_token_size=8192,
+            func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
+        ),
+    )
+
+    await rag.initialize_storages()
+    await initialize_pipeline_status()
+
+    return rag
+
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Perform naive search
+    print(
+        rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+    )
+
+    # Perform local search
+    print(
+        rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+    )
+
+    # Perform global search
+    print(
+        rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+    )
+
+    # Perform hybrid search
+    print(
+        rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+    )
+
+if __name__ == "__main__":
+    main()
 ```
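
The snippet above references an `embed_model` that is never defined inside the hunk. A plausible setup, assuming LlamaIndex's `OpenAIEmbedding` (the specific model name is an illustrative choice that matches `embedding_dim=1536`):

```python
from llama_index.embeddings.openai import OpenAIEmbedding

# Hypothetical embedding model for llama_index_embed above;
# choose whichever model matches the declared embedding_dim.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")
```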
#### For detailed documentation and examples, see:
@@ -353,11 +404,6 @@ rag = LightRAG(
 LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it:

 ```python
-from lightrag import LightRAG, QueryParam
-
-# Initialize LightRAG
-rag = LightRAG(working_dir=WORKING_DIR)
-
 # Create conversation history
 conversation_history = [
     {"role": "user", "content": "What is the main character's attitude towards Christmas?"},
@@ -387,11 +433,6 @@ response = rag.query(
 LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it:

 ```python
-from lightrag import LightRAG, QueryParam
-
-# Initialize LightRAG
-rag = LightRAG(working_dir=WORKING_DIR)
-
 # Create query parameters
 query_param = QueryParam(
     mode="hybrid",  # or other mode: "local", "global", "hybrid", "mix" and "naive"
@@ -456,16 +497,6 @@ rag.query_with_separate_keyword_extraction(
 <summary> <b>Insert Custom KG</b> </summary>

 ```python
-rag = LightRAG(
-    working_dir=WORKING_DIR,
-    llm_model_func=llm_model_func,
-    embedding_func=EmbeddingFunc(
-        embedding_dim=embedding_dimension,
-        max_token_size=8192,
-        func=embedding_func,
-    ),
-)
-
 custom_kg = {
     "entities": [
         {
@@ -534,6 +565,7 @@ rag = LightRAG(
         "insert_batch_size": 20  # Process 20 documents per batch
     }
 )
+
 rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 20
 ```
@@ -560,27 +592,6 @@ rag.insert(["TEXT1", "TEXT2",...], ids=["ID_FOR_TEXT1", "ID_FOR_TEXT2"])
 </details>

 <details>
-  <summary><b>Incremental Insert</b></summary>
-
-```python
-# Incremental Insert: Insert new documents into an existing LightRAG instance
-rag = LightRAG(
-    working_dir=WORKING_DIR,
-    llm_model_func=llm_model_func,
-    embedding_func=EmbeddingFunc(
-        embedding_dim=embedding_dimension,
-        max_token_size=8192,
-        func=embedding_func,
-    ),
-)
-
-with open("./newText.txt") as f:
-    rag.insert(f.read())
-```
-
-</details>
-
-<details>
   <summary><b>Insert using Pipeline</b></summary>
@@ -592,6 +603,7 @@ And using a routine to process news documents.
 ```python
 rag = LightRAG(..)
+
 await rag.apipeline_enqueue_documents(input)
 # Your routine in loop
 await rag.apipeline_process_enqueue_documents(input)
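
The "routine in loop" comment suggests a long-running consumer. A minimal sketch of that idea, mirroring the calls shown above (the polling interval and structure are illustrative):

```python
import asyncio

async def pipeline_consumer(rag, input):
    # Enqueue once, then keep draining the pipeline queue on an interval.
    await rag.apipeline_enqueue_documents(input)
    while True:
        await rag.apipeline_process_enqueue_documents(input)
        await asyncio.sleep(10)  # arbitrary polling interval
```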
@@ -633,8 +645,6 @@ export NEO4J_PASSWORD="password"
 # Note: Default settings use NetworkX
 # Initialize LightRAG with Neo4J implementation.
 WORKING_DIR = "./local_neo4jWorkDir"
-
-
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
@@ -706,26 +716,26 @@ You can also install `faiss-gpu` if you have GPU support.
 - Here we are using `sentence-transformers` but you can also use `OpenAIEmbedding` model with `3072` dimensions.
 ```

 ```python
 async def embedding_func(texts: list[str]) -> np.ndarray:
     model = SentenceTransformer('all-MiniLM-L6-v2')
     embeddings = model.encode(texts, convert_to_numpy=True)
     return embeddings

 # Initialize LightRAG with the LLM model function and embedding function
-rag = LightRAG(
-    working_dir=WORKING_DIR,
-    llm_model_func=llm_model_func,
-    embedding_func=EmbeddingFunc(
-        embedding_dim=384,
-        max_token_size=8192,
-        func=embedding_func,
-    ),
-    vector_storage="FaissVectorDBStorage",
-    vector_db_storage_cls_kwargs={
-        "cosine_better_than_threshold": 0.3  # Your desired threshold
-    }
-)
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=llm_model_func,
+    embedding_func=EmbeddingFunc(
+        embedding_dim=384,
+        max_token_size=8192,
+        func=embedding_func,
+    ),
+    vector_storage="FaissVectorDBStorage",
+    vector_db_storage_cls_kwargs={
+        "cosine_better_than_threshold": 0.3  # Your desired threshold
+    }
+)
 ```
</details>
@@ -733,17 +743,6 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
 ## Delete

 ```python
-rag = LightRAG(
-    working_dir=WORKING_DIR,
-    llm_model_func=llm_model_func,
-    embedding_func=EmbeddingFunc(
-        embedding_dim=embedding_dimension,
-        max_token_size=8192,
-        func=embedding_func,
-    ),
-)
-
 # Delete Entity: Deleting entities by their names
 rag.delete_by_entity("Project Gutenberg")
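
Beyond entity deletion, a document-level removal sketch, assuming an async `adelete_by_doc_id` method following the library's async naming convention (verify the name and the ID format against your LightRAG version):

```python
import asyncio

# Hypothetical: remove a document and its derived entities/relations by ID.
asyncio.run(rag.adelete_by_doc_id("doc-xxxxxx"))  # ID shown is a placeholder
```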