support pipeline mode

jin
2025-01-16 12:52:37 +08:00
parent 17a2ec2bc4
commit d5ae6669ea
5 changed files with 374 additions and 323 deletions


@@ -87,12 +87,14 @@ async def main():
     # We use Oracle DB as the KV/vector/graph storage
     # You can add `addon_params={"example_number": 1, "language": "Simplified Chinese"}` to control the prompt
     rag = LightRAG(
-        working_dir=WORKING_DIR,
         # log_level="DEBUG",
+        working_dir=WORKING_DIR,
         entity_extract_max_gleaning = 1,
-        enable_llm_cache=False,
-        embedding_cache_config= None, # {"enabled": True,"similarity_threshold": 0.90},
+        enable_llm_cache=True,
+        enable_llm_cache_for_entity_extract = True,
+        embedding_cache_config= None, # {"enabled": True,"similarity_threshold": 0.90},
         chunk_token_size=CHUNK_TOKEN_SIZE,
         llm_model_max_token_size = MAX_TOKENS,
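
Note on the cache changes above: the commit turns the LLM response cache on, with a separate switch for entity extraction, while the embedding cache stays disabled. Going by the commented-out example, enabling the embedding cache would look roughly like the sketch below; the keys come from that comment and are not otherwise verified against LightRAG's source.

    # Sketch only: keys copied from the commented-out example above.
    embedding_cache_config = {
        "enabled": True,
        "similarity_threshold": 0.90,  # presumably: reuse a cached embedding above this similarity
    }
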
@@ -106,34 +108,30 @@ async def main():
         graph_storage = "OracleGraphStorage",
         kv_storage = "OracleKVStorage",
         vector_storage="OracleVectorDBStorage",
         doc_status_storage="OracleDocStatusStorage",
-        addon_params = {"example_number":1, "language":"Simplified Chinese"},
+        addon_params = {"example_number":1,
+                        "language":"Simplified Chinese",
+                        "entity_types": ["organization", "person", "geo", "event"],
+                        "insert_batch_size":2,
+                        }
     )
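
The expanded addon_params also pins the entity types used during extraction and adds an insert_batch_size. The batch-size knob presumably groups incoming documents before insertion; the sketch below only illustrates what a batch size of 2 means for a list of documents, independent of LightRAG's actual internals (the batched helper is hypothetical).

    # Hypothetical helper: shows the grouping implied by insert_batch_size=2.
    def batched(items, batch_size):
        for i in range(0, len(items), batch_size):
            yield items[i : i + batch_size]

    docs = ["doc-a", "doc-b", "doc-c", "doc-d", "doc-e"]
    for batch in batched(docs, 2):
        print(batch)  # ['doc-a', 'doc-b'] / ['doc-c', 'doc-d'] / ['doc-e']
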
     # Set the KV/vector/graph storage's `db` property, so all operations will use the same connection pool
-    rag.key_string_value_json_storage_cls.db = oracle_db
-    rag.vector_db_storage_cls.db = oracle_db
-    rag.graph_storage_cls.db = oracle_db
-    rag.doc_status_storage_cls.db = oracle_db
-    rag.doc_status.db = oracle_db
-    rag.full_docs.db = oracle_db
-    rag.text_chunks.db = oracle_db
-    rag.llm_response_cache.db = oracle_db
-    rag.key_string_value_json_storage_cls.db = oracle_db
-    rag.chunks_vdb.db = oracle_db
-    rag.relationships_vdb.db = oracle_db
-    rag.entities_vdb.db = oracle_db
-    rag.graph_storage_cls.db = oracle_db
-    rag.chunk_entity_relation_graph.db = oracle_db
-    rag.llm_response_cache.db = oracle_db
+    rag.set_storage_client(db_client = oracle_db)
     rag.chunk_entity_relation_graph.embedding_func = rag.embedding_func
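
The block of manual .db assignments (including its duplicated lines) collapses into a single set_storage_client call. Below is a minimal sketch of what such a helper plausibly does, with the storage attribute names taken from the deleted lines; this is an illustration, not LightRAG's actual implementation.

    # Sketch only: wire one shared DB client into every storage backend so
    # they all reuse the same connection pool. Attribute names come from
    # the deleted assignments above.
    def set_storage_client(self, db_client):
        for storage in (
            self.full_docs,
            self.text_chunks,
            self.llm_response_cache,
            self.doc_status,
            self.chunks_vdb,
            self.entities_vdb,
            self.relationships_vdb,
            self.chunk_entity_relation_graph,
        ):
            storage.db = db_client
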
     # Extract and insert into LightRAG storage
-    with open("./dickens/demo.txt", "r", encoding="utf-8") as f:
-        await rag.ainsert(f.read())
+    with open(WORKING_DIR + "/docs.txt", "r", encoding="utf-8") as f:
+        all_text = f.read()
+        texts = [x for x in all_text.split("\n") if x]
+    # New mode: use the pipeline
+    await rag.apipeline_process_documents(texts)
+    await rag.apipeline_process_chunks()
+    await rag.apipeline_process_extract_graph()
+    # Old method: use ainsert
+    # await rag.ainsert(texts)
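
This is the heart of the commit: the single ainsert call is split into three pipeline stages. Judging by the method names, they separate document registration, chunking, and graph extraction; the stage comments in the sketch below are inferred from those names rather than from the source.

    # Driving the new pipeline end to end (stage descriptions are inferred).
    async def ingest_with_pipeline(rag, texts):
        await rag.apipeline_process_documents(texts)   # register the raw documents
        await rag.apipeline_process_chunks()           # split them into token-sized chunks
        await rag.apipeline_process_extract_graph()    # extract entities/relations into the graph
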
     # Perform search in different modes
     modes = ["naive", "local", "global", "hybrid"]
     for mode in modes:
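
The loop that follows runs the same query in each retrieval mode; its body is cut off here. A sketch of the usual pattern with LightRAG's QueryParam, assuming a question string of your choice:

    from lightrag import QueryParam

    async def run_queries(rag, question):
        for mode in ["naive", "local", "global", "hybrid"]:
            answer = await rag.aquery(question, param=QueryParam(mode=mode))
            print(f"[{mode}]\n{answer}\n")
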