support pipeline mode

Author: jin
Date: 2025-01-16 12:58:15 +08:00
Parent: d5ae6669ea
Commit: 6ae8647285
6 changed files with 203 additions and 172 deletions


@@ -89,49 +89,45 @@ async def main():
         rag = LightRAG(
             # log_level="DEBUG",
             working_dir=WORKING_DIR,
-            entity_extract_max_gleaning = 1,
+            entity_extract_max_gleaning=1,
             enable_llm_cache=True,
-            enable_llm_cache_for_entity_extract = True,
-            embedding_cache_config= None, # {"enabled": True,"similarity_threshold": 0.90},
+            enable_llm_cache_for_entity_extract=True,
+            embedding_cache_config=None, # {"enabled": True,"similarity_threshold": 0.90},
             chunk_token_size=CHUNK_TOKEN_SIZE,
-            llm_model_max_token_size = MAX_TOKENS,
+            llm_model_max_token_size=MAX_TOKENS,
             llm_model_func=llm_model_func,
             embedding_func=EmbeddingFunc(
                 embedding_dim=embedding_dimension,
                 max_token_size=500,
                 func=embedding_func,
             ),
-            graph_storage = "OracleGraphStorage",
-            kv_storage = "OracleKVStorage",
+            graph_storage="OracleGraphStorage",
+            kv_storage="OracleKVStorage",
             vector_storage="OracleVectorDBStorage",
-            addon_params = {"example_number":1,
-                            "language":"Simplfied Chinese",
-                            "entity_types": ["organization", "person", "geo", "event"],
-                            "insert_batch_size":2,
-                            }
+            addon_params={
+                "example_number": 1,
+                "language": "Simplfied Chinese",
+                "entity_types": ["organization", "person", "geo", "event"],
+                "insert_batch_size": 2,
+            },
         )
         # Setthe KV/vector/graph storage's `db` property, so all operation will use same connection pool
-        rag.set_storage_client(db_client = oracle_db)
+        rag.set_storage_client(db_client=oracle_db)
         # Extract and Insert into LightRAG storage
-        with open(WORKING_DIR+"/docs.txt", "r", encoding="utf-8") as f:
+        with open(WORKING_DIR + "/docs.txt", "r", encoding="utf-8") as f:
             all_text = f.read()
             texts = [x for x in all_text.split("\n") if x]
         # New mode use pipeline
         await rag.apipeline_process_documents(texts)
         await rag.apipeline_process_chunks()
         await rag.apipeline_process_extract_graph()
         # Old method use ainsert
-        #await rag.ainsert(texts)
+        # await rag.ainsert(texts)
         # Perform search in different modes
         modes = ["naive", "local", "global", "hybrid"]
         for mode in modes:
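
For reference, the three apipeline_* calls shown in the diff replace the single ainsert() call the example otherwise uses. Below is a minimal sketch of the new pipeline-mode flow, assuming a LightRAG instance already configured as in the diff above; the per-stage comments are inferred from the method names, and the function name, docs path, and query string are placeholders, not part of the commit.

from lightrag import LightRAG, QueryParam


async def pipeline_insert_and_query(rag: LightRAG, working_dir: str) -> None:
    # Read one document per non-empty line, as the example above does.
    with open(working_dir + "/docs.txt", "r", encoding="utf-8") as f:
        texts = [x for x in f.read().split("\n") if x]

    # New pipeline mode: three explicit stages instead of one end-to-end call.
    await rag.apipeline_process_documents(texts)   # store the raw documents (behavior inferred from the name)
    await rag.apipeline_process_chunks()           # chunk the stored documents (inferred)
    await rag.apipeline_process_extract_graph()    # extract entities/relations into the graph (inferred)

    # Old method, kept commented out in the example:
    # await rag.ainsert(texts)

    # Query in the same four modes the example iterates over.
    for mode in ["naive", "local", "global", "hybrid"]:
        result = await rag.aquery(
            "What is the main theme of these documents?",  # placeholder query
            param=QueryParam(mode=mode),
        )
        print("=" * 20, mode, "=" * 20)
        print(result)

In the demo this would be driven from main() with the Oracle-backed rag instance; splitting insertion into separate document, chunk, and graph-extraction stages lets each stage be invoked on its own rather than only through ainsert().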