From 3e820cc68ea08127c821f806eb822c96f4cc21b1 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Thu, 20 Feb 2025 14:04:59 +0100 Subject: [PATCH 1/3] fixed default factory --- lightrag/lightrag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 5bb05764..990c1bcf 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -184,7 +184,7 @@ class LightRAG: """Maximum number of concurrent embedding function calls.""" embedding_cache_config: dict[str, Any] = field( - default={ + default_factory= lambda: { "enabled": False, "similarity_threshold": 0.95, "use_llm_check": False, From 214e3e8ad5c4d479d73afc3aee72ecdbfd3b0bf3 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Thu, 20 Feb 2025 14:12:19 +0100 Subject: [PATCH 2/3] fixed last update --- examples/test_faiss.py | 2 +- lightrag/__init__.py | 2 +- lightrag/kg/networkx_impl.py | 4 ++-- lightrag/lightrag.py | 5 ++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/test_faiss.py b/examples/test_faiss.py index ab0ef9f7..c3ac6f47 100644 --- a/examples/test_faiss.py +++ b/examples/test_faiss.py @@ -70,7 +70,7 @@ def main(): ), vector_storage="FaissVectorDBStorage", vector_db_storage_cls_kwargs={ - "cosine_better_than_threshold": 0.3 # Your desired threshold + "cosine_better_than_threshold": 0.2 # Your desired threshold }, ) diff --git a/lightrag/__init__.py b/lightrag/__init__.py index 025fb73b..99f4052f 100644 --- a/lightrag/__init__.py +++ b/lightrag/__init__.py @@ -1,5 +1,5 @@ from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam -__version__ = "1.1.7" +__version__ = "1.1.10" __author__ = "Zirui Guo" __url__ = "https://github.com/HKUDS/LightRAG" diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 614715c4..853bd369 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -16,12 +16,12 @@ import pipmaster as pm if not pm.is_installed("networkx"): pm.install("networkx") + if not pm.is_installed("graspologic"): pm.install("graspologic") -from graspologic import embed import networkx as nx - +from graspologic import embed @final @dataclass diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 990c1bcf..38a6e835 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -738,9 +738,8 @@ class LightRAG: if new_kg is None: logger.info("No new entities or relationships extracted.") else: - async with self._entity_lock: - logger.info("New entities or relationships extracted.") - self.chunk_entity_relation_graph = new_kg + logger.info("New entities or relationships extracted.") + self.chunk_entity_relation_graph = new_kg except Exception as e: logger.error("Failed to extract entities and relationships") From c4562f71b9dcac80fd95b5e5c32dae7d6fba3a67 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Thu, 20 Feb 2025 14:17:26 +0100 Subject: [PATCH 3/3] cleanup extraction --- lightrag/kg/networkx_impl.py | 3 ++- lightrag/lightrag.py | 10 ++-------- lightrag/operate.py | 24 ++++++++++++------------ 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 853bd369..1874719f 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -16,13 +16,14 @@ import pipmaster as pm if not pm.is_installed("networkx"): pm.install("networkx") - + if not pm.is_installed("graspologic"): pm.install("graspologic") import networkx as nx from graspologic import embed + @final @dataclass class NetworkXStorage(BaseGraphStorage): diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 38a6e835..71784a8b 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -184,7 +184,7 @@ class LightRAG: """Maximum number of concurrent embedding function calls.""" embedding_cache_config: dict[str, Any] = field( - default_factory= lambda: { + default_factory=lambda: { "enabled": False, "similarity_threshold": 0.95, "use_llm_check": False, @@ -727,7 +727,7 @@ class LightRAG: async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None: try: - new_kg = await extract_entities( + await extract_entities( chunk, knowledge_graph_inst=self.chunk_entity_relation_graph, entity_vdb=self.entities_vdb, @@ -735,12 +735,6 @@ class LightRAG: llm_response_cache=self.llm_response_cache, global_config=asdict(self), ) - if new_kg is None: - logger.info("No new entities or relationships extracted.") - else: - logger.info("New entities or relationships extracted.") - self.chunk_entity_relation_graph = new_kg - except Exception as e: logger.error("Failed to extract entities and relationships") raise e diff --git a/lightrag/operate.py b/lightrag/operate.py index 27950b7d..a79192ac 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -329,7 +329,7 @@ async def extract_entities( relationships_vdb: BaseVectorStorage, global_config: dict[str, str], llm_response_cache: BaseKVStorage | None = None, -) -> BaseGraphStorage | None: +) -> None: use_llm_func: callable = global_config["llm_model_func"] entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"] enable_llm_cache_for_entity_extract: bool = global_config[ @@ -522,16 +522,18 @@ async def extract_entities( ] ) - if not len(all_entities_data) and not len(all_relationships_data): - logger.warning( - "Didn't extract any entities and relationships, maybe your LLM is not working" - ) - return None + if not (all_entities_data or all_relationships_data): + logger.info("Didn't extract any entities and relationships.") + return - if not len(all_entities_data): - logger.warning("Didn't extract any entities") - if not len(all_relationships_data): - logger.warning("Didn't extract any relationships") + if not all_entities_data: + logger.info("Didn't extract any entities") + if not all_relationships_data: + logger.info("Didn't extract any relationships") + + logger.info( + f"New entities or relationships extracted, entities:{all_entities_data}, relationships:{all_relationships_data}" + ) if entity_vdb is not None: data_for_vdb = { @@ -560,8 +562,6 @@ async def extract_entities( } await relationships_vdb.upsert(data_for_vdb) - return knowledge_graph_inst - async def kg_query( query: str,