cleanup extraction

This commit is contained in:
Yannick Stephan
2025-02-20 14:17:26 +01:00
parent 214e3e8ad5
commit c4562f71b9
3 changed files with 16 additions and 21 deletions

View File

@@ -23,6 +23,7 @@ if not pm.is_installed("graspologic"):
 import networkx as nx
 from graspologic import embed
 @final
 @dataclass
 class NetworkXStorage(BaseGraphStorage):

View File

@@ -184,7 +184,7 @@ class LightRAG:
     """Maximum number of concurrent embedding function calls."""
     embedding_cache_config: dict[str, Any] = field(
-        default_factory= lambda: {
+        default_factory=lambda: {
             "enabled": False,
             "similarity_threshold": 0.95,
             "use_llm_check": False,
@@ -727,7 +727,7 @@ class LightRAG:
     async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
         try:
-            new_kg = await extract_entities(
+            await extract_entities(
                 chunk,
                 knowledge_graph_inst=self.chunk_entity_relation_graph,
                 entity_vdb=self.entities_vdb,
@@ -735,12 +735,6 @@ class LightRAG:
                 llm_response_cache=self.llm_response_cache,
                 global_config=asdict(self),
             )
-            if new_kg is None:
-                logger.info("No new entities or relationships extracted.")
-            else:
-                logger.info("New entities or relationships extracted.")
-                self.chunk_entity_relation_graph = new_kg
         except Exception as e:
             logger.error("Failed to extract entities and relationships")
             raise e

View File

@@ -329,7 +329,7 @@ async def extract_entities(
     relationships_vdb: BaseVectorStorage,
     global_config: dict[str, str],
     llm_response_cache: BaseKVStorage | None = None,
-) -> BaseGraphStorage | None:
+) -> None:
     use_llm_func: callable = global_config["llm_model_func"]
     entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
     enable_llm_cache_for_entity_extract: bool = global_config[
@@ -522,16 +522,18 @@ async def extract_entities(
         ]
     )
-    if not len(all_entities_data) and not len(all_relationships_data):
-        logger.warning(
-            "Didn't extract any entities and relationships, maybe your LLM is not working"
-        )
-        return None
+    if not (all_entities_data or all_relationships_data):
+        logger.info("Didn't extract any entities and relationships.")
+        return
-    if not len(all_entities_data):
-        logger.warning("Didn't extract any entities")
-    if not len(all_relationships_data):
-        logger.warning("Didn't extract any relationships")
+    if not all_entities_data:
+        logger.info("Didn't extract any entities")
+    if not all_relationships_data:
+        logger.info("Didn't extract any relationships")
+    logger.info(
+        f"New entities or relationships extracted, entities:{all_entities_data}, relationships:{all_relationships_data}"
+    )
     if entity_vdb is not None:
         data_for_vdb = {
@@ -560,8 +562,6 @@ async def extract_entities(
         }
         await relationships_vdb.upsert(data_for_vdb)
-    return knowledge_graph_inst

 async def kg_query(
     query: str,