cleanup extraction

This commit is contained in:
Yannick Stephan
2025-02-20 14:17:26 +01:00
parent 214e3e8ad5
commit c4562f71b9
3 changed files with 16 additions and 21 deletions

View File

@@ -23,6 +23,7 @@ if not pm.is_installed("graspologic"):
 import networkx as nx
 from graspologic import embed
 @final
 @dataclass
 class NetworkXStorage(BaseGraphStorage):

View File

@@ -184,7 +184,7 @@ class LightRAG:
     """Maximum number of concurrent embedding function calls."""
     embedding_cache_config: dict[str, Any] = field(
-        default_factory= lambda: {
+        default_factory=lambda: {
             "enabled": False,
             "similarity_threshold": 0.95,
             "use_llm_check": False,
@@ -727,7 +727,7 @@ class LightRAG:
     async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
         try:
-            new_kg = await extract_entities(
+            await extract_entities(
                 chunk,
                 knowledge_graph_inst=self.chunk_entity_relation_graph,
                 entity_vdb=self.entities_vdb,
@@ -735,12 +735,6 @@ class LightRAG:
                 llm_response_cache=self.llm_response_cache,
                 global_config=asdict(self),
             )
-            if new_kg is None:
-                logger.info("No new entities or relationships extracted.")
-            else:
-                logger.info("New entities or relationships extracted.")
-                self.chunk_entity_relation_graph = new_kg
         except Exception as e:
             logger.error("Failed to extract entities and relationships")
             raise e

View File

@@ -329,7 +329,7 @@ async def extract_entities(
     relationships_vdb: BaseVectorStorage,
     global_config: dict[str, str],
     llm_response_cache: BaseKVStorage | None = None,
-) -> BaseGraphStorage | None:
+) -> None:
     use_llm_func: callable = global_config["llm_model_func"]
     entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
     enable_llm_cache_for_entity_extract: bool = global_config[
@@ -522,16 +522,18 @@ async def extract_entities(
         ]
     )
-    if not len(all_entities_data) and not len(all_relationships_data):
-        logger.warning(
-            "Didn't extract any entities and relationships, maybe your LLM is not working"
-        )
-        return None
+    if not (all_entities_data or all_relationships_data):
+        logger.info("Didn't extract any entities and relationships.")
+        return
-    if not len(all_entities_data):
-        logger.warning("Didn't extract any entities")
-    if not len(all_relationships_data):
-        logger.warning("Didn't extract any relationships")
+    if not all_entities_data:
+        logger.info("Didn't extract any entities")
+    if not all_relationships_data:
+        logger.info("Didn't extract any relationships")
+    logger.info(
+        f"New entities or relationships extracted, entities:{all_entities_data}, relationships:{all_relationships_data}"
+    )
     if entity_vdb is not None:
         data_for_vdb = {
@@ -560,8 +562,6 @@ async def extract_entities(
         }
         await relationships_vdb.upsert(data_for_vdb)
-    return knowledge_graph_inst

 async def kg_query(
     query: str,