Merge branch 'main' of https://github.com/jin38324/LightRAG

2024-11-11 15:21:37 +08:00
parent b1cf41d242 319de6fece
commit 0b6b0064d6
10 changed files with 49 additions and 37 deletions
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam

-__version__ = "0.0.8"
+__version__ = "0.0.9"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
--- a/lightrag/kg/__init__.py
+++ b/lightrag/kg/__init__.py
@@ -1,3 +1 @@
 # print ("init package vars here. ......")
-
-
--- a/lightrag/kg/neo4j_impl.py
+++ b/lightrag/kg/neo4j_impl.py
@@ -146,11 +146,11 @@ class Neo4JStorage(BaseGraphStorage):
        entity_name_label_target = target_node_id.strip('"')
        """
        Find all edges between nodes of two given labels
-        
+
        Args:
            source_node_label (str): Label of the source nodes
            target_node_label (str): Label of the target nodes
-            
+
        Returns:
            list: List of all relationships/edges found
        """
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -66,7 +66,6 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
        return loop


-
@dataclass
 class LightRAG:
    working_dir: str = field(
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -562,19 +562,19 @@ async def _find_most_related_text_unit_from_entities(
        if not this_edges:
            continue
        all_one_hop_nodes.update([e[1] for e in this_edges])
-    
+
    all_one_hop_nodes = list(all_one_hop_nodes)
    all_one_hop_nodes_data = await asyncio.gather(
        *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
    )
-    
+
    # Add null check for node data
    all_one_hop_text_units_lookup = {
        k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
        for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
        if v is not None and "source_id" in v  # Add source_id check
    }
-    
+
    all_text_units_lookup = {}
    for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
        for c_id in this_text_units:
@@ -588,7 +588,7 @@ async def _find_most_related_text_unit_from_entities(
                        and c_id in all_one_hop_text_units_lookup[e[1]]
                    ):
                        relation_counts += 1
-            
+
            chunk_data = await text_chunks_db.get_by_id(c_id)
            if chunk_data is not None and "content" in chunk_data:  # Add content check
                all_text_units_lookup[c_id] = {
@@ -596,29 +596,28 @@ async def _find_most_related_text_unit_from_entities(
                    "order": index,
                    "relation_counts": relation_counts,
                }
-    
+
    # Filter out None values and ensure data has content
    all_text_units = [
-        {"id": k, **v} 
-        for k, v in all_text_units_lookup.items() 
+        {"id": k, **v}
+        for k, v in all_text_units_lookup.items()
        if v is not None and v.get("data") is not None and "content" in v["data"]
    ]
-    
+
    if not all_text_units:
        logger.warning("No valid text units found")
        return []
-        
+
    all_text_units = sorted(
-        all_text_units, 
-        key=lambda x: (x["order"], -x["relation_counts"])
+        all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
    )
-    
+
    all_text_units = truncate_list_by_token_size(
        all_text_units,
        key=lambda x: x["data"]["content"],
        max_token_size=query_param.max_token_for_text_unit,
    )
-    
+
    all_text_units = [t["data"] for t in all_text_units]
    return all_text_units