Remove list_of_list_to_dict function

yangdx
2025-05-07 18:01:23 +08:00
parent 156244e260
commit 3eb3b170ab
2 changed files with 70 additions and 148 deletions
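In short, the commit drops the table-style context building (a header row plus positional data rows converted by `list_of_list_to_dict`) in favor of constructing the list of dictionaries directly. A minimal sketch of the before/after pattern follows; the `chunks` sample data is hypothetical and not taken from the repository:

```python
# Illustrative sketch only; `chunks` is made-up sample data.
chunks = [{"content": "some text", "file_path": "doc_a.md"}]

# Before: header row plus positional data rows, then a conversion helper.
rows = [["id", "content", "file_path"]]
for i, chunk in enumerate(chunks):
    rows.append([i + 1, chunk["content"], chunk["file_path"]])
# text_units_context = list_of_list_to_dict(rows)  # helper removed by this commit

# After: build each dictionary directly, no helper needed.
text_units_context = [
    {"id": i + 1, "content": chunk["content"], "file_path": chunk["file_path"]}
    for i, chunk in enumerate(chunks)
]
```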

View File

@@ -25,7 +25,6 @@ from .utils import (
     CacheData,
     get_conversation_turns,
     use_llm_func_with_cache,
-    list_of_list_to_dict,
 )
 from .base import (
     BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
         entities_context = []
         relations_context = []

-        # Create text_units_context in the same format as _get_edge_data and _get_node_data
-        text_units_section_list = [["id", "content", "file_path"]]
+        # Create text_units_context directly as a list of dictionaries
+        text_units_context = []
         for i, chunk in enumerate(maybe_trun_chunks):
-            # Add to text_units_section_list
-            text_units_section_list.append(
-                [
-                    i + 1,  # id
-                    chunk["content"],  # content
-                    chunk["file_path"],  # file_path
-                ]
+            text_units_context.append(
+                {
+                    "id": i + 1,
+                    "content": chunk["content"],
+                    "file_path": chunk["file_path"],
+                }
             )
-
-        # Convert to dictionary format using list_of_list_to_dict
-        text_units_context = list_of_list_to_dict(text_units_section_list)

         return entities_context, relations_context, text_units_context
     except Exception as e:
         logger.error(f"Error in _get_vector_context: {e}")
@@ -1398,17 +1392,7 @@ async def _get_node_data(
     )

     # build prompt
-    entites_section_list = [
-        [
-            "id",
-            "entity",
-            "type",
-            "description",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    entities_context = []
     for i, n in enumerate(node_datas):
         created_at = n.get("created_at", "UNKNOWN")
         if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")

-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)

-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
     for i, e in enumerate(use_relations):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")

-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_tgt"][0],
-                e["src_tgt"][1],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_tgt"][0],
+                "entity2": e["src_tgt"][1],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)

-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t.get("file_path", "unknown_source")]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown_source"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)

     return entities_context, relations_context, text_units_context
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
         f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )

-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
     for i, e in enumerate(edge_datas):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")

-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_id"],
-                e["tgt_id"],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_id"],
+                "entity2": e["tgt_id"],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)

-    entites_section_list = [
-        ["id", "entity", "type", "description", "rank", "created_at", "file_path"]
-    ]
+    entities_context = []
     for i, n in enumerate(use_entities):
         created_at = n.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")

-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)

-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t.get("file_path", "unknown")]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)

     return entities_context, relations_context, text_units_context

View File

@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
     return list_data


-def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
-    """Convert a 2D string list (table-like data) into a list of dictionaries.
-
-    The first row is treated as header containing field names. Subsequent rows become
-    dictionary entries where keys come from header and values from row data.
-
-    Args:
-        data: 2D string array where first row contains headers and rest are data rows.
-              Minimum 2 columns required in data rows (rows with <2 elements are skipped).
-
-    Returns:
-        List of dictionaries where each dict represents a data row with:
-        - Keys: Header values from first row
-        - Values: Corresponding row values (empty string if missing)
-
-    Example:
-        Input:  [["Name","Age"], ["Alice","23"], ["Bob"]]
-        Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}]
-    """
-    if not data or len(data) <= 1:
-        return []
-
-    header = data[0]
-    result = []
-
-    for row in data[1:]:
-        if len(row) >= 2:
-            item = {}
-            for i, field_name in enumerate(header):
-                if i < len(row):
-                    item[field_name] = str(row[i])
-                else:
-                    item[field_name] = ""
-            result.append(item)
-
-    return result
-
-
 def save_data_to_file(data, file_name):
     with open(file_name, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
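One behavioral detail worth noting: the removed helper coerced every value to `str` and silently skipped data rows with fewer than two elements, whereas the direct dictionary construction now used in the query paths keeps native types such as the integer `id` and numeric `weight`. The sketch below reproduces a condensed copy of the removed helper purely for comparison; the sample row is hypothetical, not taken from the repository.

```python
def list_of_list_to_dict(data):
    """Condensed copy of the removed helper, reproduced only for comparison."""
    if not data or len(data) <= 1:
        return []
    header = data[0]
    result = []
    for row in data[1:]:
        if len(row) >= 2:  # rows shorter than two elements are skipped
            item = {}
            for i, field_name in enumerate(header):
                item[field_name] = str(row[i]) if i < len(row) else ""
            result.append(item)
    return result

# Hypothetical sample row.
old_style = list_of_list_to_dict([["id", "content", "file_path"],
                                  [1, "some text", "doc_a.md"]])
new_style = [{"id": 1, "content": "some text", "file_path": "doc_a.md"}]

print(old_style)  # [{'id': '1', 'content': 'some text', 'file_path': 'doc_a.md'}]
print(new_style)  # [{'id': 1, 'content': 'some text', 'file_path': 'doc_a.md'}]
```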