Remove list_of_list_to_dict function

This commit is contained in:
yangdx
2025-05-07 18:01:23 +08:00
parent 156244e260
commit 3eb3b170ab
2 changed files with 70 additions and 148 deletions

View File

@@ -25,7 +25,6 @@ from .utils import (
CacheData,
get_conversation_turns,
use_llm_func_with_cache,
list_of_list_to_dict,
)
from .base import (
BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
entities_context = []
relations_context = []
# Create text_units_context in the same format as _get_edge_data and _get_node_data
text_units_section_list = [["id", "content", "file_path"]]
# Create text_units_context directly as a list of dictionaries
text_units_context = []
for i, chunk in enumerate(maybe_trun_chunks):
# Add to text_units_section_list
text_units_section_list.append(
[
i + 1, # id
chunk["content"], # content
chunk["file_path"], # file_path
]
text_units_context.append(
{
"id": i + 1,
"content": chunk["content"],
"file_path": chunk["file_path"],
}
)
# Convert to dictionary format using list_of_list_to_dict
text_units_context = list_of_list_to_dict(text_units_section_list)
return entities_context, relations_context, text_units_context
except Exception as e:
logger.error(f"Error in _get_vector_context: {e}")
@@ -1398,17 +1392,7 @@ async def _get_node_data(
)
# build prompt
entites_section_list = [
[
"id",
"entity",
"type",
"description",
"rank",
"created_at",
"file_path",
]
]
entities_context = []
for i, n in enumerate(node_datas):
created_at = n.get("created_at", "UNKNOWN")
if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
# Get file path from node data
file_path = n.get("file_path", "unknown_source")
entites_section_list.append(
[
i + 1,
n["entity_name"],
n.get("entity_type", "UNKNOWN"),
n.get("description", "UNKNOWN"),
n["rank"],
created_at,
file_path,
]
entities_context.append(
{
"id": i + 1,
"entity": n["entity_name"],
"type": n.get("entity_type", "UNKNOWN"),
"description": n.get("description", "UNKNOWN"),
"rank": n["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
entities_context = list_of_list_to_dict(entites_section_list)
relations_section_list = [
[
"id",
"entity1",
"entity2",
"description",
"keywords",
"weight",
"rank",
"created_at",
"file_path",
]
]
relations_context = []
for i, e in enumerate(use_relations):
created_at = e.get("created_at", "UNKNOWN")
# Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
# Get file path from edge data
file_path = e.get("file_path", "unknown_source")
relations_section_list.append(
[
i + 1,
e["src_tgt"][0],
e["src_tgt"][1],
e["description"],
e["keywords"],
e["weight"],
e["rank"],
created_at,
file_path,
]
relations_context.append(
{
"id": i + 1,
"entity1": e["src_tgt"][0],
"entity2": e["src_tgt"][1],
"description": e["description"],
"keywords": e["keywords"],
"weight": e["weight"],
"rank": e["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
relations_context = list_of_list_to_dict(relations_section_list)
text_units_section_list = [["id", "content", "file_path"]]
text_units_context = []
for i, t in enumerate(use_text_units):
text_units_section_list.append(
[i + 1, t["content"], t.get("file_path", "unknown_source")]
text_units_context.append(
{
"id": i + 1,
"content": t["content"],
"file_path": t.get("file_path", "unknown_source"),
}
)
text_units_context = list_of_list_to_dict(text_units_section_list)
return entities_context, relations_context, text_units_context
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
)
relations_section_list = [
[
"id",
"entity1",
"entity2",
"description",
"keywords",
"weight",
"rank",
"created_at",
"file_path",
]
]
relations_context = []
for i, e in enumerate(edge_datas):
created_at = e.get("created_at", "UNKNOWN")
# Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
# Get file path from edge data
file_path = e.get("file_path", "unknown_source")
relations_section_list.append(
[
i + 1,
e["src_id"],
e["tgt_id"],
e["description"],
e["keywords"],
e["weight"],
e["rank"],
created_at,
file_path,
]
relations_context.append(
{
"id": i + 1,
"entity1": e["src_id"],
"entity2": e["tgt_id"],
"description": e["description"],
"keywords": e["keywords"],
"weight": e["weight"],
"rank": e["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
relations_context = list_of_list_to_dict(relations_section_list)
entites_section_list = [
["id", "entity", "type", "description", "rank", "created_at", "file_path"]
]
entities_context = []
for i, n in enumerate(use_entities):
created_at = n.get("created_at", "UNKNOWN")
# Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
# Get file path from node data
file_path = n.get("file_path", "unknown_source")
entites_section_list.append(
[
i + 1,
n["entity_name"],
n.get("entity_type", "UNKNOWN"),
n.get("description", "UNKNOWN"),
n["rank"],
created_at,
file_path,
]
entities_context.append(
{
"id": i + 1,
"entity": n["entity_name"],
"type": n.get("entity_type", "UNKNOWN"),
"description": n.get("description", "UNKNOWN"),
"rank": n["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
entities_context = list_of_list_to_dict(entites_section_list)
text_units_section_list = [["id", "content", "file_path"]]
text_units_context = []
for i, t in enumerate(use_text_units):
text_units_section_list.append(
[i + 1, t["content"], t.get("file_path", "unknown")]
text_units_context.append(
{
"id": i + 1,
"content": t["content"],
"file_path": t.get("file_path", "unknown"),
}
)
text_units_context = list_of_list_to_dict(text_units_section_list)
return entities_context, relations_context, text_units_context

View File

@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
return list_data
def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
    """Convert a 2D string list (table-like data) into a list of dictionaries.

    The first row is treated as a header containing field names. Each subsequent
    row becomes one dictionary whose keys come from the header and whose values
    come from the row. Rows with fewer than 2 elements are skipped entirely;
    rows shorter than the header are padded with empty strings, and trailing
    values beyond the header width are ignored.

    Args:
        data: 2D array where the first row contains headers and the rest are
            data rows. Values are stringified with ``str()``.

    Returns:
        List of dictionaries, one per kept data row, with:
            - Keys: header values from the first row
            - Values: corresponding row values as strings ("" if missing)

    Example:
        >>> list_of_list_to_dict([["Name", "Age"], ["Alice", "23"], ["Bob"]])
        [{'Name': 'Alice', 'Age': '23'}]
    """
    # Nothing to convert without at least a header row plus one data row.
    if not data or len(data) <= 1:
        return []
    header = data[0]
    result = []
    for row in data[1:]:
        # NOTE: rows with <2 elements are intentionally dropped (see docstring).
        if len(row) < 2:
            continue
        # Pad short rows with "" and stringify every value so the output is
        # uniformly list[dict[str, str]].
        result.append(
            {
                field: (str(row[i]) if i < len(row) else "")
                for i, field in enumerate(header)
            }
        )
    return result
def save_data_to_file(data, file_name):
    """Serialize *data* as pretty-printed (indent=4), non-ASCII-preserving JSON
    and write it to *file_name* using UTF-8 encoding."""
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    with open(file_name, "w", encoding="utf-8") as out:
        out.write(serialized)