From 3eb3b170ab37dbcc9f8cf6c62f05c2a91f19cbef Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 7 May 2025 18:01:23 +0800 Subject: [PATCH] Remove list_of_list_to_dict function --- lightrag/operate.py | 180 +++++++++++++++++--------------------------- lightrag/utils.py | 38 ---------- 2 files changed, 70 insertions(+), 148 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 0ff485a8..08296bdb 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -25,7 +25,6 @@ from .utils import ( CacheData, get_conversation_turns, use_llm_func_with_cache, - list_of_list_to_dict, ) from .base import ( BaseGraphStorage, @@ -1175,22 +1174,17 @@ async def _get_vector_context( entities_context = [] relations_context = [] - # Create text_units_context in the same format as _get_edge_data and _get_node_data - text_units_section_list = [["id", "content", "file_path"]] - + # Create text_units_context directly as a list of dictionaries + text_units_context = [] for i, chunk in enumerate(maybe_trun_chunks): - # Add to text_units_section_list - text_units_section_list.append( - [ - i + 1, # id - chunk["content"], # content - chunk["file_path"], # file_path - ] + text_units_context.append( + { + "id": i + 1, + "content": chunk["content"], + "file_path": chunk["file_path"], + } ) - # Convert to dictionary format using list_of_list_to_dict - text_units_context = list_of_list_to_dict(text_units_section_list) - return entities_context, relations_context, text_units_context except Exception as e: logger.error(f"Error in _get_vector_context: {e}") @@ -1398,17 +1392,7 @@ async def _get_node_data( ) # build prompt - entites_section_list = [ - [ - "id", - "entity", - "type", - "description", - "rank", - "created_at", - "file_path", - ] - ] + entities_context = [] for i, n in enumerate(node_datas): created_at = n.get("created_at", "UNKNOWN") if isinstance(created_at, (int, float)): @@ -1417,32 +1401,19 @@ async def _get_node_data( # Get file path from node data file_path = n.get("file_path", "unknown_source") - entites_section_list.append( - [ - i + 1, - n["entity_name"], - n.get("entity_type", "UNKNOWN"), - n.get("description", "UNKNOWN"), - n["rank"], - created_at, - file_path, - ] + entities_context.append( + { + "id": i + 1, + "entity": n["entity_name"], + "type": n.get("entity_type", "UNKNOWN"), + "description": n.get("description", "UNKNOWN"), + "rank": n["rank"], + "created_at": created_at, + "file_path": file_path, + } ) - entities_context = list_of_list_to_dict(entites_section_list) - relations_section_list = [ - [ - "id", - "entity1", - "entity2", - "description", - "keywords", - "weight", - "rank", - "created_at", - "file_path", - ] - ] + relations_context = [] for i, e in enumerate(use_relations): created_at = e.get("created_at", "UNKNOWN") # Convert timestamp to readable format @@ -1452,27 +1423,29 @@ async def _get_node_data( # Get file path from edge data file_path = e.get("file_path", "unknown_source") - relations_section_list.append( - [ - i + 1, - e["src_tgt"][0], - e["src_tgt"][1], - e["description"], - e["keywords"], - e["weight"], - e["rank"], - created_at, - file_path, - ] + relations_context.append( + { + "id": i + 1, + "entity1": e["src_tgt"][0], + "entity2": e["src_tgt"][1], + "description": e["description"], + "keywords": e["keywords"], + "weight": e["weight"], + "rank": e["rank"], + "created_at": created_at, + "file_path": file_path, + } ) - relations_context = list_of_list_to_dict(relations_section_list) - text_units_section_list = [["id", "content", "file_path"]] + text_units_context = [] for i, t in enumerate(use_text_units): - text_units_section_list.append( - [i + 1, t["content"], t.get("file_path", "unknown_source")] + text_units_context.append( + { + "id": i + 1, + "content": t["content"], + "file_path": t.get("file_path", "unknown_source"), + } ) - text_units_context = list_of_list_to_dict(text_units_section_list) return entities_context, relations_context, text_units_context @@ -1715,19 +1688,7 @@ async def _get_edge_data( f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks" ) - relations_section_list = [ - [ - "id", - "entity1", - "entity2", - "description", - "keywords", - "weight", - "rank", - "created_at", - "file_path", - ] - ] + relations_context = [] for i, e in enumerate(edge_datas): created_at = e.get("created_at", "UNKNOWN") # Convert timestamp to readable format @@ -1737,24 +1698,21 @@ async def _get_edge_data( # Get file path from edge data file_path = e.get("file_path", "unknown_source") - relations_section_list.append( - [ - i + 1, - e["src_id"], - e["tgt_id"], - e["description"], - e["keywords"], - e["weight"], - e["rank"], - created_at, - file_path, - ] + relations_context.append( + { + "id": i + 1, + "entity1": e["src_id"], + "entity2": e["tgt_id"], + "description": e["description"], + "keywords": e["keywords"], + "weight": e["weight"], + "rank": e["rank"], + "created_at": created_at, + "file_path": file_path, + } ) - relations_context = list_of_list_to_dict(relations_section_list) - entites_section_list = [ - ["id", "entity", "type", "description", "rank", "created_at", "file_path"] - ] + entities_context = [] for i, n in enumerate(use_entities): created_at = n.get("created_at", "UNKNOWN") # Convert timestamp to readable format @@ -1764,25 +1722,27 @@ async def _get_edge_data( # Get file path from node data file_path = n.get("file_path", "unknown_source") - entites_section_list.append( - [ - i + 1, - n["entity_name"], - n.get("entity_type", "UNKNOWN"), - n.get("description", "UNKNOWN"), - n["rank"], - created_at, - file_path, - ] + entities_context.append( + { + "id": i + 1, + "entity": n["entity_name"], + "type": n.get("entity_type", "UNKNOWN"), + "description": n.get("description", "UNKNOWN"), + "rank": n["rank"], + "created_at": created_at, + "file_path": file_path, + } ) - entities_context = list_of_list_to_dict(entites_section_list) - text_units_section_list = [["id", "content", "file_path"]] + text_units_context = [] for i, t in enumerate(use_text_units): - text_units_section_list.append( - [i + 1, t["content"], t.get("file_path", "unknown")] + text_units_context.append( + { + "id": i + 1, + "content": t["content"], + "file_path": t.get("file_path", "unknown"), + } ) - text_units_context = list_of_list_to_dict(text_units_section_list) return entities_context, relations_context, text_units_context diff --git a/lightrag/utils.py b/lightrag/utils.py index 7b4920eb..5e252de1 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -719,44 +719,6 @@ def truncate_list_by_token_size( return list_data -def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]: - """Convert a 2D string list (table-like data) into a list of dictionaries. - - The first row is treated as header containing field names. Subsequent rows become - dictionary entries where keys come from header and values from row data. - - Args: - data: 2D string array where first row contains headers and rest are data rows. - Minimum 2 columns required in data rows (rows with <2 elements are skipped). - - Returns: - List of dictionaries where each dict represents a data row with: - - Keys: Header values from first row - - Values: Corresponding row values (empty string if missing) - - Example: - Input: [["Name","Age"], ["Alice","23"], ["Bob"]] - Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}] - """ - if not data or len(data) <= 1: - return [] - - header = data[0] - result = [] - - for row in data[1:]: - if len(row) >= 2: - item = {} - for i, field_name in enumerate(header): - if i < len(row): - item[field_name] = str(row[i]) - else: - item[field_name] = "" - result.append(item) - - return result - - def save_data_to_file(data, file_name): with open(file_name, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=4)