Remove list_of_list_to_dict function

yangdx
2025-05-07 18:01:23 +08:00
parent 156244e260
commit 3eb3b170ab
2 changed files with 70 additions and 148 deletions
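In short, the commit drops the table-style context building (a header row plus positional data rows converted by `list_of_list_to_dict`) in favor of constructing the list of dictionaries directly. A minimal sketch of the before/after pattern follows; the `chunks` sample data is hypothetical and not taken from the repository:

```python
# Illustrative sketch only; `chunks` is made-up sample data.
chunks = [{"content": "some text", "file_path": "doc_a.md"}]

# Before: header row plus positional data rows, then a conversion helper.
rows = [["id", "content", "file_path"]]
for i, chunk in enumerate(chunks):
    rows.append([i + 1, chunk["content"], chunk["file_path"]])
# text_units_context = list_of_list_to_dict(rows)  # helper removed by this commit

# After: build each dictionary directly, no helper needed.
text_units_context = [
    {"id": i + 1, "content": chunk["content"], "file_path": chunk["file_path"]}
    for i, chunk in enumerate(chunks)
]
```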

View File

@@ -25,7 +25,6 @@ from .utils import (
     CacheData,
     get_conversation_turns,
     use_llm_func_with_cache,
-    list_of_list_to_dict,
 )
 from .base import (
     BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
         entities_context = []
         relations_context = []

-        # Create text_units_context in the same format as _get_edge_data and _get_node_data
-        text_units_section_list = [["id", "content", "file_path"]]
+        # Create text_units_context directly as a list of dictionaries
+        text_units_context = []
         for i, chunk in enumerate(maybe_trun_chunks):
-            # Add to text_units_section_list
-            text_units_section_list.append(
-                [
-                    i + 1,  # id
-                    chunk["content"],  # content
-                    chunk["file_path"],  # file_path
-                ]
+            text_units_context.append(
+                {
+                    "id": i + 1,
+                    "content": chunk["content"],
+                    "file_path": chunk["file_path"],
+                }
             )
-
-        # Convert to dictionary format using list_of_list_to_dict
-        text_units_context = list_of_list_to_dict(text_units_section_list)

         return entities_context, relations_context, text_units_context
     except Exception as e:
         logger.error(f"Error in _get_vector_context: {e}")
@@ -1398,17 +1392,7 @@ async def _get_node_data(
     )

     # build prompt
-    entites_section_list = [
-        [
-            "id",
-            "entity",
-            "type",
-            "description",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    entities_context = []
     for i, n in enumerate(node_datas):
         created_at = n.get("created_at", "UNKNOWN")
         if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")

-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)

-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
     for i, e in enumerate(use_relations):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")

-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_tgt"][0],
-                e["src_tgt"][1],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_tgt"][0],
+                "entity2": e["src_tgt"][1],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)

-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t.get("file_path", "unknown_source")]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown_source"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)

     return entities_context, relations_context, text_units_context
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
         f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )

-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
     for i, e in enumerate(edge_datas):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")

-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_id"],
-                e["tgt_id"],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_id"],
+                "entity2": e["tgt_id"],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)

-    entites_section_list = [
-        ["id", "entity", "type", "description", "rank", "created_at", "file_path"]
-    ]
+    entities_context = []
     for i, n in enumerate(use_entities):
         created_at = n.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")

-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)

-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t.get("file_path", "unknown")]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)

     return entities_context, relations_context, text_units_context

View File

@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
     return list_data


-def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
-    """Convert a 2D string list (table-like data) into a list of dictionaries.
-
-    The first row is treated as header containing field names. Subsequent rows become
-    dictionary entries where keys come from header and values from row data.
-
-    Args:
-        data: 2D string array where first row contains headers and rest are data rows.
-              Minimum 2 columns required in data rows (rows with <2 elements are skipped).
-
-    Returns:
-        List of dictionaries where each dict represents a data row with:
-        - Keys: Header values from first row
-        - Values: Corresponding row values (empty string if missing)
-
-    Example:
-        Input:  [["Name","Age"], ["Alice","23"], ["Bob"]]
-        Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}]
-    """
-    if not data or len(data) <= 1:
-        return []
-
-    header = data[0]
-    result = []
-
-    for row in data[1:]:
-        if len(row) >= 2:
-            item = {}
-            for i, field_name in enumerate(header):
-                if i < len(row):
-                    item[field_name] = str(row[i])
-                else:
-                    item[field_name] = ""
-            result.append(item)
-
-    return result
-
-
 def save_data_to_file(data, file_name):
     with open(file_name, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
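One behavioral detail worth noting: the removed helper coerced every value to `str` and silently skipped data rows with fewer than two elements, whereas the direct dictionary construction now used in the query paths keeps native types such as the integer `id` and numeric `weight`. The sketch below reproduces a condensed copy of the removed helper purely for comparison; the sample row is hypothetical, not taken from the repository.

```python
def list_of_list_to_dict(data):
    """Condensed copy of the removed helper, reproduced only for comparison."""
    if not data or len(data) <= 1:
        return []
    header = data[0]
    result = []
    for row in data[1:]:
        if len(row) >= 2:  # rows shorter than two elements are skipped
            item = {}
            for i, field_name in enumerate(header):
                item[field_name] = str(row[i]) if i < len(row) else ""
            result.append(item)
    return result

# Hypothetical sample row.
old_style = list_of_list_to_dict([["id", "content", "file_path"],
                                  [1, "some text", "doc_a.md"]])
new_style = [{"id": 1, "content": "some text", "file_path": "doc_a.md"}]

print(old_style)  # [{'id': '1', 'content': 'some text', 'file_path': 'doc_a.md'}]
print(new_style)  # [{'id': 1, 'content': 'some text', 'file_path': 'doc_a.md'}]
```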