Remove list_of_list_to_dict function

This commit is contained in:
yangdx
2025-05-07 18:01:23 +08:00
parent 156244e260
commit 3eb3b170ab
2 changed files with 70 additions and 148 deletions

View File

@@ -25,7 +25,6 @@ from .utils import (
CacheData,
get_conversation_turns,
use_llm_func_with_cache,
list_of_list_to_dict,
)
from .base import (
BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
entities_context = []
relations_context = []
# Create text_units_context in the same format as _get_edge_data and _get_node_data
text_units_section_list = [["id", "content", "file_path"]]
# Create text_units_context directly as a list of dictionaries
text_units_context = []
for i, chunk in enumerate(maybe_trun_chunks):
# Add to text_units_section_list
text_units_section_list.append(
[
i + 1, # id
chunk["content"], # content
chunk["file_path"], # file_path
]
text_units_context.append(
{
"id": i + 1,
"content": chunk["content"],
"file_path": chunk["file_path"],
}
)
# Convert to dictionary format using list_of_list_to_dict
text_units_context = list_of_list_to_dict(text_units_section_list)
return entities_context, relations_context, text_units_context
except Exception as e:
logger.error(f"Error in _get_vector_context: {e}")
@@ -1398,17 +1392,7 @@ async def _get_node_data(
)
# build prompt
entites_section_list = [
[
"id",
"entity",
"type",
"description",
"rank",
"created_at",
"file_path",
]
]
entities_context = []
for i, n in enumerate(node_datas):
created_at = n.get("created_at", "UNKNOWN")
if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
# Get file path from node data
file_path = n.get("file_path", "unknown_source")
entites_section_list.append(
[
i + 1,
n["entity_name"],
n.get("entity_type", "UNKNOWN"),
n.get("description", "UNKNOWN"),
n["rank"],
created_at,
file_path,
]
entities_context.append(
{
"id": i + 1,
"entity": n["entity_name"],
"type": n.get("entity_type", "UNKNOWN"),
"description": n.get("description", "UNKNOWN"),
"rank": n["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
entities_context = list_of_list_to_dict(entites_section_list)
relations_section_list = [
[
"id",
"entity1",
"entity2",
"description",
"keywords",
"weight",
"rank",
"created_at",
"file_path",
]
]
relations_context = []
for i, e in enumerate(use_relations):
created_at = e.get("created_at", "UNKNOWN")
# Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
# Get file path from edge data
file_path = e.get("file_path", "unknown_source")
relations_section_list.append(
[
i + 1,
e["src_tgt"][0],
e["src_tgt"][1],
e["description"],
e["keywords"],
e["weight"],
e["rank"],
created_at,
file_path,
]
relations_context.append(
{
"id": i + 1,
"entity1": e["src_tgt"][0],
"entity2": e["src_tgt"][1],
"description": e["description"],
"keywords": e["keywords"],
"weight": e["weight"],
"rank": e["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
relations_context = list_of_list_to_dict(relations_section_list)
text_units_section_list = [["id", "content", "file_path"]]
text_units_context = []
for i, t in enumerate(use_text_units):
text_units_section_list.append(
[i + 1, t["content"], t.get("file_path", "unknown_source")]
text_units_context.append(
{
"id": i + 1,
"content": t["content"],
"file_path": t.get("file_path", "unknown_source"),
}
)
text_units_context = list_of_list_to_dict(text_units_section_list)
return entities_context, relations_context, text_units_context
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
)
relations_section_list = [
[
"id",
"entity1",
"entity2",
"description",
"keywords",
"weight",
"rank",
"created_at",
"file_path",
]
]
relations_context = []
for i, e in enumerate(edge_datas):
created_at = e.get("created_at", "UNKNOWN")
# Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
# Get file path from edge data
file_path = e.get("file_path", "unknown_source")
relations_section_list.append(
[
i + 1,
e["src_id"],
e["tgt_id"],
e["description"],
e["keywords"],
e["weight"],
e["rank"],
created_at,
file_path,
]
relations_context.append(
{
"id": i + 1,
"entity1": e["src_id"],
"entity2": e["tgt_id"],
"description": e["description"],
"keywords": e["keywords"],
"weight": e["weight"],
"rank": e["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
relations_context = list_of_list_to_dict(relations_section_list)
entites_section_list = [
["id", "entity", "type", "description", "rank", "created_at", "file_path"]
]
entities_context = []
for i, n in enumerate(use_entities):
created_at = n.get("created_at", "UNKNOWN")
# Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
# Get file path from node data
file_path = n.get("file_path", "unknown_source")
entites_section_list.append(
[
i + 1,
n["entity_name"],
n.get("entity_type", "UNKNOWN"),
n.get("description", "UNKNOWN"),
n["rank"],
created_at,
file_path,
]
entities_context.append(
{
"id": i + 1,
"entity": n["entity_name"],
"type": n.get("entity_type", "UNKNOWN"),
"description": n.get("description", "UNKNOWN"),
"rank": n["rank"],
"created_at": created_at,
"file_path": file_path,
}
)
entities_context = list_of_list_to_dict(entites_section_list)
text_units_section_list = [["id", "content", "file_path"]]
text_units_context = []
for i, t in enumerate(use_text_units):
text_units_section_list.append(
[i + 1, t["content"], t.get("file_path", "unknown")]
text_units_context.append(
{
"id": i + 1,
"content": t["content"],
"file_path": t.get("file_path", "unknown"),
}
)
text_units_context = list_of_list_to_dict(text_units_section_list)
return entities_context, relations_context, text_units_context

View File

@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
return list_data
def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
    """Convert a 2D string list (table-like data) into a list of dictionaries.

    The first row is treated as a header containing field names. Each subsequent
    row becomes one dictionary whose keys come from the header and whose values
    come from the row. Rows with fewer than 2 elements are skipped entirely;
    rows shorter than the header are padded with empty strings, and trailing
    values beyond the header width are ignored.

    Args:
        data: 2D array where the first row contains headers and the rest are
            data rows. Values are stringified with ``str()``.

    Returns:
        List of dictionaries, one per kept data row, with:
            - Keys: header values from the first row
            - Values: corresponding row values as strings ("" if missing)

    Example:
        >>> list_of_list_to_dict([["Name", "Age"], ["Alice", "23"], ["Bob"]])
        [{'Name': 'Alice', 'Age': '23'}]
    """
    # Nothing to convert without at least a header row plus one data row.
    if not data or len(data) <= 1:
        return []
    header = data[0]
    result = []
    for row in data[1:]:
        # NOTE: rows with <2 elements are intentionally dropped (see docstring).
        if len(row) < 2:
            continue
        # Pad short rows with "" and stringify every value so the output is
        # uniformly list[dict[str, str]].
        result.append(
            {
                field: (str(row[i]) if i < len(row) else "")
                for i, field in enumerate(header)
            }
        )
    return result
def save_data_to_file(data, file_name):
    """Serialize *data* as pretty-printed (indent=4), non-ASCII-preserving JSON
    and write it to *file_name* using UTF-8 encoding."""
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    with open(file_name, "w", encoding="utf-8") as out:
        out.write(serialized)