Remove list_of_list_to_dict function
This commit is contained in:
@@ -25,7 +25,6 @@ from .utils import (
|
||||
CacheData,
|
||||
get_conversation_turns,
|
||||
use_llm_func_with_cache,
|
||||
list_of_list_to_dict,
|
||||
)
|
||||
from .base import (
|
||||
BaseGraphStorage,
|
||||
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
|
||||
entities_context = []
|
||||
relations_context = []
|
||||
|
||||
# Create text_units_context in the same format as _get_edge_data and _get_node_data
|
||||
text_units_section_list = [["id", "content", "file_path"]]
|
||||
|
||||
# Create text_units_context directly as a list of dictionaries
|
||||
text_units_context = []
|
||||
for i, chunk in enumerate(maybe_trun_chunks):
|
||||
# Add to text_units_section_list
|
||||
text_units_section_list.append(
|
||||
[
|
||||
i + 1, # id
|
||||
chunk["content"], # content
|
||||
chunk["file_path"], # file_path
|
||||
]
|
||||
text_units_context.append(
|
||||
{
|
||||
"id": i + 1,
|
||||
"content": chunk["content"],
|
||||
"file_path": chunk["file_path"],
|
||||
}
|
||||
)
|
||||
|
||||
# Convert to dictionary format using list_of_list_to_dict
|
||||
text_units_context = list_of_list_to_dict(text_units_section_list)
|
||||
|
||||
return entities_context, relations_context, text_units_context
|
||||
except Exception as e:
|
||||
logger.error(f"Error in _get_vector_context: {e}")
|
||||
@@ -1398,17 +1392,7 @@ async def _get_node_data(
|
||||
)
|
||||
|
||||
# build prompt
|
||||
entites_section_list = [
|
||||
[
|
||||
"id",
|
||||
"entity",
|
||||
"type",
|
||||
"description",
|
||||
"rank",
|
||||
"created_at",
|
||||
"file_path",
|
||||
]
|
||||
]
|
||||
entities_context = []
|
||||
for i, n in enumerate(node_datas):
|
||||
created_at = n.get("created_at", "UNKNOWN")
|
||||
if isinstance(created_at, (int, float)):
|
||||
@@ -1417,32 +1401,19 @@ async def _get_node_data(
|
||||
# Get file path from node data
|
||||
file_path = n.get("file_path", "unknown_source")
|
||||
|
||||
entites_section_list.append(
|
||||
[
|
||||
i + 1,
|
||||
n["entity_name"],
|
||||
n.get("entity_type", "UNKNOWN"),
|
||||
n.get("description", "UNKNOWN"),
|
||||
n["rank"],
|
||||
created_at,
|
||||
file_path,
|
||||
]
|
||||
entities_context.append(
|
||||
{
|
||||
"id": i + 1,
|
||||
"entity": n["entity_name"],
|
||||
"type": n.get("entity_type", "UNKNOWN"),
|
||||
"description": n.get("description", "UNKNOWN"),
|
||||
"rank": n["rank"],
|
||||
"created_at": created_at,
|
||||
"file_path": file_path,
|
||||
}
|
||||
)
|
||||
entities_context = list_of_list_to_dict(entites_section_list)
|
||||
|
||||
relations_section_list = [
|
||||
[
|
||||
"id",
|
||||
"entity1",
|
||||
"entity2",
|
||||
"description",
|
||||
"keywords",
|
||||
"weight",
|
||||
"rank",
|
||||
"created_at",
|
||||
"file_path",
|
||||
]
|
||||
]
|
||||
relations_context = []
|
||||
for i, e in enumerate(use_relations):
|
||||
created_at = e.get("created_at", "UNKNOWN")
|
||||
# Convert timestamp to readable format
|
||||
@@ -1452,27 +1423,29 @@ async def _get_node_data(
|
||||
# Get file path from edge data
|
||||
file_path = e.get("file_path", "unknown_source")
|
||||
|
||||
relations_section_list.append(
|
||||
[
|
||||
i + 1,
|
||||
e["src_tgt"][0],
|
||||
e["src_tgt"][1],
|
||||
e["description"],
|
||||
e["keywords"],
|
||||
e["weight"],
|
||||
e["rank"],
|
||||
created_at,
|
||||
file_path,
|
||||
]
|
||||
relations_context.append(
|
||||
{
|
||||
"id": i + 1,
|
||||
"entity1": e["src_tgt"][0],
|
||||
"entity2": e["src_tgt"][1],
|
||||
"description": e["description"],
|
||||
"keywords": e["keywords"],
|
||||
"weight": e["weight"],
|
||||
"rank": e["rank"],
|
||||
"created_at": created_at,
|
||||
"file_path": file_path,
|
||||
}
|
||||
)
|
||||
relations_context = list_of_list_to_dict(relations_section_list)
|
||||
|
||||
text_units_section_list = [["id", "content", "file_path"]]
|
||||
text_units_context = []
|
||||
for i, t in enumerate(use_text_units):
|
||||
text_units_section_list.append(
|
||||
[i + 1, t["content"], t.get("file_path", "unknown_source")]
|
||||
text_units_context.append(
|
||||
{
|
||||
"id": i + 1,
|
||||
"content": t["content"],
|
||||
"file_path": t.get("file_path", "unknown_source"),
|
||||
}
|
||||
)
|
||||
text_units_context = list_of_list_to_dict(text_units_section_list)
|
||||
return entities_context, relations_context, text_units_context
|
||||
|
||||
|
||||
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
|
||||
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
|
||||
)
|
||||
|
||||
relations_section_list = [
|
||||
[
|
||||
"id",
|
||||
"entity1",
|
||||
"entity2",
|
||||
"description",
|
||||
"keywords",
|
||||
"weight",
|
||||
"rank",
|
||||
"created_at",
|
||||
"file_path",
|
||||
]
|
||||
]
|
||||
relations_context = []
|
||||
for i, e in enumerate(edge_datas):
|
||||
created_at = e.get("created_at", "UNKNOWN")
|
||||
# Convert timestamp to readable format
|
||||
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
|
||||
# Get file path from edge data
|
||||
file_path = e.get("file_path", "unknown_source")
|
||||
|
||||
relations_section_list.append(
|
||||
[
|
||||
i + 1,
|
||||
e["src_id"],
|
||||
e["tgt_id"],
|
||||
e["description"],
|
||||
e["keywords"],
|
||||
e["weight"],
|
||||
e["rank"],
|
||||
created_at,
|
||||
file_path,
|
||||
]
|
||||
relations_context.append(
|
||||
{
|
||||
"id": i + 1,
|
||||
"entity1": e["src_id"],
|
||||
"entity2": e["tgt_id"],
|
||||
"description": e["description"],
|
||||
"keywords": e["keywords"],
|
||||
"weight": e["weight"],
|
||||
"rank": e["rank"],
|
||||
"created_at": created_at,
|
||||
"file_path": file_path,
|
||||
}
|
||||
)
|
||||
relations_context = list_of_list_to_dict(relations_section_list)
|
||||
|
||||
entites_section_list = [
|
||||
["id", "entity", "type", "description", "rank", "created_at", "file_path"]
|
||||
]
|
||||
entities_context = []
|
||||
for i, n in enumerate(use_entities):
|
||||
created_at = n.get("created_at", "UNKNOWN")
|
||||
# Convert timestamp to readable format
|
||||
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
|
||||
# Get file path from node data
|
||||
file_path = n.get("file_path", "unknown_source")
|
||||
|
||||
entites_section_list.append(
|
||||
[
|
||||
i + 1,
|
||||
n["entity_name"],
|
||||
n.get("entity_type", "UNKNOWN"),
|
||||
n.get("description", "UNKNOWN"),
|
||||
n["rank"],
|
||||
created_at,
|
||||
file_path,
|
||||
]
|
||||
entities_context.append(
|
||||
{
|
||||
"id": i + 1,
|
||||
"entity": n["entity_name"],
|
||||
"type": n.get("entity_type", "UNKNOWN"),
|
||||
"description": n.get("description", "UNKNOWN"),
|
||||
"rank": n["rank"],
|
||||
"created_at": created_at,
|
||||
"file_path": file_path,
|
||||
}
|
||||
)
|
||||
entities_context = list_of_list_to_dict(entites_section_list)
|
||||
|
||||
text_units_section_list = [["id", "content", "file_path"]]
|
||||
text_units_context = []
|
||||
for i, t in enumerate(use_text_units):
|
||||
text_units_section_list.append(
|
||||
[i + 1, t["content"], t.get("file_path", "unknown")]
|
||||
text_units_context.append(
|
||||
{
|
||||
"id": i + 1,
|
||||
"content": t["content"],
|
||||
"file_path": t.get("file_path", "unknown"),
|
||||
}
|
||||
)
|
||||
text_units_context = list_of_list_to_dict(text_units_section_list)
|
||||
return entities_context, relations_context, text_units_context
|
||||
|
||||
|
||||
|
@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
|
||||
return list_data
|
||||
|
||||
|
||||
def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
    """Convert table-like 2D data (header row + data rows) into a list of dicts.

    The first row supplies the field names; each subsequent row becomes one
    dictionary keyed by those names. Row handling:

    - Rows with fewer than 2 elements are skipped entirely.
    - Rows shorter than the header are padded with empty strings.
    - Cells beyond the header length are ignored.
    - Every kept value is stringified with ``str()``.

    Args:
        data: 2D array where the first row contains the headers and the
            remaining rows contain the data.

    Returns:
        One dictionary per kept data row, with header values as keys.

    Example:
        >>> list_of_list_to_dict([["Name", "Age"], ["Alice", 23], ["Bob"]])
        [{'Name': 'Alice', 'Age': '23'}]
    """
    # No header, or header only: nothing to convert.
    if not data or len(data) <= 1:
        return []

    header = data[0]
    result = []

    for row in data[1:]:
        # Guard: single-cell (or empty) rows are dropped, not padded.
        if len(row) < 2:
            continue
        result.append(
            {
                field_name: (str(row[i]) if i < len(row) else "")
                for i, field_name in enumerate(header)
            }
        )

    return result
|
||||
|
||||
|
||||
def save_data_to_file(data, file_name):
    """Write *data* to *file_name* as UTF-8 JSON with 4-space indentation."""
    with open(file_name, "w", encoding="utf-8") as out_file:
        json.dump(data, out_file, ensure_ascii=False, indent=4)
|
||||
|
Reference in New Issue
Block a user