Remove list_of_list_to_dict function
@@ -25,7 +25,6 @@ from .utils import (
     CacheData,
     get_conversation_turns,
     use_llm_func_with_cache,
-    list_of_list_to_dict,
 )
 from .base import (
     BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
         entities_context = []
         relations_context = []
 
-        # Create text_units_context in the same format as _get_edge_data and _get_node_data
-        text_units_section_list = [["id", "content", "file_path"]]
+        # Create text_units_context directly as a list of dictionaries
+        text_units_context = []
 
         for i, chunk in enumerate(maybe_trun_chunks):
-            # Add to text_units_section_list
-            text_units_section_list.append(
-                [
-                    i + 1,  # id
-                    chunk["content"],  # content
-                    chunk["file_path"],  # file_path
-                ]
+            text_units_context.append(
+                {
+                    "id": i + 1,
+                    "content": chunk["content"],
+                    "file_path": chunk["file_path"],
+                }
             )
-
-        # Convert to dictionary format using list_of_list_to_dict
-        text_units_context = list_of_list_to_dict(text_units_section_list)
-
         return entities_context, relations_context, text_units_context
     except Exception as e:
         logger.error(f"Error in _get_vector_context: {e}")
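For orientation (illustrative, not part of the commit): after this change _get_vector_context builds text_units_context directly as a list of dicts, so the returned structure looks roughly like the sketch below; the values are made up.

# Hypothetical example values -- only the shape matters.
text_units_context = [
    {"id": 1, "content": "first retrieved chunk ...", "file_path": "docs/a.md"},
    {"id": 2, "content": "second retrieved chunk ...", "file_path": "docs/b.md"},
]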
@@ -1398,17 +1392,7 @@ async def _get_node_data(
     )
 
     # build prompt
-    entites_section_list = [
-        [
-            "id",
-            "entity",
-            "type",
-            "description",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    entities_context = []
     for i, n in enumerate(node_datas):
         created_at = n.get("created_at", "UNKNOWN")
         if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")
 
-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)
 
-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
     for i, e in enumerate(use_relations):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")
 
-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_tgt"][0],
-                e["src_tgt"][1],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_tgt"][0],
+                "entity2": e["src_tgt"][1],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)
 
-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t.get("file_path", "unknown_source")]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown_source"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
 
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
         f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )
 
-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
     for i, e in enumerate(edge_datas):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")
 
-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_id"],
-                e["tgt_id"],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_id"],
+                "entity2": e["tgt_id"],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)
 
-    entites_section_list = [
-        ["id", "entity", "type", "description", "rank", "created_at", "file_path"]
-    ]
+    entities_context = []
     for i, n in enumerate(use_entities):
         created_at = n.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")
 
-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)
 
-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t.get("file_path", "unknown")]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
 
@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
     return list_data
 
 
-def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
-    """Convert a 2D string list (table-like data) into a list of dictionaries.
-
-    The first row is treated as header containing field names. Subsequent rows become
-    dictionary entries where keys come from header and values from row data.
-
-    Args:
-        data: 2D string array where first row contains headers and rest are data rows.
-            Minimum 2 columns required in data rows (rows with <2 elements are skipped).
-
-    Returns:
-        List of dictionaries where each dict represents a data row with:
-        - Keys: Header values from first row
-        - Values: Corresponding row values (empty string if missing)
-
-    Example:
-        Input: [["Name","Age"], ["Alice","23"], ["Bob"]]
-        Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}]
-    """
-    if not data or len(data) <= 1:
-        return []
-
-    header = data[0]
-    result = []
-
-    for row in data[1:]:
-        if len(row) >= 2:
-            item = {}
-            for i, field_name in enumerate(header):
-                if i < len(row):
-                    item[field_name] = str(row[i])
-                else:
-                    item[field_name] = ""
-            result.append(item)
-
-    return result
-
-
 def save_data_to_file(data, file_name):
     with open(file_name, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
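Side note (not part of the commit): the removed helper coerced every cell to str via str(row[i]), while the dict literals that replace it keep native types (e.g. "id" stays an int). A minimal before/after sketch with made-up values:

# Old path (removed): header row plus data rows, then convert.
rows = [["id", "content", "file_path"], [1, "some chunk", "doc.txt"]]
# list_of_list_to_dict(rows) == [{"id": "1", "content": "some chunk", "file_path": "doc.txt"}]

# New path: build the dicts directly, no intermediate table and no str() coercion.
text_units_context = [{"id": 1, "content": "some chunk", "file_path": "doc.txt"}]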