Change the context format of chunks from CSV to JSON to improve compatibility with LLMs

This commit is contained in:
孟超
2025-04-19 15:18:33 +08:00
parent 4fd40fd798
commit a20d68d865
2 changed files with 35 additions and 25 deletions

View File

@@ -1316,15 +1316,15 @@ async def _build_query_context(
result = f""" result = f"""
-----Entities----- -----Entities-----
```csv ```json
{entities_context} {entities_context}
``` ```
-----Relationships----- -----Relationships-----
```csv ```json
{relations_context} {relations_context}
``` ```
-----Sources----- -----Sources-----
```csv ```json
{text_units_context} {text_units_context}
``` ```
""".strip() """.strip()

View File

@@ -473,40 +473,50 @@ def xml_to_json(xml_file):
def process_combine_contexts(hl: str, ll: str): def process_combine_contexts(hl: str, ll: str):
header = None list_hl = csv_string_to_list(hl.strip()) if hl.strip() else []
list_hl = csv_string_to_list(hl.strip()) list_ll = csv_string_to_list(ll.strip()) if ll.strip() else []
list_ll = csv_string_to_list(ll.strip())
if list_hl: if not list_hl and not list_ll:
return json.dumps([], ensure_ascii=False)
header = None
if list_hl and len(list_hl) > 0:
header = list_hl[0] header = list_hl[0]
list_hl = list_hl[1:] list_hl = list_hl[1:]
if list_ll: if list_ll and len(list_ll) > 0:
header = list_ll[0] if header is None:
header = list_ll[0]
list_ll = list_ll[1:] list_ll = list_ll[1:]
if header is None: if header is None:
return "" return json.dumps([], ensure_ascii=False)
if list_hl: combined_data = []
list_hl = [",".join(item[1:]) for item in list_hl if item]
if list_ll:
list_ll = [",".join(item[1:]) for item in list_ll if item]
combined_sources = []
seen = set() seen = set()
for item in list_hl + list_ll: def process_row(row):
if item and item not in seen: if len(row) < 2:
combined_sources.append(item) return None
seen.add(item)
combined_sources_result = [",\t".join(header)] item_data = {}
for i, item in enumerate(combined_sources, start=1): for i, field_name in enumerate(header):
combined_sources_result.append(f"{i},\t{item}") item_data[field_name] = row[i]
combined_sources_result = "\n".join(combined_sources_result) return item_data
return combined_sources_result for row in list_hl + list_ll:
# 创建内容的标识符用于去重(跳过第一列的索引)
if len(row) >= 2:
row_identifier = json.dumps(row[1:])
if row_identifier not in seen:
seen.add(row_identifier)
item = process_row(row)
if item:
combined_data.append(item)
return json.dumps(combined_data, ensure_ascii=False)
async def get_best_cached_response( async def get_best_cached_response(