fix citation

This commit is contained in:
zrguo
2025-03-28 13:30:24 +08:00
parent 3b71568053
commit 87fbffde14
2 changed files with 14 additions and 9 deletions

View File

@@ -1038,7 +1038,7 @@ async def mix_kg_vector_query(
# Include time information in content # Include time information in content
formatted_chunks = [] formatted_chunks = []
for c in maybe_trun_chunks: for c in maybe_trun_chunks:
chunk_text = c["content"] chunk_text = "File path: " + c["file_path"] + "\n" + c["content"]
if c["created_at"]: if c["created_at"]:
chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}" chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
formatted_chunks.append(chunk_text) formatted_chunks.append(chunk_text)
@@ -1334,9 +1334,9 @@ async def _get_node_data(
) )
relations_context = list_of_list_to_csv(relations_section_list) relations_context = list_of_list_to_csv(relations_section_list)
text_units_section_list = [["id", "content"]] text_units_section_list = [["id", "content", "file_path"]]
for i, t in enumerate(use_text_units): for i, t in enumerate(use_text_units):
text_units_section_list.append([i, t["content"]]) text_units_section_list.append([i, t["content"], t["file_path"]])
text_units_context = list_of_list_to_csv(text_units_section_list) text_units_context = list_of_list_to_csv(text_units_section_list)
return entities_context, relations_context, text_units_context return entities_context, relations_context, text_units_context
@@ -1597,9 +1597,9 @@ async def _get_edge_data(
) )
entities_context = list_of_list_to_csv(entites_section_list) entities_context = list_of_list_to_csv(entites_section_list)
text_units_section_list = [["id", "content"]] text_units_section_list = [["id", "content", "file_path"]]
for i, t in enumerate(use_text_units): for i, t in enumerate(use_text_units):
text_units_section_list.append([i, t["content"]]) text_units_section_list.append([i, t["content"], t["file_path"]])
text_units_context = list_of_list_to_csv(text_units_section_list) text_units_context = list_of_list_to_csv(text_units_section_list)
return entities_context, relations_context, text_units_context return entities_context, relations_context, text_units_context
@@ -1785,7 +1785,12 @@ async def naive_query(
f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
) )
section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks]) section = "\n--New Chunk--\n".join(
[
"File path: " + c["file_path"] + "\n" + c["content"]
for c in maybe_trun_chunks
]
)
if query_param.only_need_context: if query_param.only_need_context:
return section return section

View File

@@ -222,7 +222,7 @@ When handling relationships with timestamps:
- Use markdown formatting with appropriate section headings - Use markdown formatting with appropriate section headings
- Please respond in the same language as the user's question. - Please respond in the same language as the user's question.
- Ensure the response maintains continuity with the conversation history. - Ensure the response maintains continuity with the conversation history.
- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path) - List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
- If you don't know the answer, just say so. - If you don't know the answer, just say so.
- Do not make anything up. Do not include information not provided by the Knowledge Base.""" - Do not make anything up. Do not include information not provided by the Knowledge Base."""
@@ -320,7 +320,7 @@ When handling content with timestamps:
- Use markdown formatting with appropriate section headings - Use markdown formatting with appropriate section headings
- Please respond in the same language as the user's question. - Please respond in the same language as the user's question.
- Ensure the response maintains continuity with the conversation history. - Ensure the response maintains continuity with the conversation history.
- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path) - List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
- If you don't know the answer, just say so. - If you don't know the answer, just say so.
- Do not include information not provided by the Document Chunks.""" - Do not include information not provided by the Document Chunks."""
@@ -382,6 +382,6 @@ When handling information with timestamps:
- Ensure the response maintains continuity with the conversation history. - Ensure the response maintains continuity with the conversation history.
- Organize answer in sections focusing on one main point or aspect of the answer - Organize answer in sections focusing on one main point or aspect of the answer
- Use clear and descriptive section titles that reflect the content - Use clear and descriptive section titles that reflect the content
- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path) - List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
- If you don't know the answer, just say so. Do not make anything up. - If you don't know the answer, just say so. Do not make anything up.
- Do not include information not provided by the Data Sources.""" - Do not include information not provided by the Data Sources."""