Eliminate redundant chunk data fetch for naive query mode

This commit is contained in:
yangdx
2025-05-07 01:46:23 +08:00
parent 4b35dbcf19
commit 52d8815230

View File

@@ -1213,15 +1213,12 @@ async def mix_kg_vector_query(
if not results: if not results:
return None return None
chunks_ids = [r["id"] for r in results]
chunks = await text_chunks_db.get_by_ids(chunks_ids)
valid_chunks = [] valid_chunks = []
for chunk, result in zip(chunks, results): for result in results:
if chunk is not None and "content" in chunk: if "content" in result:
# Merge chunk content and time metadata # Directly use content from chunks_vdb.query result
chunk_with_time = { chunk_with_time = {
"content": chunk["content"], "content": result["content"],
"created_at": result.get("created_at", None), "created_at": result.get("created_at", None),
"file_path": result.get("file_path", None), "file_path": result.get("file_path", None),
} }
@@ -1256,9 +1253,9 @@ async def mix_kg_vector_query(
formatted_chunks.append(chunk_text) formatted_chunks.append(chunk_text)
logger.debug( logger.debug(
f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})" f"Truncate chunks from {len(valid_chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
) )
return "\n--New Chunk--\n".join(formatted_chunks) return "\n\n--New Chunk--\n".join(formatted_chunks)
except Exception as e: except Exception as e:
logger.error(f"Error in get_vector_context: {e}") logger.error(f"Error in get_vector_context: {e}")
return None return None
@@ -2052,12 +2049,9 @@ async def naive_query(
if not len(results): if not len(results):
return PROMPTS["fail_response"] return PROMPTS["fail_response"]
chunks_ids = [r["id"] for r in results] # 直接从 chunks_vdb.query 结果中获取内容
chunks = await text_chunks_db.get_by_ids(chunks_ids)
# Filter out invalid chunks
valid_chunks = [ valid_chunks = [
chunk for chunk in chunks if chunk is not None and "content" in chunk result for result in results if "content" in result
] ]
if not valid_chunks: if not valid_chunks:
@@ -2077,13 +2071,13 @@ async def naive_query(
return PROMPTS["fail_response"] return PROMPTS["fail_response"]
logger.debug( logger.debug(
f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
) )
logger.info( logger.info(
f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}" f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}"
) )
section = "\n--New Chunk--\n".join( section = "\n\n--New Chunk--\n".join(
[ [
"File path: " + c["file_path"] + "\n" + c["content"] "File path: " + c["file_path"] + "\n" + c["content"]
for c in maybe_trun_chunks for c in maybe_trun_chunks