From 52d88152309c8715b76fdf630781b9deae3a32b7 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 7 May 2025 01:46:23 +0800 Subject: [PATCH] Eliminate redundant chunk data fetch for naive query mode --- lightrag/operate.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 2363d5eb..a052a341 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1213,15 +1213,12 @@ async def mix_kg_vector_query( if not results: return None - chunks_ids = [r["id"] for r in results] - chunks = await text_chunks_db.get_by_ids(chunks_ids) - valid_chunks = [] - for chunk, result in zip(chunks, results): - if chunk is not None and "content" in chunk: - # Merge chunk content and time metadata + for result in results: + if "content" in result: + # Directly use content from chunks_vdb.query result chunk_with_time = { - "content": chunk["content"], + "content": result["content"], "created_at": result.get("created_at", None), "file_path": result.get("file_path", None), } @@ -1256,9 +1253,9 @@ async def mix_kg_vector_query( formatted_chunks.append(chunk_text) logger.debug( - f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})" + f"Truncate chunks from {len(valid_chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})" ) - return "\n--New Chunk--\n".join(formatted_chunks) + return "\n\n--New Chunk--\n".join(formatted_chunks) except Exception as e: logger.error(f"Error in get_vector_context: {e}") return None @@ -2052,12 +2049,9 @@ async def naive_query( if not len(results): return PROMPTS["fail_response"] - chunks_ids = [r["id"] for r in results] - chunks = await text_chunks_db.get_by_ids(chunks_ids) - - # Filter out invalid chunks + # 直接从 chunks_vdb.query 结果中获取内容 valid_chunks = [ - chunk for chunk in chunks if chunk is not None and "content" in chunk + result for result in results if "content" in result ] 
if not valid_chunks: @@ -2077,13 +2071,13 @@ async def naive_query( return PROMPTS["fail_response"] logger.debug( - f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" + f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" ) logger.info( f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}" ) - section = "\n--New Chunk--\n".join( + section = "\n\n--New Chunk--\n".join( [ "File path: " + c["file_path"] + "\n" + c["content"] for c in maybe_trun_chunks