From 52d88152309c8715b76fdf630781b9deae3a32b7 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 7 May 2025 01:46:23 +0800 Subject: [PATCH 1/4] Eliminate redundant chunk data fetch for naive query mode --- lightrag/operate.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 2363d5eb..a052a341 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1213,15 +1213,12 @@ async def mix_kg_vector_query( if not results: return None - chunks_ids = [r["id"] for r in results] - chunks = await text_chunks_db.get_by_ids(chunks_ids) - valid_chunks = [] - for chunk, result in zip(chunks, results): - if chunk is not None and "content" in chunk: - # Merge chunk content and time metadata + for result in results: + if "content" in result: + # Directly use content from chunks_vdb.query result chunk_with_time = { - "content": chunk["content"], + "content": result["content"], "created_at": result.get("created_at", None), "file_path": result.get("file_path", None), } @@ -1256,9 +1253,9 @@ async def mix_kg_vector_query( formatted_chunks.append(chunk_text) logger.debug( - f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})" + f"Truncate chunks from {len(valid_chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})" ) - return "\n--New Chunk--\n".join(formatted_chunks) + return "\n\n--New Chunk--\n".join(formatted_chunks) except Exception as e: logger.error(f"Error in get_vector_context: {e}") return None @@ -2052,12 +2049,9 @@ async def naive_query( if not len(results): return PROMPTS["fail_response"] - chunks_ids = [r["id"] for r in results] - chunks = await text_chunks_db.get_by_ids(chunks_ids) - - # Filter out invalid chunks + # 直接从 chunks_vdb.query 结果中获取内容 valid_chunks = [ - chunk for chunk in chunks if chunk is not None and "content" in chunk + result for result in results if "content" in 
result ] if not valid_chunks: @@ -2077,13 +2071,13 @@ async def naive_query( return PROMPTS["fail_response"] logger.debug( - f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" + f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" ) logger.info( f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}" ) - section = "\n--New Chunk--\n".join( + section = "\n\n--New Chunk--\n".join( [ "File path: " + c["file_path"] + "\n" + c["content"] for c in maybe_trun_chunks From b1f874b48920921dfa26e12ed2a9c8165814f532 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 7 May 2025 01:51:58 +0800 Subject: [PATCH 2/4] Fix linting --- lightrag/operate.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index a052a341..d5ff0a0c 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2049,10 +2049,7 @@ async def naive_query( if not len(results): return PROMPTS["fail_response"] - # 直接从 chunks_vdb.query 结果中获取内容 - valid_chunks = [ - result for result in results if "content" in result - ] + valid_chunks = [result for result in results if "content" in result] if not valid_chunks: logger.warning("No valid chunks found after filtering") From 0e3e936e1a299637838f6791eecf915c19914fe6 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 7 May 2025 01:52:29 +0800 Subject: [PATCH 3/4] Bump api version to 0167 --- lightrag/api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py index fd9cc386..3b193df9 100644 --- a/lightrag/api/__init__.py +++ b/lightrag/api/__init__.py @@ -1 +1 @@ -__api_version__ = "0166" +__api_version__ = "0167" From 365ef754478f68742a205a516a4ffafc85fceed0 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 7 May 2025 02:03:57 +0800 Subject: [PATCH 4/4] Add deprecation comment to 
text_chunks storage --- lightrag/lightrag.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index e078a7e9..e9cb0926 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -390,6 +390,8 @@ class LightRAG: ), embedding_func=self.embedding_func, ) + + # TODO: deprecating, text_chunks is redundant with chunks_vdb self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore namespace=make_namespace( self.namespace_prefix, NameSpace.KV_STORE_TEXT_CHUNKS