diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py
index fd9cc386..3b193df9 100644
--- a/lightrag/api/__init__.py
+++ b/lightrag/api/__init__.py
@@ -1 +1 @@
-__api_version__ = "0166"
+__api_version__ = "0167"
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index e078a7e9..e9cb0926 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -390,6 +390,8 @@ class LightRAG:
             ),
             embedding_func=self.embedding_func,
         )
+
+        # TODO: deprecating, text_chunks is redundant with chunks_vdb
         self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls(  # type: ignore
             namespace=make_namespace(
                 self.namespace_prefix, NameSpace.KV_STORE_TEXT_CHUNKS
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 2363d5eb..d5ff0a0c 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1213,15 +1213,12 @@ async def mix_kg_vector_query(
         if not results:
             return None

-        chunks_ids = [r["id"] for r in results]
-        chunks = await text_chunks_db.get_by_ids(chunks_ids)
-
         valid_chunks = []
-        for chunk, result in zip(chunks, results):
-            if chunk is not None and "content" in chunk:
-                # Merge chunk content and time metadata
+        for result in results:
+            if "content" in result:
+                # Directly use content from chunks_vdb.query result
                 chunk_with_time = {
-                    "content": chunk["content"],
+                    "content": result["content"],
                     "created_at": result.get("created_at", None),
                     "file_path": result.get("file_path", None),
                 }
@@ -1256,9 +1253,9 @@
                 formatted_chunks.append(chunk_text)

         logger.debug(
-            f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+            f"Truncate chunks from {len(valid_chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
         )
-        return "\n--New Chunk--\n".join(formatted_chunks)
+        return "\n\n--New Chunk--\n".join(formatted_chunks)
     except Exception as e:
         logger.error(f"Error in get_vector_context: {e}")
         return None
@@ -2052,13 +2049,7 @@ async def naive_query(
     if not len(results):
         return PROMPTS["fail_response"]

-    chunks_ids = [r["id"] for r in results]
-    chunks = await text_chunks_db.get_by_ids(chunks_ids)
-
-    # Filter out invalid chunks
-    valid_chunks = [
-        chunk for chunk in chunks if chunk is not None and "content" in chunk
-    ]
+    valid_chunks = [result for result in results if "content" in result]

     if not valid_chunks:
         logger.warning("No valid chunks found after filtering")
@@ -2077,13 +2068,13 @@
         return PROMPTS["fail_response"]

     logger.debug(
-        f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+        f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
     )
     logger.info(
         f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}"
     )

-    section = "\n--New Chunk--\n".join(
+    section = "\n\n--New Chunk--\n".join(
         [
             "File path: " + c["file_path"] + "\n" + c["content"]
             for c in maybe_trun_chunks
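
Reviewer note: the operate.py hunks assume that `chunks_vdb.query` hits now carry the chunk payload inline (`content`, plus optional `created_at` and `file_path`), which is what makes the `text_chunks_db.get_by_ids` round-trip, and eventually the `text_chunks` KV store itself, removable. A minimal sketch of that new data flow, with hypothetical row data; the dict shapes mirror the diff, but nothing below is LightRAG's actual storage API:

```python
from typing import Any


def select_valid_chunks(results: list[dict[str, Any]]) -> list[dict[str, Any]]:
    # Mirrors the new naive_query filter: keep only hits whose payload
    # was stored alongside the vector, instead of re-fetching it from KV.
    return [r for r in results if "content" in r]


def to_chunk_with_time(result: dict[str, Any]) -> dict[str, Any]:
    # Mirrors the new mix_kg_vector_query loop body: content and
    # time/file metadata come straight off the vector-store hit.
    return {
        "content": result["content"],
        "created_at": result.get("created_at", None),
        "file_path": result.get("file_path", None),
    }


# Hypothetical query hits: the second row has no inline payload and is
# now simply dropped rather than resolved via text_chunks_db.get_by_ids.
hits = [
    {"id": "chunk-1", "content": "LightRAG supports ...", "file_path": "docs/a.md"},
    {"id": "chunk-2"},
]
print([to_chunk_with_time(r) for r in select_valid_chunks(hits)])
```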
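The separator change is small but visible in the assembled prompt: each `--New Chunk--` marker is now preceded by a blank line, so chunk boundaries stand out after the `File path:` header and body of the previous chunk. A quick illustration with made-up chunks:

```python
formatted_chunks = [
    "File path: docs/a.md\nLightRAG supports ...",
    "File path: docs/b.md\nNaive query mode ...",
]
# Old separator was "\n--New Chunk--\n"; the new one adds a blank line
# before the marker.
print("\n\n--New Chunk--\n".join(formatted_chunks))
```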