Merge pull request #1537 from danielaskdd/redundant-chunk-fetch
Eliminate redundant chunk data fetching in naive and mix query modes
This commit is contained in:
@@ -1 +1 @@
|
||||
__api_version__ = "0166"
|
||||
__api_version__ = "0167"
|
||||
|
@@ -390,6 +390,8 @@ class LightRAG:
|
||||
),
|
||||
embedding_func=self.embedding_func,
|
||||
)
|
||||
|
||||
# TODO: deprecating, text_chunks is redundant with chunks_vdb
|
||||
self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
|
||||
namespace=make_namespace(
|
||||
self.namespace_prefix, NameSpace.KV_STORE_TEXT_CHUNKS
|
||||
|
@@ -1213,15 +1213,12 @@ async def mix_kg_vector_query(
|
||||
if not results:
|
||||
return None
|
||||
|
||||
chunks_ids = [r["id"] for r in results]
|
||||
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
||||
|
||||
valid_chunks = []
|
||||
for chunk, result in zip(chunks, results):
|
||||
if chunk is not None and "content" in chunk:
|
||||
# Merge chunk content and time metadata
|
||||
for result in results:
|
||||
if "content" in result:
|
||||
# Directly use content from chunks_vdb.query result
|
||||
chunk_with_time = {
|
||||
"content": chunk["content"],
|
||||
"content": result["content"],
|
||||
"created_at": result.get("created_at", None),
|
||||
"file_path": result.get("file_path", None),
|
||||
}
|
||||
@@ -1256,9 +1253,9 @@ async def mix_kg_vector_query(
|
||||
formatted_chunks.append(chunk_text)
|
||||
|
||||
logger.debug(
|
||||
f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
||||
f"Truncate chunks from {len(valid_chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
||||
)
|
||||
return "\n--New Chunk--\n".join(formatted_chunks)
|
||||
return "\n\n--New Chunk--\n".join(formatted_chunks)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in get_vector_context: {e}")
|
||||
return None
|
||||
@@ -2052,13 +2049,7 @@ async def naive_query(
|
||||
if not len(results):
|
||||
return PROMPTS["fail_response"]
|
||||
|
||||
chunks_ids = [r["id"] for r in results]
|
||||
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
||||
|
||||
# Filter out invalid chunks
|
||||
valid_chunks = [
|
||||
chunk for chunk in chunks if chunk is not None and "content" in chunk
|
||||
]
|
||||
valid_chunks = [result for result in results if "content" in result]
|
||||
|
||||
if not valid_chunks:
|
||||
logger.warning("No valid chunks found after filtering")
|
||||
@@ -2077,13 +2068,13 @@ async def naive_query(
|
||||
return PROMPTS["fail_response"]
|
||||
|
||||
logger.debug(
|
||||
f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
||||
f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
||||
)
|
||||
logger.info(
|
||||
f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}"
|
||||
)
|
||||
|
||||
section = "\n--New Chunk--\n".join(
|
||||
section = "\n\n--New Chunk--\n".join(
|
||||
[
|
||||
"File path: " + c["file_path"] + "\n" + c["content"]
|
||||
for c in maybe_trun_chunks
|
||||
|
Reference in New Issue
Block a user