Merge pull request #1537 from danielaskdd/redundant-chunk-fetch
Eliminate redundant chunk data fetching in naive and mix query modes
This commit is contained in:
@@ -1 +1 @@
|
|||||||
__api_version__ = "0166"
|
__api_version__ = "0167"
|
||||||
|
@@ -390,6 +390,8 @@ class LightRAG:
|
|||||||
),
|
),
|
||||||
embedding_func=self.embedding_func,
|
embedding_func=self.embedding_func,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO: deprecate text_chunks; it is redundant with chunks_vdb
|
||||||
self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
|
self.text_chunks: BaseKVStorage = self.key_string_value_json_storage_cls( # type: ignore
|
||||||
namespace=make_namespace(
|
namespace=make_namespace(
|
||||||
self.namespace_prefix, NameSpace.KV_STORE_TEXT_CHUNKS
|
self.namespace_prefix, NameSpace.KV_STORE_TEXT_CHUNKS
|
||||||
|
@@ -1213,15 +1213,12 @@ async def mix_kg_vector_query(
|
|||||||
if not results:
|
if not results:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
chunks_ids = [r["id"] for r in results]
|
|
||||||
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
|
||||||
|
|
||||||
valid_chunks = []
|
valid_chunks = []
|
||||||
for chunk, result in zip(chunks, results):
|
for result in results:
|
||||||
if chunk is not None and "content" in chunk:
|
if "content" in result:
|
||||||
# Merge chunk content and time metadata
|
# Directly use content from chunks_vdb.query result
|
||||||
chunk_with_time = {
|
chunk_with_time = {
|
||||||
"content": chunk["content"],
|
"content": result["content"],
|
||||||
"created_at": result.get("created_at", None),
|
"created_at": result.get("created_at", None),
|
||||||
"file_path": result.get("file_path", None),
|
"file_path": result.get("file_path", None),
|
||||||
}
|
}
|
||||||
@@ -1256,9 +1253,9 @@ async def mix_kg_vector_query(
|
|||||||
formatted_chunks.append(chunk_text)
|
formatted_chunks.append(chunk_text)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
f"Truncate chunks from {len(valid_chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
||||||
)
|
)
|
||||||
return "\n--New Chunk--\n".join(formatted_chunks)
|
return "\n\n--New Chunk--\n".join(formatted_chunks)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in get_vector_context: {e}")
|
logger.error(f"Error in get_vector_context: {e}")
|
||||||
return None
|
return None
|
||||||
@@ -2052,13 +2049,7 @@ async def naive_query(
|
|||||||
if not len(results):
|
if not len(results):
|
||||||
return PROMPTS["fail_response"]
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
chunks_ids = [r["id"] for r in results]
|
valid_chunks = [result for result in results if "content" in result]
|
||||||
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
|
||||||
|
|
||||||
# Filter out invalid chunks
|
|
||||||
valid_chunks = [
|
|
||||||
chunk for chunk in chunks if chunk is not None and "content" in chunk
|
|
||||||
]
|
|
||||||
|
|
||||||
if not valid_chunks:
|
if not valid_chunks:
|
||||||
logger.warning("No valid chunks found after filtering")
|
logger.warning("No valid chunks found after filtering")
|
||||||
@@ -2077,13 +2068,13 @@ async def naive_query(
|
|||||||
return PROMPTS["fail_response"]
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}"
|
f"Naive query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}"
|
||||||
)
|
)
|
||||||
|
|
||||||
section = "\n--New Chunk--\n".join(
|
section = "\n\n--New Chunk--\n".join(
|
||||||
[
|
[
|
||||||
"File path: " + c["file_path"] + "\n" + c["content"]
|
"File path: " + c["file_path"] + "\n" + c["content"]
|
||||||
for c in maybe_trun_chunks
|
for c in maybe_trun_chunks
|
||||||
|
Reference in New Issue
Block a user