From 41cbecdbe6a6626870270f0fa141ec22e8d36550 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sun, 16 Feb 2025 21:26:29 +0800
Subject: [PATCH] Add token size limit truncation for node data retrieval
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Add truncate_list_by_token_size function
• Limit max tokens for local context
• Add logging for truncation info
• Apply truncation to node_datas list
---
 lightrag/operate.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index d95037bd..04d06e6b 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1108,6 +1108,17 @@ async def _get_node_data(
             node_datas, query_param, knowledge_graph_inst
         ),
     )
+
+    len_node_datas = len(node_datas)
+    node_datas = truncate_list_by_token_size(
+        node_datas,
+        key=lambda x: x["description"],
+        max_token_size=query_param.max_token_for_local_context,
+    )
+    logger.info(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )
+
     logger.info(
         f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
    )
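
Note for reviewers: the patch relies on the truncate_list_by_token_size helper from
lightrag.utils. The sketch below illustrates the expected behaviour only; it is not the
library's implementation, and the tiktoken "cl100k_base" encoder is an assumption made
purely for illustration.

    # Minimal sketch (not LightRAG's actual implementation) of the behaviour the
    # patch relies on: keep the longest prefix of the list whose summed token
    # count, measured on key(item), stays within max_token_size.
    # Assumption: a tiktoken "cl100k_base" encoder, chosen only for illustration.
    from typing import Callable

    import tiktoken


    def truncate_list_by_token_size(
        list_data: list,
        key: Callable[[dict], str],
        max_token_size: int,
    ) -> list:
        if max_token_size <= 0:
            return []
        encoder = tiktoken.get_encoding("cl100k_base")
        total_tokens = 0
        for i, item in enumerate(list_data):
            total_tokens += len(encoder.encode(key(item) or ""))
            if total_tokens > max_token_size:
                # Return everything up to, but not including, the item that overflowed.
                return list_data[:i]
        return list_data

Applied to node_datas with key=lambda x: x["description"], this caps the entity
descriptions used for local context at query_param.max_token_for_local_context tokens
before they are assembled into the prompt, and the added log line records how many
entities were dropped by the truncation.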