From 41cbecdbe6a6626870270f0fa141ec22e8d36550 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sun, 16 Feb 2025 21:26:29 +0800
Subject: [PATCH] Add token size limit truncation for node data retrieval
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Add truncate_list_by_token_size function
• Limit max tokens for local context
• Add logging for truncation info
• Apply truncation to node_datas list
---
 lightrag/operate.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index d95037bd..04d06e6b 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1108,6 +1108,17 @@ async def _get_node_data(
             node_datas, query_param, knowledge_graph_inst
         ),
     )
+
+    len_node_datas = len(node_datas)
+    node_datas = truncate_list_by_token_size(
+        node_datas,
+        key=lambda x: x["description"],
+        max_token_size=query_param.max_token_for_local_context,
+    )
+    logger.info(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )
+
     logger.info(
         f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
    )
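
Note for reviewers: the patch relies on the truncate_list_by_token_size helper from
lightrag.utils. The sketch below illustrates the expected behaviour only; it is not the
library's implementation, and the tiktoken "cl100k_base" encoder is an assumption made
purely for illustration.

    # Minimal sketch (not LightRAG's actual implementation) of the behaviour the
    # patch relies on: keep the longest prefix of the list whose summed token
    # count, measured on key(item), stays within max_token_size.
    # Assumption: a tiktoken "cl100k_base" encoder, chosen only for illustration.
    from typing import Callable

    import tiktoken


    def truncate_list_by_token_size(
        list_data: list,
        key: Callable[[dict], str],
        max_token_size: int,
    ) -> list:
        if max_token_size <= 0:
            return []
        encoder = tiktoken.get_encoding("cl100k_base")
        total_tokens = 0
        for i, item in enumerate(list_data):
            total_tokens += len(encoder.encode(key(item) or ""))
            if total_tokens > max_token_size:
                # Return everything up to, but not including, the item that overflowed.
                return list_data[:i]
        return list_data

Applied to node_datas with key=lambda x: x["description"], this caps the entity
descriptions used for local context at query_param.max_token_for_local_context tokens
before they are assembled into the prompt, and the added log line records how many
entities were dropped by the truncation.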