From 601df31edf366efb91a1a8b16e754d626c8aba86 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 19:26:57 +0800 Subject: [PATCH 01/14] feat: move query-related settings to env file for better configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add env vars for token and chunk settings • Add token count logging for prompts • Add token count logging for context • Move hardcoded values to env variables • Improve logging clarity and consistency --- .env.example | 19 +++++++++++----- lightrag/base.py | 8 ++++--- lightrag/lightrag.py | 6 ++--- lightrag/operate.py | 53 +++++++++++++++++++++++++++++++++++++++----- 4 files changed, 69 insertions(+), 17 deletions(-) diff --git a/.env.example b/.env.example index 2701335a..7057281d 100644 --- a/.env.example +++ b/.env.example @@ -27,14 +27,21 @@ TIMEOUT=300 ### RAG Configuration MAX_ASYNC=4 -MAX_TOKENS=32768 EMBEDDING_DIM=1024 MAX_EMBED_TOKENS=8192 -#HISTORY_TURNS=3 -#CHUNK_SIZE=1200 -#CHUNK_OVERLAP_SIZE=100 -#COSINE_THRESHOLD=0.2 -#TOP_K=60 +### Settings relative to query +HISTORY_TURNS=3 +COSINE_THRESHOLD=0.2 +TOP_K=60 +MAX_TOKEN_TEXT_CHUNK = 4000 +MAX_TOKEN_RELATION_DESC = 4000 +MAX_TOKEN_ENTITY_DESC = 4000 +### Settings relative to indexing +CHUNK_SIZE=1200 +CHUNK_OVERLAP_SIZE=100 +MAX_TOKENS=32768 +MAX_TOKEN_SUMMARY=500 +SUMMARY_LANGUAGE=English ### LLM Configuration (Use valid host. For local services, you can use host.docker.internal) ### Ollama example diff --git a/lightrag/base.py b/lightrag/base.py index e75167c4..aa8e6d9e 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -54,13 +54,15 @@ class QueryParam: top_k: int = int(os.getenv("TOP_K", "60")) """Number of top items to retrieve. 
Represents entities in 'local' mode and relationships in 'global' mode.""" - max_token_for_text_unit: int = 4000 + max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000")) """Maximum number of tokens allowed for each retrieved text chunk.""" - max_token_for_global_context: int = 4000 + max_token_for_global_context: int = int( + os.getenv("MAX_TOKEN_RELATION_DESC", "4000") + ) """Maximum number of tokens allocated for relationship descriptions in global retrieval.""" - max_token_for_local_context: int = 4000 + max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) """Maximum number of tokens allocated for entity descriptions in local retrieval.""" hl_keywords: list[str] = field(default_factory=list) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 9f74c917..554cba22 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -263,10 +263,10 @@ class LightRAG: """Directory where logs are stored. Defaults to the current working directory.""" # Text chunking - chunk_token_size: int = 1200 + chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200")) """Maximum number of tokens per text chunk when splitting documents.""" - chunk_overlap_token_size: int = 100 + chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100")) """Number of overlapping tokens between consecutive text chunks to preserve context.""" tiktoken_model_name: str = "gpt-4o-mini" @@ -276,7 +276,7 @@ class LightRAG: entity_extract_max_gleaning: int = 1 """Maximum number of entity extraction attempts for ambiguous content.""" - entity_summary_to_max_tokens: int = 500 + entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500")) """Maximum number of tokens used for summarizing extracted entities.""" # Node embedding diff --git a/lightrag/operate.py b/lightrag/operate.py index 04aad0d4..fb351a71 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -642,9 +642,13 @@ async def kg_query( 
history=history_context, ) + if query_param.only_need_prompt: return sys_prompt + len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) + logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}") + response = await use_model_func( query, system_prompt=sys_prompt, @@ -730,6 +734,9 @@ async def extract_keywords_only( query=text, examples=examples, language=language, history=history_context ) + len_of_prompts = len(encode_string_by_tiktoken(kw_prompt)) + logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}") + # 5. Call the LLM for keyword extraction use_model_func = global_config["llm_model_func"] result = await use_model_func(kw_prompt, keyword_extraction=True) @@ -893,7 +900,9 @@ async def mix_kg_vector_query( chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}" formatted_chunks.append(chunk_text) - logger.info(f"Truncate {len(chunks)} to {len(formatted_chunks)} chunks") + logger.info( + f"Truncate text chunks from {len(chunks)} to {len(formatted_chunks)}" + ) return "\n--New Chunk--\n".join(formatted_chunks) except Exception as e: logger.error(f"Error in get_vector_context: {e}") @@ -926,6 +935,9 @@ async def mix_kg_vector_query( if query_param.only_need_prompt: return sys_prompt + len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) + logger.info(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}") + # 6. 
Generate response response = await use_model_func( query, @@ -1031,7 +1043,7 @@ async def _build_query_context( if not entities_context.strip() and not relations_context.strip(): return None - return f""" + result = f""" -----Entities----- ```csv {entities_context} @@ -1045,6 +1057,15 @@ async def _build_query_context( {text_units_context} ``` """ + contex_tokens = len(encode_string_by_tiktoken(result)) + entities_tokens = len(encode_string_by_tiktoken(entities_context)) + relations_tokens = len(encode_string_by_tiktoken(relations_context)) + text_units_tokens = len(encode_string_by_tiktoken(text_units_context)) + logger.info( + f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}" + ) + + return result async def _get_node_data( @@ -1089,7 +1110,7 @@ async def _get_node_data( ), ) logger.info( - f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units" + f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks" ) # build prompt @@ -1222,6 +1243,10 @@ async def _find_most_related_text_unit_from_entities( max_token_size=query_param.max_token_for_text_unit, ) + logger.info( + f"Truncate text chunks from {len(all_text_units_lookup)} to {len(all_text_units)}" + ) + all_text_units = [t["data"] for t in all_text_units] return all_text_units @@ -1263,6 +1288,9 @@ async def _find_most_related_edges_from_entities( key=lambda x: x["description"], max_token_size=query_param.max_token_for_global_context, ) + + logger.info(f"Truncate relations from {len(all_edges)} to {len(all_edges_data)}") + return all_edges_data @@ -1310,11 +1338,13 @@ async def _get_edge_data( edge_datas = sorted( edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True ) + len_edge_datas = len(edge_datas) edge_datas = truncate_list_by_token_size( edge_datas, key=lambda x: x["description"], 
max_token_size=query_param.max_token_for_global_context, ) + logger.info(f"Truncate relations from {len_edge_datas} to {len(edge_datas)}") use_entities, use_text_units = await asyncio.gather( _find_most_related_entities_from_relationships( @@ -1325,7 +1355,7 @@ async def _get_edge_data( ), ) logger.info( - f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units" + f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks" ) relations_section_list = [ @@ -1414,11 +1444,13 @@ async def _find_most_related_entities_from_relationships( for k, n, d in zip(entity_names, node_datas, node_degrees) ] + len_node_datas = len(node_datas) node_datas = truncate_list_by_token_size( node_datas, key=lambda x: x["description"], max_token_size=query_param.max_token_for_local_context, ) + logger.info(f"Truncate entities from {len_node_datas} to {len(node_datas)}") return node_datas @@ -1474,6 +1506,10 @@ async def _find_related_text_unit_from_relationships( max_token_size=query_param.max_token_for_text_unit, ) + logger.info( + f"Truncate text chunks from {len(valid_text_units)} to {len(truncated_text_units)}" + ) + all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units] return all_text_units @@ -1541,7 +1577,8 @@ async def naive_query( logger.warning("No chunks left after truncation") return PROMPTS["fail_response"] - logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks") + logger.info(f"Truncate text chunks from {len(chunks)} to {len(maybe_trun_chunks)}") + section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks]) if query_param.only_need_context: @@ -1564,6 +1601,9 @@ async def naive_query( if query_param.only_need_prompt: return sys_prompt + len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) + logger.info(f"[naive_query]Prompt Tokens: {len_of_prompts}") + response = await use_model_func( query, 
system_prompt=sys_prompt, @@ -1706,6 +1746,9 @@ async def kg_query_with_keywords( if query_param.only_need_prompt: return sys_prompt + len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) + logger.info(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") + response = await use_model_func( query, system_prompt=sys_prompt, From 36ff0bfa85b037e36203f53f1c12666b0092e2fb Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 19:47:20 +0800 Subject: [PATCH 02/14] Improve logging message clarity by including max tokens info for truncation --- lightrag/operate.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index fb351a71..d95037bd 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -642,7 +642,6 @@ async def kg_query( history=history_context, ) - if query_param.only_need_prompt: return sys_prompt @@ -901,7 +900,7 @@ async def mix_kg_vector_query( formatted_chunks.append(chunk_text) logger.info( - f"Truncate text chunks from {len(chunks)} to {len(formatted_chunks)}" + f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})" ) return "\n--New Chunk--\n".join(formatted_chunks) except Exception as e: @@ -1244,7 +1243,7 @@ async def _find_most_related_text_unit_from_entities( ) logger.info( - f"Truncate text chunks from {len(all_text_units_lookup)} to {len(all_text_units)}" + f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})" ) all_text_units = [t["data"] for t in all_text_units] @@ -1289,7 +1288,9 @@ async def _find_most_related_edges_from_entities( max_token_size=query_param.max_token_for_global_context, ) - logger.info(f"Truncate relations from {len(all_edges)} to {len(all_edges_data)}") + logger.info( + f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max 
tokens:{query_param.max_token_for_global_context})" + ) return all_edges_data @@ -1344,7 +1345,9 @@ async def _get_edge_data( key=lambda x: x["description"], max_token_size=query_param.max_token_for_global_context, ) - logger.info(f"Truncate relations from {len_edge_datas} to {len(edge_datas)}") + logger.info( + f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})" + ) use_entities, use_text_units = await asyncio.gather( _find_most_related_entities_from_relationships( @@ -1450,7 +1453,9 @@ async def _find_most_related_entities_from_relationships( key=lambda x: x["description"], max_token_size=query_param.max_token_for_local_context, ) - logger.info(f"Truncate entities from {len_node_datas} to {len(node_datas)}") + logger.info( + f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" + ) return node_datas @@ -1507,7 +1512,7 @@ async def _find_related_text_unit_from_relationships( ) logger.info( - f"Truncate text chunks from {len(valid_text_units)} to {len(truncated_text_units)}" + f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})" ) all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units] @@ -1577,7 +1582,9 @@ async def naive_query( logger.warning("No chunks left after truncation") return PROMPTS["fail_response"] - logger.info(f"Truncate text chunks from {len(chunks)} to {len(maybe_trun_chunks)}") + logger.info( + f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" + ) section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks]) From 4e354451b7a0087f20cb9311708b384cadc23d89 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 20:05:45 +0800 Subject: [PATCH 03/14] Fix load_dotenv() missing problem for base.py --- lightrag/base.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/base.py b/lightrag/base.py index aa8e6d9e..210b0ca6 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -1,4 +1,5 @@ import os +from dotenv import load_dotenv from dataclasses import dataclass, field from enum import Enum from typing import ( @@ -9,12 +10,11 @@ from typing import ( TypeVar, Union, ) - import numpy as np - from .utils import EmbeddingFunc from .types import KnowledgeGraph +load_dotenv() class TextChunkSchema(TypedDict): tokens: int From 7f13b779e17d9ecd474f70bcaa59483a24fe11b9 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 20:15:14 +0800 Subject: [PATCH 04/14] Fix linting --- .env.example | 6 +++--- lightrag/base.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index 7057281d..c4c5847b 100644 --- a/.env.example +++ b/.env.example @@ -33,9 +33,9 @@ MAX_EMBED_TOKENS=8192 HISTORY_TURNS=3 COSINE_THRESHOLD=0.2 TOP_K=60 -MAX_TOKEN_TEXT_CHUNK = 4000 -MAX_TOKEN_RELATION_DESC = 4000 -MAX_TOKEN_ENTITY_DESC = 4000 +MAX_TOKEN_TEXT_CHUNK=4000 +MAX_TOKEN_RELATION_DESC=4000 +MAX_TOKEN_ENTITY_DESC=4000 ### Settings relative to indexing CHUNK_SIZE=1200 CHUNK_OVERLAP_SIZE=100 diff --git a/lightrag/base.py b/lightrag/base.py index 210b0ca6..ca1fac7f 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -16,6 +16,7 @@ from .types import KnowledgeGraph load_dotenv() + class TextChunkSchema(TypedDict): tokens: int content: str From 41cbecdbe6a6626870270f0fa141ec22e8d36550 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 21:26:29 +0800 Subject: [PATCH 05/14] Add token size limit truncation for node data retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add truncate_list_by_token_size function • Limit max tokens for local context • Add logging for truncation info • Apply truncation to node_datas list --- lightrag/operate.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff 
--git a/lightrag/operate.py b/lightrag/operate.py index d95037bd..04d06e6b 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1108,6 +1108,17 @@ async def _get_node_data( node_datas, query_param, knowledge_graph_inst ), ) + + len_node_datas = len(node_datas) + node_datas = truncate_list_by_token_size( + node_datas, + key=lambda x: x["description"], + max_token_size=query_param.max_token_for_local_context, + ) + logger.info( + f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" + ) + logger.info( f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks" ) From cfb49fc013f117d2dfdaadeb14a9d37d72c10529 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 22:31:12 +0800 Subject: [PATCH 06/14] Delete inline comment about config.ini --- .gitignore | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 2d9a41f3..83246d18 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,39 @@ -__pycache__ -*.egg-info -dickens/ -book.txt -lightrag-dev/ +# Python-related files +__pycache__/ +*.py[cod] +*.egg-info/ +.eggs/ +*.tgz +*.tar.gz +*.ini + +# Virtual Environment +.venv/ +env/ +venv/ +*.env* +.env_example + +# Build / Distribution +dist/ +build/ +site/ + +# Logs / Reports +*.log +*.logfire +*.coverage/ +log/ + +# Caches +.cache/ +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ +.gradio/ +temp/ + +# IDE / Editor Files .idea/ dist/ env/ From b450430109c5ff60799238f1360324d17b0d6f9b Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 22:42:53 +0800 Subject: [PATCH 07/14] Change log level from info to debug for token count logging --- lightrag/operate.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index fa52c55a..cc5dffe7 100644 --- a/lightrag/operate.py +++ 
b/lightrag/operate.py @@ -688,7 +688,7 @@ async def kg_query( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") response = await use_model_func( query, @@ -776,7 +776,7 @@ async def extract_keywords_only( ) len_of_prompts = len(encode_string_by_tiktoken(kw_prompt)) - logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") # 5. Call the LLM for keyword extraction use_model_func = global_config["llm_model_func"] @@ -941,7 +941,7 @@ async def mix_kg_vector_query( chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}" formatted_chunks.append(chunk_text) - logger.info( + logger.debug( f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})" ) return "\n--New Chunk--\n".join(formatted_chunks) @@ -977,7 +977,7 @@ async def mix_kg_vector_query( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - logger.info(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}") + logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}") # 6. 
Generate response response = await use_model_func( @@ -1102,7 +1102,7 @@ async def _build_query_context( entities_tokens = len(encode_string_by_tiktoken(entities_context)) relations_tokens = len(encode_string_by_tiktoken(relations_context)) text_units_tokens = len(encode_string_by_tiktoken(text_units_context)) - logger.info( + logger.debug( f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}" ) @@ -1157,7 +1157,7 @@ async def _get_node_data( key=lambda x: x["description"], max_token_size=query_param.max_token_for_local_context, ) - logger.info( + logger.debug( f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" ) @@ -1295,7 +1295,7 @@ async def _find_most_related_text_unit_from_entities( max_token_size=query_param.max_token_for_text_unit, ) - logger.info( + logger.debug( f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})" ) @@ -1341,7 +1341,7 @@ async def _find_most_related_edges_from_entities( max_token_size=query_param.max_token_for_global_context, ) - logger.info( + logger.debug( f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})" ) @@ -1398,7 +1398,7 @@ async def _get_edge_data( key=lambda x: x["description"], max_token_size=query_param.max_token_for_global_context, ) - logger.info( + logger.debug( f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})" ) @@ -1506,7 +1506,7 @@ async def _find_most_related_entities_from_relationships( key=lambda x: x["description"], max_token_size=query_param.max_token_for_local_context, ) - logger.info( + logger.debug( f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})" ) @@ -1564,7 +1564,7 @@ async def 
_find_related_text_unit_from_relationships( max_token_size=query_param.max_token_for_text_unit, ) - logger.info( + logger.debug( f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})" ) @@ -1635,7 +1635,7 @@ async def naive_query( logger.warning("No chunks left after truncation") return PROMPTS["fail_response"] - logger.info( + logger.debug( f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})" ) @@ -1807,7 +1807,7 @@ async def kg_query_with_keywords( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - logger.info(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") + logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") response = await use_model_func( query, From 9ec920661da6eb5cf1e0db6cc98f9822d0b6a69f Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 16 Feb 2025 22:53:58 +0800 Subject: [PATCH 08/14] Improve token estimation accuracy by using tiktoken instead of regex-based approach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Replace regex token estimation method • Use tiktoken for more precise counting • Support all types of text input • Simplify code implementation --- lightrag/api/ollama_api.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/lightrag/api/ollama_api.py b/lightrag/api/ollama_api.py index 94703dee..7d9fe3b9 100644 --- a/lightrag/api/ollama_api.py +++ b/lightrag/api/ollama_api.py @@ -11,6 +11,7 @@ from fastapi.responses import StreamingResponse import asyncio from ascii_colors import trace_exception from lightrag import LightRAG, QueryParam +from lightrag.utils import encode_string_by_tiktoken from dotenv import load_dotenv @@ -111,18 +112,9 @@ class OllamaTagResponse(BaseModel): def estimate_tokens(text: str) -> int: - """Estimate the number of tokens in text - 
Chinese characters: approximately 1.5 tokens per character - English characters: approximately 0.25 tokens per character - """ - # Use regex to match Chinese and non-Chinese characters separately - chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text)) - non_chinese_chars = len(re.findall(r"[^\u4e00-\u9fff]", text)) - - # Calculate estimated token count - tokens = chinese_chars * 1.5 + non_chinese_chars * 0.25 - - return int(tokens) + """Estimate the number of tokens in text using tiktoken""" + tokens = encode_string_by_tiktoken(text) + return len(tokens) def parse_query_mode(query: str) -> tuple[str, SearchMode]: From 806eadf5dcd7ca00905e889898a22f3deb602e73 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 17 Feb 2025 01:38:18 +0800 Subject: [PATCH 09/14] Add verbose debug option to control detailed debug output level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Added VERBOSE env var & CLI flag • Implemented verbose_debug() function • Added verbose option to splash screen • Reduced default debug output length • Modified LLM debug logging behavior --- .env.example | 1 + lightrag/api/lightrag_server.py | 18 ++++++++++++++++-- lightrag/llm/openai.py | 6 +++--- lightrag/llm/zhipu.py | 3 ++- lightrag/operate.py | 7 ++++--- lightrag/utils.py | 17 +++++++++++++++++ 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index c4c5847b..f70244e5 100644 --- a/.env.example +++ b/.env.example @@ -18,6 +18,7 @@ ### Logging level LOG_LEVEL=INFO +VERBOSE=False ### Optional Timeout TIMEOUT=300 diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 97f1156f..4fe0b8ae 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -130,8 +130,8 @@ def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any: if value is None: return default - if isinstance(value_type, bool): - return value.lower() in ("true", "1", "yes") + if 
value_type is bool: + return value.lower() in ("true", "1", "yes", "t", "on") try: return value_type(value) except ValueError: @@ -233,6 +233,8 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}") ASCIIColors.white(" ├─ Log Level: ", end="") ASCIIColors.yellow(f"{args.log_level}") + ASCIIColors.white(" ├─ Verbose Debug: ", end="") + ASCIIColors.yellow(f"{args.verbose}") ASCIIColors.white(" └─ Timeout: ", end="") ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}") @@ -564,6 +566,13 @@ def parse_args() -> argparse.Namespace: help="Prefix of the namespace", ) + parser.add_argument( + "--verbose", + type=bool, + default=get_env_value("VERBOSE", False, bool), + help="Verbose debug output(default: from env or false)", + ) + args = parser.parse_args() # conver relative path to absolute path @@ -685,6 +694,11 @@ global_top_k = 60 # default value def create_app(args): + # Initialize verbose debug setting + from lightrag.utils import set_verbose_debug + + set_verbose_debug(args.verbose) + global global_top_k global_top_k = args.top_k # save top_k from args diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index e6d00377..399e29df 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -40,7 +40,7 @@ __version__ = "1.0.0" __author__ = "lightrag Team" __status__ = "Production" - +from ..utils import verbose_debug import sys import os @@ -129,8 +129,8 @@ async def openai_complete_if_cache( logger.debug("===== Query Input to LLM =====") logger.debug(f"Model: {model} Base URL: {base_url}") logger.debug(f"Additional kwargs: {kwargs}") - logger.debug(f"Query: {prompt}") - logger.debug(f"System prompt: {system_prompt}") + verbose_debug(f"Query: {prompt}") + verbose_debug(f"System prompt: {system_prompt}") # logger.debug(f"Messages: {messages}") try: diff --git a/lightrag/llm/zhipu.py b/lightrag/llm/zhipu.py index 9f5d9ca5..5a73f41d 100644 --- a/lightrag/llm/zhipu.py 
+++ b/lightrag/llm/zhipu.py @@ -43,6 +43,7 @@ __status__ = "Production" import sys import re import json +from ..utils import verbose_debug if sys.version_info < (3, 9): pass @@ -119,7 +120,7 @@ async def zhipu_complete_if_cache( # Add debug logging logger.debug("===== Query Input to LLM =====") logger.debug(f"Query: {prompt}") - logger.debug(f"System prompt: {system_prompt}") + verbose_debug(f"System prompt: {system_prompt}") # Remove unsupported kwargs kwargs = { diff --git a/lightrag/operate.py b/lightrag/operate.py index cc5dffe7..23764957 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -24,6 +24,7 @@ from .utils import ( CacheData, statistic_data, get_conversation_turns, + verbose_debug, ) from .base import ( BaseGraphStorage, @@ -688,7 +689,7 @@ async def kg_query( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + verbose_debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") response = await use_model_func( query, @@ -977,7 +978,7 @@ async def mix_kg_vector_query( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}") + verbose_debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}") # 6. 
Generate response response = await use_model_func( @@ -1807,7 +1808,7 @@ async def kg_query_with_keywords( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") + verbose_debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") response = await use_model_func( query, diff --git a/lightrag/utils.py b/lightrag/utils.py index c8786e7b..5eb82f66 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -20,6 +20,23 @@ import tiktoken from lightrag.prompt import PROMPTS +VERBOSE_DEBUG = False + + +def verbose_debug(msg: str, *args, **kwargs): + """Function for outputting detailed debug information. + When VERBOSE_DEBUG=True, outputs the complete message. + When VERBOSE_DEBUG=False, outputs only the first 30 characters. + """ + if VERBOSE_DEBUG: + logger.debug(msg, *args, **kwargs) + + +def set_verbose_debug(enabled: bool): + """Enable or disable verbose debug output""" + global VERBOSE_DEBUG + VERBOSE_DEBUG = enabled + class UnlimitedSemaphore: """A context manager that allows unlimited access.""" From 4fd1f214c106c6c963a1242a748bf04060ae1654 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 17 Feb 2025 03:57:58 +0800 Subject: [PATCH 10/14] Add verbose debug output option to API configuration - Introduced `--verbose` flag - Defaults to `False` --- lightrag/api/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/lightrag/api/README.md b/lightrag/api/README.md index d48b6732..69023685 100644 --- a/lightrag/api/README.md +++ b/lightrag/api/README.md @@ -222,6 +222,7 @@ You can select storage implementation by enviroment variables or command line a | --max-embed-tokens | 8192 | Maximum embedding token size | | --timeout | None | Timeout in seconds (useful when using slow AI). 
Use None for infinite timeout | | --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) | +| --verbose | False | Verbose debug output (True, False) | | --key | None | API key for authentication. Protects lightrag server against unauthorized access | | --ssl | False | Enable HTTPS | | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) | From 9963fb7ff4d4f299c1cccf49c39aebd201c1abf3 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 17 Feb 2025 11:37:38 +0800 Subject: [PATCH 11/14] Make VERBOSE_DEBUG configurable via environment variable - Adds environment variable support - Defaults to "false" if not set - Case-insensitive check for "true" - Simplifies debugging configuration - Maintains backward compatibility --- lightrag/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/utils.py b/lightrag/utils.py index 5eb82f66..f51f3640 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -20,7 +20,7 @@ import tiktoken from lightrag.prompt import PROMPTS -VERBOSE_DEBUG = False +VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true" def verbose_debug(msg: str, *args, **kwargs): From d3ff8c3537772b52f70741bdf03b7875b2a13200 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 17 Feb 2025 12:20:47 +0800 Subject: [PATCH 12/14] Set OpenAI logger level to INFO if VERBOSE_DEBUG is off --- lightrag/llm/openai.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 399e29df..96255086 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -40,9 +40,10 @@ __version__ = "1.0.0" __author__ = "lightrag Team" __status__ = "Production" -from ..utils import verbose_debug +from ..utils import verbose_debug, VERBOSE_DEBUG import sys import os +import logging if sys.version_info < (3, 9): from typing import AsyncIterator @@ -110,6 +111,11 @@ async def openai_complete_if_cache( "User-Agent": "Mozilla/5.0 
(Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", "Content-Type": "application/json", } + + # Set openai logger level to INFO when VERBOSE_DEBUG is off + if not VERBOSE_DEBUG and logger.level == logging.DEBUG: + logging.getLogger("openai").setLevel(logging.INFO) + openai_async_client = ( AsyncOpenAI(default_headers=default_headers, api_key=api_key) if base_url is None @@ -125,15 +131,14 @@ async def openai_complete_if_cache( messages.extend(history_messages) messages.append({"role": "user", "content": prompt}) - # 添加日志输出 - logger.debug("===== Query Input to LLM =====") + logger.debug("===== Sending Query to LLM =====") logger.debug(f"Model: {model} Base URL: {base_url}") logger.debug(f"Additional kwargs: {kwargs}") verbose_debug(f"Query: {prompt}") verbose_debug(f"System prompt: {system_prompt}") # logger.debug(f"Messages: {messages}") - try: + try: if "response_format" in kwargs: response = await openai_async_client.beta.chat.completions.parse( model=model, messages=messages, **kwargs From ae37454c4243ee6ef205f88d349667ca8ab5bd1e Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 17 Feb 2025 12:28:49 +0800 Subject: [PATCH 13/14] Replace verbose_debug with logger.debug for token logging. 
- Removed unused verbose_debug import - Updated debug logging in kg_query - Updated debug logging in mix_kg_vector_query - Updated debug logging in kg_query_with_keywords --- lightrag/operate.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 23764957..cc5dffe7 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -24,7 +24,6 @@ from .utils import ( CacheData, statistic_data, get_conversation_turns, - verbose_debug, ) from .base import ( BaseGraphStorage, @@ -689,7 +688,7 @@ async def kg_query( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - verbose_debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") + logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}") response = await use_model_func( query, @@ -978,7 +977,7 @@ async def mix_kg_vector_query( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - verbose_debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}") + logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}") # 6. 
Generate response response = await use_model_func( @@ -1808,7 +1807,7 @@ async def kg_query_with_keywords( return sys_prompt len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt)) - verbose_debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") + logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}") response = await use_model_func( query, From b7cce9312f8734646be0459e0a21afd815015272 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 17 Feb 2025 12:34:54 +0800 Subject: [PATCH 14/14] Fix linting --- lightrag/llm/openai.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 96255086..024f7f52 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -115,7 +115,7 @@ async def openai_complete_if_cache( # Set openai logger level to INFO when VERBOSE_DEBUG is off if not VERBOSE_DEBUG and logger.level == logging.DEBUG: logging.getLogger("openai").setLevel(logging.INFO) - + openai_async_client = ( AsyncOpenAI(default_headers=default_headers, api_key=api_key) if base_url is None @@ -136,9 +136,8 @@ async def openai_complete_if_cache( logger.debug(f"Additional kwargs: {kwargs}") verbose_debug(f"Query: {prompt}") verbose_debug(f"System prompt: {system_prompt}") - # logger.debug(f"Messages: {messages}") - try: + try: if "response_format" in kwargs: response = await openai_async_client.beta.chat.completions.parse( model=model, messages=messages, **kwargs