diff --git a/README-zh.md b/README-zh.md index 5471137c..d345562f 100644 --- a/README-zh.md +++ b/README-zh.md @@ -409,6 +409,54 @@ if __name__ == "__main__": +### Token统计功能 +
+ 概述和使用
+
+LightRAG提供了TokenTracker工具来跟踪和管理大模型的token消耗。这个功能对于控制API成本和优化性能特别有用。
+
+#### 使用方法
+
+```python
+from lightrag.utils import TokenTracker
+
+# 创建TokenTracker实例
+token_tracker = TokenTracker()
+
+# 方法1:使用上下文管理器(推荐)
+# 适用于需要自动跟踪token使用的场景
+with token_tracker:
+    result1 = await llm_model_func("你的问题1")
+    result2 = await llm_model_func("你的问题2")
+
+# 方法2:手动重置并读取token统计
+# 适用于需要更精细控制token统计范围的场景
+token_tracker.reset()
+
+rag.insert("你的文档内容")
+
+rag.query("你的问题1", param=QueryParam(mode="naive"))
+rag.query("你的问题2", param=QueryParam(mode="mix"))
+
+# 显示总token使用量(包含插入和查询操作)
+print("Token usage:", token_tracker.get_usage())
+```
+
+#### 使用建议
+- 在长会话或批量操作中使用上下文管理器,可以自动跟踪所有token消耗
+- 对于需要分段统计的场景,使用手动模式并适时调用reset()(参见下方示例)
+- 定期检查token使用情况,有助于及时发现异常消耗
+- 在开发测试阶段积极使用此功能,以便优化生产环境的成本
+
+#### 实际应用示例
+您可以参考以下示例来实现token统计:
+- `examples/lightrag_gemini_track_token_demo.py`:使用Google Gemini模型的token统计示例
+- `examples/lightrag_siliconcloud_track_token_demo.py`:使用SiliconCloud模型的token统计示例
+
+这些示例展示了如何在不同模型和场景下有效地使用TokenTracker功能。
+
+
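+#### 分段统计示例
+
+如果需要分别统计插入阶段和查询阶段的token消耗,可以在两个阶段之间调用reset()。以下只是一个最小示例(假设rag初始化时使用的LLM函数会把用量上报给token_tracker,与上文列出的示例脚本一致):
+
+```python
+from lightrag import QueryParam
+from lightrag.utils import TokenTracker
+
+token_tracker = TokenTracker()
+
+# 阶段1:统计文档插入的token消耗
+token_tracker.reset()
+rag.insert("你的文档内容")
+print("Insert phase usage:", token_tracker.get_usage())
+
+# 阶段2:单独统计查询的token消耗
+token_tracker.reset()
+rag.query("你的问题1", param=QueryParam(mode="naive"))
+print("Query phase usage:", token_tracker.get_usage())
+```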
+ ### 对话历史 LightRAG现在通过对话历史功能支持多轮对话。以下是使用方法: diff --git a/README.md b/README.md index 33bb9278..e154c719 100644 --- a/README.md +++ b/README.md @@ -440,11 +440,65 @@ if __name__ == "__main__": - [Direct OpenAI Example](examples/lightrag_llamaindex_direct_demo.py) - [LiteLLM Proxy Example](examples/lightrag_llamaindex_litellm_demo.py) + + +### Token Usage Tracking + +
+ Overview and Usage
+
+LightRAG provides a TokenTracker tool to monitor and manage token consumption by large language models. This feature is particularly useful for controlling API costs and optimizing performance.
+
+#### Usage
+
+```python
+from lightrag.utils import TokenTracker
+
+# Create TokenTracker instance
+token_tracker = TokenTracker()
+
+# Method 1: Using context manager (Recommended)
+# Suitable for scenarios requiring automatic token usage tracking
+with token_tracker:
+    result1 = await llm_model_func("your question 1")
+    result2 = await llm_model_func("your question 2")
+
+# Method 2: Manually resetting and reading token usage
+# Suitable for scenarios requiring more granular control over token statistics
+token_tracker.reset()
+
+rag.insert("your document text")
+
+rag.query("your question 1", param=QueryParam(mode="naive"))
+rag.query("your question 2", param=QueryParam(mode="mix"))
+
+# Display total token usage (including insert and query operations)
+print("Token usage:", token_tracker.get_usage())
+```
+
+#### Usage Tips
+- Use context managers for long sessions or batch operations to automatically track all token consumption
+- For scenarios requiring segmented statistics, use manual mode and call reset() when appropriate (see the sketch below)
+- Regular checking of token usage helps detect abnormal consumption early
+- Actively use this feature during development and testing to optimize production costs
+
+#### Practical Examples
+You can refer to these examples for implementing token tracking:
+- `examples/lightrag_gemini_track_token_demo.py`: Token tracking example using Google Gemini model
+- `examples/lightrag_siliconcloud_track_token_demo.py`: Token tracking example using SiliconCloud model
+
+These examples demonstrate how to effectively use the TokenTracker feature with different models and scenarios.
+
+
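+#### Segmented Statistics Example
+
+If you need separate totals for the insert phase and the query phase, one option is to reset the tracker between phases. The following is only a minimal sketch: it assumes `rag` was initialized with an LLM function that reports its usage to `token_tracker`, as in the demo scripts listed above.
+
+```python
+from lightrag import QueryParam
+from lightrag.utils import TokenTracker
+
+token_tracker = TokenTracker()
+
+# Phase 1: token cost of document insertion
+token_tracker.reset()
+rag.insert("your document text")
+print("Insert phase usage:", token_tracker.get_usage())
+
+# Phase 2: token cost of querying, counted separately
+token_tracker.reset()
+rag.query("your question 1", param=QueryParam(mode="naive"))
+print("Query phase usage:", token_tracker.get_usage())
+```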
+ ### Conversation History Support LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it: +
+ Usage Example + ```python # Create conversation history conversation_history = [ @@ -467,10 +521,15 @@ response = rag.query( ) ``` +
+ ### Custom Prompt Support LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it: +
+ Usage Example + ```python # Create query parameters query_param = QueryParam( @@ -505,6 +564,8 @@ response_custom = rag.query( print(response_custom) ``` +
+ ### Separate Keyword Extraction We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords. @@ -518,7 +579,8 @@ The function operates by dividing the input into two parts: It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question. -**Usage Example** +
+ Usage Example This `example` shows how to tailor the function for educational content, focusing on detailed explanations for older students. @@ -530,67 +592,6 @@ rag.query_with_separate_keyword_extraction( ) ``` -### Insert Custom KG - -```python -custom_kg = { - "chunks": [ - { - "content": "Alice and Bob are collaborating on quantum computing research.", - "source_id": "doc-1" - } - ], - "entities": [ - { - "entity_name": "Alice", - "entity_type": "person", - "description": "Alice is a researcher specializing in quantum physics.", - "source_id": "doc-1" - }, - { - "entity_name": "Bob", - "entity_type": "person", - "description": "Bob is a mathematician.", - "source_id": "doc-1" - }, - { - "entity_name": "Quantum Computing", - "entity_type": "technology", - "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", - "source_id": "doc-1" - } - ], - "relationships": [ - { - "src_id": "Alice", - "tgt_id": "Bob", - "description": "Alice and Bob are research partners.", - "keywords": "collaboration research", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Alice", - "tgt_id": "Quantum Computing", - "description": "Alice conducts research on quantum computing.", - "keywords": "research expertise", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Bob", - "tgt_id": "Quantum Computing", - "description": "Bob researches quantum computing.", - "keywords": "research application", - "weight": 1.0, - "source_id": "doc-1" - } - ] -} - -rag.insert_custom_kg(custom_kg) -``` -
## Insert @@ -682,6 +683,70 @@ rag.insert(text_content.decode('utf-8')) +
+ Insert Custom KG + +```python +custom_kg = { + "chunks": [ + { + "content": "Alice and Bob are collaborating on quantum computing research.", + "source_id": "doc-1" + } + ], + "entities": [ + { + "entity_name": "Alice", + "entity_type": "person", + "description": "Alice is a researcher specializing in quantum physics.", + "source_id": "doc-1" + }, + { + "entity_name": "Bob", + "entity_type": "person", + "description": "Bob is a mathematician.", + "source_id": "doc-1" + }, + { + "entity_name": "Quantum Computing", + "entity_type": "technology", + "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", + "source_id": "doc-1" + } + ], + "relationships": [ + { + "src_id": "Alice", + "tgt_id": "Bob", + "description": "Alice and Bob are research partners.", + "keywords": "collaboration research", + "weight": 1.0, + "source_id": "doc-1" + }, + { + "src_id": "Alice", + "tgt_id": "Quantum Computing", + "description": "Alice conducts research on quantum computing.", + "keywords": "research expertise", + "weight": 1.0, + "source_id": "doc-1" + }, + { + "src_id": "Bob", + "tgt_id": "Quantum Computing", + "description": "Bob researches quantum computing.", + "keywords": "research application", + "weight": 1.0, + "source_id": "doc-1" + } + ] +} + +rag.insert_custom_kg(custom_kg) +``` + +
+
Citation Functionality @@ -841,7 +906,8 @@ rag.delete_by_doc_id("doc_id") LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph. -### Create Entities and Relations +
+ Create Entities and Relations ```python # Create new entity @@ -864,7 +930,10 @@ relation = rag.create_relation("Google", "Gmail", { }) ``` -### Edit Entities and Relations +
+ +
+ Edit Entities and Relations ```python # Edit an existing entity @@ -901,6 +970,8 @@ All operations are available in both synchronous and asynchronous versions. The These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent. +
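+The asynchronous variants can be awaited directly. The following is a minimal sketch (it assumes an already-initialized `rag` instance; `acreate_entity` and `aedit_relation` are the "a"-prefixed names mentioned above and take the same arguments as their synchronous counterparts):
+
+```python
+import asyncio
+
+async def update_graph(rag):
+    # Async variant of create_entity
+    await rag.acreate_entity("Google", {
+        "description": "Google is a multinational technology company.",
+        "entity_type": "company"
+    })
+
+    # Async variant of edit_relation
+    await rag.aedit_relation("Google", "Gmail", {
+        "description": "Google develops and operates Gmail.",
+        "keywords": "develops operates service",
+        "weight": 2.0
+    })
+
+asyncio.run(update_graph(rag))
+```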
+ ## Data Export Functions ### Overview @@ -909,7 +980,8 @@ LightRAG allows you to export your knowledge graph data in various formats for a ### Export Functions -#### Basic Usage +
+ Basic Usage ```python # Basic CSV export (default format) @@ -919,7 +991,10 @@ rag.export_data("knowledge_graph.csv") rag.export_data("output.xlsx", file_format="excel") ``` -#### Different File Formats supported +
+ +
+ Different File Formats supported ```python #Export data in CSV format @@ -934,13 +1009,18 @@ rag.export_data("graph_data.md", file_format="md") # Export data in Text rag.export_data("graph_data.txt", file_format="txt") ``` -#### Additional Options +
+ +
+ Additional Options Include vector embeddings in the export (optional): ```python rag.export_data("complete_data.csv", include_vector_data=True) ``` +
+ ### Data Included in Export All exports include: diff --git a/examples/lightrag_gemini_track_token_demo.py b/examples/lightrag_gemini_track_token_demo.py index e169a562..a72fc717 100644 --- a/examples/lightrag_gemini_track_token_demo.py +++ b/examples/lightrag_gemini_track_token_demo.py @@ -115,38 +115,36 @@ def main(): # Initialize RAG instance rag = asyncio.run(initialize_rag()) - # Reset tracker before processing queries - token_tracker.reset() - with open("./book.txt", "r", encoding="utf-8") as f: rag.insert(f.read()) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="naive") + # Context Manager Method + with token_tracker: + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="naive") + ) ) - ) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="local") + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="local") + ) ) - ) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="global") + print( + rag.query( + "What are the top themes in this story?", + param=QueryParam(mode="global"), + ) ) - ) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="hybrid") + print( + rag.query( + "What are the top themes in this story?", + param=QueryParam(mode="hybrid"), + ) ) - ) - - # Display final token usage after main query - print("Token usage:", token_tracker.get_usage()) if __name__ == "__main__": diff --git a/examples/lightrag_siliconcloud_track_token_demo.py b/examples/lightrag_siliconcloud_track_token_demo.py index fbbe94b4..d82a30bc 100644 --- a/examples/lightrag_siliconcloud_track_token_demo.py +++ b/examples/lightrag_siliconcloud_track_token_demo.py @@ -44,14 +44,10 @@ async def embedding_func(texts: list[str]) -> np.ndarray: # function test async def test_funcs(): - # Reset tracker before processing queries - token_tracker.reset() - - result = await llm_model_func("How are you?") - print("llm_model_func: ", result) - - # Display final token usage after main query - print("Token usage:", token_tracker.get_usage()) + # Context Manager Method + with token_tracker: + result = await llm_model_func("How are you?") + print("llm_model_func: ", result) asyncio.run(test_funcs()) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index c3cebd17..d9939809 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -44,6 +44,47 @@ class InvalidResponseError(Exception): pass +def create_openai_async_client( + api_key: str | None = None, + base_url: str | None = None, + client_configs: dict[str, Any] = None, +) -> AsyncOpenAI: + """Create an AsyncOpenAI client with the given configuration. + + Args: + api_key: OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. + base_url: Base URL for the OpenAI API. If None, uses the default OpenAI API URL. + client_configs: Additional configuration options for the AsyncOpenAI client. + These will override any default configurations but will be overridden by + explicit parameters (api_key, base_url). + + Returns: + An AsyncOpenAI client instance. 
+ """ + if not api_key: + api_key = os.environ["OPENAI_API_KEY"] + + default_headers = { + "User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", + "Content-Type": "application/json", + } + + if client_configs is None: + client_configs = {} + + # Create a merged config dict with precedence: explicit params > client_configs > defaults + merged_configs = { + **client_configs, + "default_headers": default_headers, + "api_key": api_key, + } + + if base_url is not None: + merged_configs["base_url"] = base_url + + return AsyncOpenAI(**merged_configs) + + @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), @@ -61,29 +102,52 @@ async def openai_complete_if_cache( token_tracker: Any | None = None, **kwargs: Any, ) -> str: + """Complete a prompt using OpenAI's API with caching support. + + Args: + model: The OpenAI model to use. + prompt: The prompt to complete. + system_prompt: Optional system prompt to include. + history_messages: Optional list of previous messages in the conversation. + base_url: Optional base URL for the OpenAI API. + api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. + **kwargs: Additional keyword arguments to pass to the OpenAI API. + Special kwargs: + - openai_client_configs: Dict of configuration options for the AsyncOpenAI client. + These will be passed to the client constructor but will be overridden by + explicit parameters (api_key, base_url). + - hashing_kv: Will be removed from kwargs before passing to OpenAI. + - keyword_extraction: Will be removed from kwargs before passing to OpenAI. + + Returns: + The completed text or an async iterator of text chunks if streaming. + + Raises: + InvalidResponseError: If the response from OpenAI is invalid or empty. + APIConnectionError: If there is a connection error with the OpenAI API. + RateLimitError: If the OpenAI API rate limit is exceeded. + APITimeoutError: If the OpenAI API request times out. + """ if history_messages is None: history_messages = [] - if not api_key: - api_key = os.environ["OPENAI_API_KEY"] - - default_headers = { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", - "Content-Type": "application/json", - } # Set openai logger level to INFO when VERBOSE_DEBUG is off if not VERBOSE_DEBUG and logger.level == logging.DEBUG: logging.getLogger("openai").setLevel(logging.INFO) - openai_async_client = ( - AsyncOpenAI(default_headers=default_headers, api_key=api_key) - if base_url is None - else AsyncOpenAI( - base_url=base_url, default_headers=default_headers, api_key=api_key - ) + # Extract client configuration options + client_configs = kwargs.pop("openai_client_configs", {}) + + # Create the OpenAI client + openai_async_client = create_openai_async_client( + api_key=api_key, base_url=base_url, client_configs=client_configs ) + + # Remove special kwargs that shouldn't be passed to OpenAI kwargs.pop("hashing_kv", None) kwargs.pop("keyword_extraction", None) + + # Prepare messages messages: list[dict[str, Any]] = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) @@ -272,21 +336,32 @@ async def openai_embed( model: str = "text-embedding-3-small", base_url: str = None, api_key: str = None, + client_configs: dict[str, Any] = None, ) -> np.ndarray: - if not api_key: - api_key = os.environ["OPENAI_API_KEY"] + """Generate embeddings for a list of texts using OpenAI's API. 
- default_headers = { - "User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", - "Content-Type": "application/json", - } - openai_async_client = ( - AsyncOpenAI(default_headers=default_headers, api_key=api_key) - if base_url is None - else AsyncOpenAI( - base_url=base_url, default_headers=default_headers, api_key=api_key - ) + Args: + texts: List of texts to embed. + model: The OpenAI embedding model to use. + base_url: Optional base URL for the OpenAI API. + api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. + client_configs: Additional configuration options for the AsyncOpenAI client. + These will override any default configurations but will be overridden by + explicit parameters (api_key, base_url). + + Returns: + A numpy array of embeddings, one per input text. + + Raises: + APIConnectionError: If there is a connection error with the OpenAI API. + RateLimitError: If the OpenAI API rate limit is exceeded. + APITimeoutError: If the OpenAI API request times out. + """ + # Create the OpenAI client + openai_async_client = create_openai_async_client( + api_key=api_key, base_url=base_url, client_configs=client_configs ) + response = await openai_async_client.embeddings.create( model=model, input=texts, encoding_format="float" ) diff --git a/lightrag/operate.py b/lightrag/operate.py index a944860a..634326a7 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -697,8 +697,7 @@ async def kg_query( if cached_response is not None: return cached_response - # Extract keywords using extract_keywords_only function which already supports conversation history - hl_keywords, ll_keywords = await extract_keywords_only( + hl_keywords, ll_keywords = await get_keywords_from_query( query, query_param, global_config, hashing_kv ) @@ -794,6 +793,38 @@ async def kg_query( return response +async def get_keywords_from_query( + query: str, + query_param: QueryParam, + global_config: dict[str, str], + hashing_kv: BaseKVStorage | None = None, +) -> tuple[list[str], list[str]]: + """ + Retrieves high-level and low-level keywords for RAG operations. + + This function checks if keywords are already provided in query parameters, + and if not, extracts them from the query text using LLM. + + Args: + query: The user's query text + query_param: Query parameters that may contain pre-defined keywords + global_config: Global configuration dictionary + hashing_kv: Optional key-value storage for caching results + + Returns: + A tuple containing (high_level_keywords, low_level_keywords) + """ + # Check if pre-defined keywords are already provided + if query_param.hl_keywords or query_param.ll_keywords: + return query_param.hl_keywords, query_param.ll_keywords + + # Extract keywords using extract_keywords_only function which already supports conversation history + hl_keywords, ll_keywords = await extract_keywords_only( + query, query_param, global_config, hashing_kv + ) + return hl_keywords, ll_keywords + + async def extract_keywords_only( text: str, param: QueryParam, @@ -934,8 +965,7 @@ async def mix_kg_vector_query( # 2. 
Execute knowledge graph and vector searches in parallel async def get_kg_context(): try: - # Extract keywords using extract_keywords_only function which already supports conversation history - hl_keywords, ll_keywords = await extract_keywords_only( + hl_keywords, ll_keywords = await get_keywords_from_query( query, query_param, global_config, hashing_kv ) @@ -983,7 +1013,6 @@ async def mix_kg_vector_query( try: # Reduce top_k for vector search in hybrid mode since we have structured information from KG mix_topk = min(10, query_param.top_k) - # TODO: add ids to the query results = await chunks_vdb.query( augmented_query, top_k=mix_topk, ids=query_param.ids ) @@ -1581,9 +1610,7 @@ async def _get_edge_data( text_units_section_list = [["id", "content", "file_path"]] for i, t in enumerate(use_text_units): - text_units_section_list.append( - [i, t["content"], t.get("file_path", "unknown_source")] - ) + text_units_section_list.append([i, t["content"], t.get("file_path", "unknown")]) text_units_context = list_of_list_to_csv(text_units_section_list) return entities_context, relations_context, text_units_context @@ -2017,16 +2044,13 @@ async def query_with_keywords( Query response or async iterator """ # Extract keywords - hl_keywords, ll_keywords = await extract_keywords_only( - text=query, - param=param, + hl_keywords, ll_keywords = await get_keywords_from_query( + query=query, + query_param=param, global_config=global_config, hashing_kv=hashing_kv, ) - param.hl_keywords = hl_keywords - param.ll_keywords = ll_keywords - # Create a new string with the prompt and the keywords ll_keywords_str = ", ".join(ll_keywords) hl_keywords_str = ", ".join(hl_keywords) diff --git a/lightrag/utils.py b/lightrag/utils.py index 44a85425..4515e080 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -962,6 +962,13 @@ class TokenTracker: def __init__(self): self.reset() + def __enter__(self): + self.reset() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + print(self) + def reset(self): self.prompt_tokens = 0 self.completion_tokens = 0 diff --git a/lightrag_webui/src/hooks/useLightragGraph.tsx b/lightrag_webui/src/hooks/useLightragGraph.tsx index deac239f..eadd67fb 100644 --- a/lightrag_webui/src/hooks/useLightragGraph.tsx +++ b/lightrag_webui/src/hooks/useLightragGraph.tsx @@ -205,7 +205,7 @@ const createSigmaGraph = (rawGraph: RawGraph | null) => { // Add edges from raw graph data for (const rawEdge of rawGraph?.edges ?? []) { rawEdge.dynamicId = graph.addDirectedEdge(rawEdge.source, rawEdge.target, { - label: rawEdge.type || undefined + label: rawEdge.properties?.keywords || undefined }) } @@ -666,7 +666,7 @@ const useLightrangeGraph = () => { // Add the edge to the sigma graph newEdge.dynamicId = sigmaGraph.addDirectedEdge(newEdge.source, newEdge.target, { - label: newEdge.type || undefined + label: newEdge.properties?.keywords || undefined }); // Add the edge to the raw graph
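A note on the new OpenAI client configuration path added in `lightrag/llm/openai.py`: per the docstrings above, options in `client_configs` override the built-in defaults but are themselves overridden by the explicit `api_key`/`base_url` parameters. The sketch below shows how a caller might exercise this; the model name and the `timeout`/`max_retries` values are illustrative assumptions, not part of this change.

```python
import asyncio
from lightrag.llm.openai import openai_complete_if_cache, openai_embed

async def demo() -> None:
    # Extra AsyncOpenAI constructor options travel via openai_client_configs;
    # explicit api_key/base_url arguments still take precedence over them.
    answer = await openai_complete_if_cache(
        "gpt-4o-mini",
        "Say hello in one short sentence.",
        openai_client_configs={"timeout": 30.0, "max_retries": 2},
    )
    print(answer)

    # openai_embed exposes the same options through its client_configs parameter.
    vectors = await openai_embed(
        ["hello world"],
        client_configs={"timeout": 30.0},
    )
    print(vectors.shape)

if __name__ == "__main__":
    # Requires OPENAI_API_KEY in the environment (see create_openai_async_client).
    asyncio.run(demo())
```

Similarly, the new `get_keywords_from_query` helper in `lightrag/operate.py` short-circuits LLM keyword extraction when the query parameters already carry keywords. Assuming `QueryParam` exposes the `hl_keywords`/`ll_keywords` fields that the helper reads, a caller could pre-set them like this (the keyword values are only placeholders):

```python
from lightrag import QueryParam

# Assumes an already-initialized LightRAG instance `rag`.
param = QueryParam(
    mode="hybrid",
    hl_keywords=["quantum computing"],   # high-level (thematic) keywords
    ll_keywords=["Alice", "Bob"],        # low-level (entity) keywords
)
# With keywords pre-defined, get_keywords_from_query skips the extraction LLM call.
response = rag.query("How do Alice and Bob collaborate?", param=param)
```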