From d45dc14069b9d3555236ac1b33fa897f71516588 Mon Sep 17 00:00:00 2001 From: Shane Walker Date: Thu, 27 Mar 2025 15:39:39 -0700 Subject: [PATCH 01/11] feat(openai): add client configuration support to OpenAI integration Add support for custom client configurations in the OpenAI integration, allowing for more flexible configuration of the AsyncOpenAI client. This includes: - Create a reusable helper function `create_openai_async_client` - Add proper documentation for client configuration options - Ensure consistent parameter precedence across the codebase - Update the embedding function to support client configurations - Add example script demonstrating custom client configuration usage The changes maintain backward compatibility while providing a cleaner and more maintainable approach to configuring OpenAI clients. --- lightrag/llm/openai.py | 127 ++++++++++++++++++++++++++++++++--------- 1 file changed, 101 insertions(+), 26 deletions(-) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index 70aa0ceb..394c4370 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -44,6 +44,43 @@ class InvalidResponseError(Exception): pass +def create_openai_async_client( + api_key: str | None = None, + base_url: str | None = None, + client_configs: dict[str, Any] = None, +) -> AsyncOpenAI: + """Create an AsyncOpenAI client with the given configuration. + + Args: + api_key: OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. + base_url: Base URL for the OpenAI API. If None, uses the default OpenAI API URL. + client_configs: Additional configuration options for the AsyncOpenAI client. + These will override any default configurations but will be overridden by + explicit parameters (api_key, base_url). + + Returns: + An AsyncOpenAI client instance. + """ + if not api_key: + api_key = os.environ["OPENAI_API_KEY"] + + default_headers = { + "User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", + "Content-Type": "application/json", + } + + if client_configs is None: + client_configs = {} + + # Create a merged config dict with precedence: explicit params > client_configs > defaults + merged_configs = {**client_configs, "default_headers": default_headers, "api_key": api_key} + + if base_url is not None: + merged_configs["base_url"] = base_url + + return AsyncOpenAI(**merged_configs) + + @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), @@ -60,29 +97,54 @@ async def openai_complete_if_cache( api_key: str | None = None, **kwargs: Any, ) -> str: + """Complete a prompt using OpenAI's API with caching support. + + Args: + model: The OpenAI model to use. + prompt: The prompt to complete. + system_prompt: Optional system prompt to include. + history_messages: Optional list of previous messages in the conversation. + base_url: Optional base URL for the OpenAI API. + api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. + **kwargs: Additional keyword arguments to pass to the OpenAI API. + Special kwargs: + - openai_client_configs: Dict of configuration options for the AsyncOpenAI client. + These will be passed to the client constructor but will be overridden by + explicit parameters (api_key, base_url). + - hashing_kv: Will be removed from kwargs before passing to OpenAI. + - keyword_extraction: Will be removed from kwargs before passing to OpenAI. + + Returns: + The completed text or an async iterator of text chunks if streaming. 
+ + Raises: + InvalidResponseError: If the response from OpenAI is invalid or empty. + APIConnectionError: If there is a connection error with the OpenAI API. + RateLimitError: If the OpenAI API rate limit is exceeded. + APITimeoutError: If the OpenAI API request times out. + """ if history_messages is None: history_messages = [] - if not api_key: - api_key = os.environ["OPENAI_API_KEY"] - - default_headers = { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", - "Content-Type": "application/json", - } # Set openai logger level to INFO when VERBOSE_DEBUG is off if not VERBOSE_DEBUG and logger.level == logging.DEBUG: logging.getLogger("openai").setLevel(logging.INFO) - openai_async_client = ( - AsyncOpenAI(default_headers=default_headers, api_key=api_key) - if base_url is None - else AsyncOpenAI( - base_url=base_url, default_headers=default_headers, api_key=api_key - ) + # Extract client configuration options + client_configs = kwargs.pop("openai_client_configs", {}) + + # Create the OpenAI client + openai_async_client = create_openai_async_client( + api_key=api_key, + base_url=base_url, + client_configs=client_configs ) + + # Remove special kwargs that shouldn't be passed to OpenAI kwargs.pop("hashing_kv", None) kwargs.pop("keyword_extraction", None) + + # Prepare messages messages: list[dict[str, Any]] = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) @@ -257,21 +319,34 @@ async def openai_embed( model: str = "text-embedding-3-small", base_url: str = None, api_key: str = None, + client_configs: dict[str, Any] = None, ) -> np.ndarray: - if not api_key: - api_key = os.environ["OPENAI_API_KEY"] - - default_headers = { - "User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", - "Content-Type": "application/json", - } - openai_async_client = ( - AsyncOpenAI(default_headers=default_headers, api_key=api_key) - if base_url is None - else AsyncOpenAI( - base_url=base_url, default_headers=default_headers, api_key=api_key - ) + """Generate embeddings for a list of texts using OpenAI's API. + + Args: + texts: List of texts to embed. + model: The OpenAI embedding model to use. + base_url: Optional base URL for the OpenAI API. + api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. + client_configs: Additional configuration options for the AsyncOpenAI client. + These will override any default configurations but will be overridden by + explicit parameters (api_key, base_url). + + Returns: + A numpy array of embeddings, one per input text. + + Raises: + APIConnectionError: If there is a connection error with the OpenAI API. + RateLimitError: If the OpenAI API rate limit is exceeded. + APITimeoutError: If the OpenAI API request times out. 
+ """ + # Create the OpenAI client + openai_async_client = create_openai_async_client( + api_key=api_key, + base_url=base_url, + client_configs=client_configs ) + response = await openai_async_client.embeddings.create( model=model, input=texts, encoding_format="float" ) From d0eb602b12527f3511c197f60bc92479662246ed Mon Sep 17 00:00:00 2001 From: zrguo Date: Sat, 29 Mar 2025 21:25:34 +0800 Subject: [PATCH 02/11] Update README.md --- README.md | 159 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 92 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index d3716416..b96eed21 100644 --- a/README.md +++ b/README.md @@ -441,11 +441,15 @@ if __name__ == "__main__": - [Direct OpenAI Example](examples/lightrag_llamaindex_direct_demo.py) - [LiteLLM Proxy Example](examples/lightrag_llamaindex_litellm_demo.py) + + ### Conversation History Support LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it: +
+ ```python # Create conversation history conversation_history = [ @@ -506,6 +510,8 @@ response_custom = rag.query( print(response_custom) ``` +
+</details>

 ### Separate Keyword Extraction

 We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords.

 The function operates by dividing the input into two parts:

 - `user query`: the question on which keyword extraction is performed
 - `prompt`: additional instructions used only for formatting the response

 It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question.

-**Usage Example**
+<details>
+  <summary> Usage Example </summary>

 This `example` shows how to tailor the function for educational content, focusing on detailed explanations for older students.

 ```python
 rag.query_with_separate_keyword_extraction(
     query="Explain the law of gravity",
     prompt="Provide a detailed explanation suitable for high school students studying physics",
     param=QueryParam(mode="hybrid")
 )
 ```

-### Insert Custom KG
-
-```python
-custom_kg = {
-    "chunks": [
-        {
-            "content": "Alice and Bob are collaborating on quantum computing research.",
-            "source_id": "doc-1"
-        }
-    ],
-    "entities": [
-        {
-            "entity_name": "Alice",
-            "entity_type": "person",
-            "description": "Alice is a researcher specializing in quantum physics.",
-            "source_id": "doc-1"
-        },
-        {
-            "entity_name": "Bob",
-            "entity_type": "person",
-            "description": "Bob is a mathematician.",
-            "source_id": "doc-1"
-        },
-        {
-            "entity_name": "Quantum Computing",
-            "entity_type": "technology",
-            "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
-            "source_id": "doc-1"
-        }
-    ],
-    "relationships": [
-        {
-            "src_id": "Alice",
-            "tgt_id": "Bob",
-            "description": "Alice and Bob are research partners.",
-            "keywords": "collaboration research",
-            "weight": 1.0,
-            "source_id": "doc-1"
-        },
-        {
-            "src_id": "Alice",
-            "tgt_id": "Quantum Computing",
-            "description": "Alice conducts research on quantum computing.",
-            "keywords": "research expertise",
-            "weight": 1.0,
-            "source_id": "doc-1"
-        },
-        {
-            "src_id": "Bob",
-            "tgt_id": "Quantum Computing",
-            "description": "Bob researches quantum computing.",
-            "keywords": "research application",
-            "weight": 1.0,
-            "source_id": "doc-1"
-        }
-    ]
-}
-
-rag.insert_custom_kg(custom_kg)
-```
-
## Insert @@ -683,6 +629,70 @@ rag.insert(text_content.decode('utf-8')) +
+<details>
+  <summary> Insert Custom KG </summary>
+
+```python
+custom_kg = {
+    "chunks": [
+        {
+            "content": "Alice and Bob are collaborating on quantum computing research.",
+            "source_id": "doc-1"
+        }
+    ],
+    "entities": [
+        {
+            "entity_name": "Alice",
+            "entity_type": "person",
+            "description": "Alice is a researcher specializing in quantum physics.",
+            "source_id": "doc-1"
+        },
+        {
+            "entity_name": "Bob",
+            "entity_type": "person",
+            "description": "Bob is a mathematician.",
+            "source_id": "doc-1"
+        },
+        {
+            "entity_name": "Quantum Computing",
+            "entity_type": "technology",
+            "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
+            "source_id": "doc-1"
+        }
+    ],
+    "relationships": [
+        {
+            "src_id": "Alice",
+            "tgt_id": "Bob",
+            "description": "Alice and Bob are research partners.",
+            "keywords": "collaboration research",
+            "weight": 1.0,
+            "source_id": "doc-1"
+        },
+        {
+            "src_id": "Alice",
+            "tgt_id": "Quantum Computing",
+            "description": "Alice conducts research on quantum computing.",
+            "keywords": "research expertise",
+            "weight": 1.0,
+            "source_id": "doc-1"
+        },
+        {
+            "src_id": "Bob",
+            "tgt_id": "Quantum Computing",
+            "description": "Bob researches quantum computing.",
+            "keywords": "research application",
+            "weight": 1.0,
+            "source_id": "doc-1"
+        }
+    ]
+}
+
+rag.insert_custom_kg(custom_kg)
+```
+
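Once inserted, the hand-built KG above is queryable like any other indexed content. A minimal sketch (illustrative only; it reuses the `rag.query`/`QueryParam` API shown elsewhere in this README):

```python
# Illustrative only: query the custom KG inserted above
response = rag.query(
    "What are Alice and Bob working on together?",
    param=QueryParam(mode="local"),
)
print(response)
```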
+</details>
Citation Functionality @@ -842,7 +852,8 @@ rag.delete_by_doc_id("doc_id") LightRAG now supports comprehensive knowledge graph management capabilities, allowing you to create, edit, and delete entities and relationships within your knowledge graph. -### Create Entities and Relations +
+  <summary> Create Entities and Relations </summary>

 ```python
 # Create new entity
 entity = rag.create_entity("Google", {
     "description": "Google is a multinational technology company specializing in internet-related services and products.",
     "entity_type": "company"
 })

 # Create another entity
 product = rag.create_entity("Gmail", {
     "description": "Gmail is an email service developed by Google.",
     "entity_type": "product"
 })

 # Create relation between entities
 relation = rag.create_relation("Google", "Gmail", {
     "description": "Google develops and operates Gmail.",
     "keywords": "develops operates service",
     "weight": 2.0
 })
 ```
+</details>

-### Edit Entities and Relations
+<details>
+  <summary> Edit Entities and Relations </summary>

 ```python
 # Edit an existing entity
 updated_entity = rag.edit_entity("Google", {
     "description": "Google is a subsidiary of Alphabet Inc., founded in 1998.",
     "entity_type": "tech_company"
 })

 # Rename an entity (with all its relationships properly migrated)
 renamed_entity = rag.edit_entity("Gmail", {
     "entity_name": "Google Mail",
     "description": "Google Mail (formerly Gmail) is an email service."
 })

 # Edit a relation between entities
 updated_relation = rag.edit_relation("Google", "Google Mail", {
     "description": "Google created and maintains Google Mail service.",
     "keywords": "creates maintains email service",
     "weight": 3.0
 })
 ```

 All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).

 These operations maintain data consistency across both the graph database and vector database components, ensuring your knowledge graph remains coherent.
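A minimal sketch of the asynchronous variants mentioned above (an assumption for illustration: the `a`-prefixed methods mirror the synchronous signatures shown here):

```python
import asyncio

# Illustrative only: async counterparts assumed to mirror the sync API
async def update_graph():
    await rag.acreate_entity("Google Maps", {
        "description": "Google Maps is a web mapping service developed by Google.",
        "entity_type": "product"
    })
    await rag.aedit_relation("Google", "Google Mail", {
        "description": "Google created and maintains Google Mail service.",
        "keywords": "creates maintains email service",
        "weight": 3.0
    })

asyncio.run(update_graph())
```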
+</details>

 ## Data Export Functions

 ### Overview

 LightRAG allows you to export your knowledge graph data in various formats for analysis, sharing, and backup purposes. The system supports exporting entities, relations, and relationship data.

 ### Export Functions

-#### Basic Usage
+<details>
+  <summary> Basic Usage </summary>

 ```python
 # Basic CSV export (default format)
 rag.export_data("knowledge_graph.csv")

 # Specify output format
 rag.export_data("output.xlsx", file_format="excel")
 ```
+</details>

-#### Different File Formats supported
+<details>
+  <summary> Different File Formats supported </summary>

 ```python
 # Export data in CSV format
 rag.export_data("graph_data.csv", file_format="csv")

 # Export data in Excel sheet
 rag.export_data("graph_data.xlsx", file_format="excel")

 # Export data in markdown format
 rag.export_data("graph_data.md", file_format="md")

 # Export data in Text
 rag.export_data("graph_data.txt", file_format="txt")
 ```
+</details>

-#### Additional Options
+<details>
+  <summary> Additional Options </summary>

 Include vector embeddings in the export (optional):

 ```python
 rag.export_data("complete_data.csv", include_vector_data=True)
 ```
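For a quick sanity check of an export, a small reader sketch (illustrative only; it assumes nothing beyond the CSV file produced by `export_data` above):

```python
import csv

# Illustrative only: peek at the first rows of the exported file
with open("knowledge_graph.csv", newline="", encoding="utf-8") as f:
    for i, row in enumerate(csv.reader(f)):
        print(row)
        if i >= 4:  # first five rows are enough for a sanity check
            break
```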
+</details>

 ### Data Included in Export

 All exports include:

From 6eea8bdf5de025d10bc0798824e84bb1254ecc9f Mon Sep 17 00:00:00 2001
From: zrguo
Date: Sat, 29 Mar 2025 21:28:25 +0800
Subject: [PATCH 03/11] Update README.md

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index b96eed21..6c8861a2 100644
--- a/README.md
+++ b/README.md
@@ -449,6 +449,7 @@ if __name__ == "__main__":

 LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it:

+  <summary> Usage Example </summary>

 ```python
 # Create conversation history
 conversation_history = [
     {"role": "user", "content": "What is the main character's attitude towards Christmas?"},
     {"role": "assistant", "content": "At the beginning of the story, Ebenezer Scrooge has a very negative attitude towards Christmas..."},
     {"role": "user", "content": "How does his attitude change?"}
 ]

 # Create query parameters with conversation history
 query_param = QueryParam(
     mode="mix",  # or any other mode: "local", "global", "hybrid"
     conversation_history=conversation_history,  # Add the conversation history
     history_turns=3  # Number of recent conversation turns to consider
 )

 # Make a query that takes into account the conversation history
 response = rag.query(
     "What about their relationship?",
     param=query_param
 )
 ```

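Building on the example above, a simple multi-turn loop sketch (illustrative only; it reuses just the `rag.query` and `QueryParam` API shown here):

```python
# Illustrative only: grow the history turn by turn
while True:
    user_input = input("You: ")
    if not user_input:
        break
    answer = rag.query(
        user_input,
        param=QueryParam(
            mode="mix",
            conversation_history=conversation_history,
            history_turns=3,
        ),
    )
    conversation_history.append({"role": "user", "content": user_input})
    conversation_history.append({"role": "assistant", "content": answer})
    print("Assistant:", answer)
```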
+</details>

 ### Custom Prompt Support

 LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it:

+ Usage Example + ```python # Create query parameters query_param = QueryParam( From 9518360021d3a100d7372e8400382e88d00670ca Mon Sep 17 00:00:00 2001 From: choizhang Date: Sat, 29 Mar 2025 23:27:40 +0800 Subject: [PATCH 04/11] fix(useLightragGraph): Change the label of the edge from type to keyword --- lightrag_webui/src/hooks/useLightragGraph.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag_webui/src/hooks/useLightragGraph.tsx b/lightrag_webui/src/hooks/useLightragGraph.tsx index 2ee28749..92da7189 100644 --- a/lightrag_webui/src/hooks/useLightragGraph.tsx +++ b/lightrag_webui/src/hooks/useLightragGraph.tsx @@ -205,7 +205,7 @@ const createSigmaGraph = (rawGraph: RawGraph | null) => { // Add edges from raw graph data for (const rawEdge of rawGraph?.edges ?? []) { rawEdge.dynamicId = graph.addDirectedEdge(rawEdge.source, rawEdge.target, { - label: rawEdge.type || undefined + label: rawEdge.properties?.keywords || undefined }) } @@ -660,7 +660,7 @@ const useLightrangeGraph = () => { // Add the edge to the sigma graph newEdge.dynamicId = sigmaGraph.addDirectedEdge(newEdge.source, newEdge.target, { - label: newEdge.type || undefined + label: newEdge.properties?.keywords || undefined }); // Add the edge to the raw graph From 164faf94e2a6583ce994558858434f653feff115 Mon Sep 17 00:00:00 2001 From: choizhang Date: Sun, 30 Mar 2025 00:59:23 +0800 Subject: [PATCH 05/11] feat(TokenTracker): Add context manager support to simplify token tracking --- examples/lightrag_gemini_track_token_demo.py | 42 +++++++++---------- .../lightrag_siliconcloud_track_token_demo.py | 12 ++---- lightrag/utils.py | 7 ++++ 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/examples/lightrag_gemini_track_token_demo.py b/examples/lightrag_gemini_track_token_demo.py index e169a562..a72fc717 100644 --- a/examples/lightrag_gemini_track_token_demo.py +++ b/examples/lightrag_gemini_track_token_demo.py @@ -115,38 +115,36 @@ def main(): # Initialize RAG instance rag = asyncio.run(initialize_rag()) - # Reset tracker before processing queries - token_tracker.reset() - with open("./book.txt", "r", encoding="utf-8") as f: rag.insert(f.read()) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="naive") + # Context Manager Method + with token_tracker: + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="naive") + ) ) - ) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="local") + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="local") + ) ) - ) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="global") + print( + rag.query( + "What are the top themes in this story?", + param=QueryParam(mode="global"), + ) ) - ) - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="hybrid") + print( + rag.query( + "What are the top themes in this story?", + param=QueryParam(mode="hybrid"), + ) ) - ) - - # Display final token usage after main query - print("Token usage:", token_tracker.get_usage()) if __name__ == "__main__": diff --git a/examples/lightrag_siliconcloud_track_token_demo.py b/examples/lightrag_siliconcloud_track_token_demo.py index fbbe94b4..d82a30bc 100644 --- a/examples/lightrag_siliconcloud_track_token_demo.py +++ b/examples/lightrag_siliconcloud_track_token_demo.py @@ -44,14 +44,10 @@ async def embedding_func(texts: list[str]) -> np.ndarray: # function test async def 
test_funcs(): - # Reset tracker before processing queries - token_tracker.reset() - - result = await llm_model_func("How are you?") - print("llm_model_func: ", result) - - # Display final token usage after main query - print("Token usage:", token_tracker.get_usage()) + # Context Manager Method + with token_tracker: + result = await llm_model_func("How are you?") + print("llm_model_func: ", result) asyncio.run(test_funcs()) diff --git a/lightrag/utils.py b/lightrag/utils.py index 44a85425..4515e080 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -962,6 +962,13 @@ class TokenTracker: def __init__(self): self.reset() + def __enter__(self): + self.reset() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + print(self) + def reset(self): self.prompt_tokens = 0 self.completion_tokens = 0 From f349618e37a3d7aae8bfd225282293ee58625862 Mon Sep 17 00:00:00 2001 From: jofoks Date: Mon, 31 Mar 2025 14:50:13 -0700 Subject: [PATCH 06/11] Fix: unknown filepath errors --- lightrag/operate.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 5da02683..dcf833c2 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1005,7 +1005,6 @@ async def mix_kg_vector_query( try: # Reduce top_k for vector search in hybrid mode since we have structured information from KG mix_topk = min(10, query_param.top_k) - # TODO: add ids to the query results = await chunks_vdb.query( augmented_query, top_k=mix_topk, ids=query_param.ids ) @@ -1601,7 +1600,7 @@ async def _get_edge_data( text_units_section_list = [["id", "content", "file_path"]] for i, t in enumerate(use_text_units): - text_units_section_list.append([i, t["content"], t["file_path"]]) + text_units_section_list.append([i, t["content"], t.get("file_path", "unknown")]) text_units_context = list_of_list_to_csv(text_units_section_list) return entities_context, relations_context, text_units_context From ad1d362865da9254ac3c18ec662f59e55321290c Mon Sep 17 00:00:00 2001 From: choizhang Date: Tue, 1 Apr 2025 23:50:14 +0800 Subject: [PATCH 07/11] docs: Add Token Statistics Function Description in README --- README-zh.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/README-zh.md b/README-zh.md index 784fd1f2..94a11b61 100644 --- a/README-zh.md +++ b/README-zh.md @@ -410,6 +410,54 @@ if __name__ == "__main__":
+### Token统计功能 +
+<details>
+  <summary> 概述和使用 </summary>
+
+LightRAG提供了TokenTracker工具来跟踪和管理大模型的token消耗。这个功能对于控制API成本和优化性能特别有用。
+
+#### 使用方法
+
+```python
+from lightrag.utils import TokenTracker
+
+# 创建TokenTracker实例
+token_tracker = TokenTracker()
+
+# 方法1:使用上下文管理器(推荐)
+# 适用于需要自动跟踪token使用的场景
+with token_tracker:
+    result1 = await llm_model_func("你的问题1")
+    result2 = await llm_model_func("你的问题2")
+
+# 方法2:手动添加token使用记录
+# 适用于需要更精细控制token统计的场景
+token_tracker.reset()
+
+rag.insert()
+
+rag.query("你的问题1", param=QueryParam(mode="naive"))
+rag.query("你的问题2", param=QueryParam(mode="mix"))
+
+# 显示总token使用量(包含插入和查询操作)
+print("Token usage:", token_tracker.get_usage())
+```
+
+#### 使用建议
+- 在长会话或批量操作中使用上下文管理器,可以自动跟踪所有token消耗
+- 对于需要分段统计的场景,使用手动模式并适时调用reset()
+- 定期检查token使用情况,有助于及时发现异常消耗
+- 在开发测试阶段积极使用此功能,以便优化生产环境的成本
+
+#### 实际应用示例
+您可以参考以下示例来实现token统计:
+- `examples/lightrag_gemini_track_token_demo.py`:使用Google Gemini模型的token统计示例
+- `examples/lightrag_siliconcloud_track_token_demo.py`:使用SiliconCloud模型的token统计示例
+
+这些示例展示了如何在不同模型和场景下有效地使用TokenTracker功能。
+
+</details>

 ### 对话历史

 LightRAG现在通过对话历史功能支持多轮对话。以下是使用方法:

diff --git a/README.md b/README.md
index 6c8861a2..0d04b015 100644
--- a/README.md
+++ b/README.md
@@ -443,6 +443,55 @@ if __name__ == "__main__":

+### Token Usage Tracking + +
+<details>
+  <summary> Overview and Usage </summary>
+
+LightRAG provides a TokenTracker tool to monitor and manage token consumption by large language models. This feature is particularly useful for controlling API costs and optimizing performance.
+
+#### Usage
+
+```python
+from lightrag.utils import TokenTracker
+
+# Create TokenTracker instance
+token_tracker = TokenTracker()
+
+# Method 1: Using context manager (Recommended)
+# Suitable for scenarios requiring automatic token usage tracking
+with token_tracker:
+    result1 = await llm_model_func("your question 1")
+    result2 = await llm_model_func("your question 2")
+
+# Method 2: Manually adding token usage records
+# Suitable for scenarios requiring more granular control over token statistics
+token_tracker.reset()
+
+rag.insert()
+
+rag.query("your question 1", param=QueryParam(mode="naive"))
+rag.query("your question 2", param=QueryParam(mode="mix"))
+
+# Display total token usage (including insert and query operations)
+print("Token usage:", token_tracker.get_usage())
+```
+
+#### Usage Tips
+- Use context managers for long sessions or batch operations to automatically track all token consumption
+- For scenarios requiring segmented statistics, use manual mode and call reset() when appropriate
+- Regular checking of token usage helps detect abnormal consumption early
+- Actively use this feature during development and testing to optimize production costs
+
+#### Practical Examples
+You can refer to these examples for implementing token tracking:
+- `examples/lightrag_gemini_track_token_demo.py`: Token tracking example using Google Gemini model
+- `examples/lightrag_siliconcloud_track_token_demo.py`: Token tracking example using SiliconCloud model
+
+These examples demonstrate how to effectively use the TokenTracker feature with different models and scenarios.
+
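A sketch of the segmented accounting mentioned in the tips above, using only the `reset()` and `get_usage()` calls documented in this section (`document_text` is a placeholder variable):

```python
# Illustrative only: separate indexing cost from query cost
token_tracker.reset()
rag.insert(document_text)  # placeholder: any text to index
insert_usage = token_tracker.get_usage()

token_tracker.reset()
rag.query("your question 1", param=QueryParam(mode="mix"))
query_usage = token_tracker.get_usage()

print("Insert cost:", insert_usage)
print("Query cost:", query_usage)
```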
+ ### Conversation History Support From 8e66b2a974452d93d0c3d88a3a7207a62d8a8060 Mon Sep 17 00:00:00 2001 From: Mykola Chaban Date: Wed, 2 Apr 2025 21:15:40 +0300 Subject: [PATCH 08/11] added additional verificaton if keywords already provided in query param do not run the generation process; --- lightrag/operate.py | 46 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index dcf833c2..c4a43ee9 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -719,8 +719,7 @@ async def kg_query( if cached_response is not None: return cached_response - # Extract keywords using extract_keywords_only function which already supports conversation history - hl_keywords, ll_keywords = await extract_keywords_only( + hl_keywords, ll_keywords = await get_keywords_from_query( query, query_param, global_config, hashing_kv ) @@ -816,6 +815,37 @@ async def kg_query( return response +async def get_keywords_from_query( + query: str, + query_param: QueryParam, + global_config: dict[str, str], + hashing_kv: BaseKVStorage | None = None, +) -> tuple[list[str], list[str]]: + """ + Retrieves high-level and low-level keywords for RAG operations. + + This function checks if keywords are already provided in query parameters, + and if not, extracts them from the query text using LLM. + + Args: + query: The user's query text + query_param: Query parameters that may contain pre-defined keywords + global_config: Global configuration dictionary + hashing_kv: Optional key-value storage for caching results + + Returns: + A tuple containing (high_level_keywords, low_level_keywords) + """ + if not query_param.hl_keywords.empty() and not query_param.ll_keywords.empty(): + return query_param.hl_keywords, query_param.ll_keywords + + # Extract keywords using extract_keywords_only function which already supports conversation history + hl_keywords, ll_keywords = await extract_keywords_only( + query, query_param, global_config, hashing_kv + ) + return hl_keywords, ll_keywords + + async def extract_keywords_only( text: str, param: QueryParam, @@ -956,8 +986,7 @@ async def mix_kg_vector_query( # 2. 
Execute knowledge graph and vector searches in parallel async def get_kg_context(): try: - # Extract keywords using extract_keywords_only function which already supports conversation history - hl_keywords, ll_keywords = await extract_keywords_only( + hl_keywords, ll_keywords = await get_keywords_from_query( query, query_param, global_config, hashing_kv ) @@ -2034,16 +2063,13 @@ async def query_with_keywords( Query response or async iterator """ # Extract keywords - hl_keywords, ll_keywords = await extract_keywords_only( - text=query, - param=param, + hl_keywords, ll_keywords = await get_keywords_from_query( + query=query, + query_param=param, global_config=global_config, hashing_kv=hashing_kv, ) - param.hl_keywords = hl_keywords - param.ll_keywords = ll_keywords - # Create a new string with the prompt and the keywords ll_keywords_str = ", ".join(ll_keywords) hl_keywords_str = ", ".join(hl_keywords) From ce1a59b1c0d40dc6f59a92b48f089ab73e60ebcf Mon Sep 17 00:00:00 2001 From: Mykola Chaban Date: Wed, 2 Apr 2025 21:52:06 +0300 Subject: [PATCH 09/11] Fix trailing whitespace in docstring --- lightrag/operate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index c4a43ee9..91c42965 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -823,16 +823,16 @@ async def get_keywords_from_query( ) -> tuple[list[str], list[str]]: """ Retrieves high-level and low-level keywords for RAG operations. - + This function checks if keywords are already provided in query parameters, and if not, extracts them from the query text using LLM. - + Args: query: The user's query text query_param: Query parameters that may contain pre-defined keywords global_config: Global configuration dictionary hashing_kv: Optional key-value storage for caching results - + Returns: A tuple containing (high_level_keywords, low_level_keywords) """ From e17e61f58e2a5f5eddaca05fb75cbb6911286b8c Mon Sep 17 00:00:00 2001 From: zrguo Date: Thu, 3 Apr 2025 14:44:56 +0800 Subject: [PATCH 10/11] fix lint --- lightrag/llm/openai.py | 46 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py index f29d10c3..d9939809 100644 --- a/lightrag/llm/openai.py +++ b/lightrag/llm/openai.py @@ -50,34 +50,38 @@ def create_openai_async_client( client_configs: dict[str, Any] = None, ) -> AsyncOpenAI: """Create an AsyncOpenAI client with the given configuration. - + Args: api_key: OpenAI API key. If None, uses the OPENAI_API_KEY environment variable. base_url: Base URL for the OpenAI API. If None, uses the default OpenAI API URL. client_configs: Additional configuration options for the AsyncOpenAI client. These will override any default configurations but will be overridden by explicit parameters (api_key, base_url). - + Returns: An AsyncOpenAI client instance. 
""" if not api_key: api_key = os.environ["OPENAI_API_KEY"] - + default_headers = { "User-Agent": f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}", "Content-Type": "application/json", } - + if client_configs is None: client_configs = {} - + # Create a merged config dict with precedence: explicit params > client_configs > defaults - merged_configs = {**client_configs, "default_headers": default_headers, "api_key": api_key} - + merged_configs = { + **client_configs, + "default_headers": default_headers, + "api_key": api_key, + } + if base_url is not None: merged_configs["base_url"] = base_url - + return AsyncOpenAI(**merged_configs) @@ -99,7 +103,7 @@ async def openai_complete_if_cache( **kwargs: Any, ) -> str: """Complete a prompt using OpenAI's API with caching support. - + Args: model: The OpenAI model to use. prompt: The prompt to complete. @@ -114,10 +118,10 @@ async def openai_complete_if_cache( explicit parameters (api_key, base_url). - hashing_kv: Will be removed from kwargs before passing to OpenAI. - keyword_extraction: Will be removed from kwargs before passing to OpenAI. - + Returns: The completed text or an async iterator of text chunks if streaming. - + Raises: InvalidResponseError: If the response from OpenAI is invalid or empty. APIConnectionError: If there is a connection error with the OpenAI API. @@ -133,18 +137,16 @@ async def openai_complete_if_cache( # Extract client configuration options client_configs = kwargs.pop("openai_client_configs", {}) - + # Create the OpenAI client openai_async_client = create_openai_async_client( - api_key=api_key, - base_url=base_url, - client_configs=client_configs + api_key=api_key, base_url=base_url, client_configs=client_configs ) # Remove special kwargs that shouldn't be passed to OpenAI kwargs.pop("hashing_kv", None) kwargs.pop("keyword_extraction", None) - + # Prepare messages messages: list[dict[str, Any]] = [] if system_prompt: @@ -337,7 +339,7 @@ async def openai_embed( client_configs: dict[str, Any] = None, ) -> np.ndarray: """Generate embeddings for a list of texts using OpenAI's API. - + Args: texts: List of texts to embed. model: The OpenAI embedding model to use. @@ -346,10 +348,10 @@ async def openai_embed( client_configs: Additional configuration options for the AsyncOpenAI client. These will override any default configurations but will be overridden by explicit parameters (api_key, base_url). - + Returns: A numpy array of embeddings, one per input text. - + Raises: APIConnectionError: If there is a connection error with the OpenAI API. RateLimitError: If the OpenAI API rate limit is exceeded. 
@@ -357,11 +359,9 @@ async def openai_embed( """ # Create the OpenAI client openai_async_client = create_openai_async_client( - api_key=api_key, - base_url=base_url, - client_configs=client_configs + api_key=api_key, base_url=base_url, client_configs=client_configs ) - + response = await openai_async_client.embeddings.create( model=model, input=texts, encoding_format="float" ) From 75e8a10c213572a8e7d2d58197ae16b92c6ac2fe Mon Sep 17 00:00:00 2001 From: zrguo Date: Thu, 3 Apr 2025 17:46:28 +0800 Subject: [PATCH 11/11] Update get_keywords_from_query --- lightrag/operate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 91c42965..088ca617 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -836,7 +836,8 @@ async def get_keywords_from_query( Returns: A tuple containing (high_level_keywords, low_level_keywords) """ - if not query_param.hl_keywords.empty() and not query_param.ll_keywords.empty(): + # Check if pre-defined keywords are already provided + if query_param.hl_keywords or query_param.ll_keywords: return query_param.hl_keywords, query_param.ll_keywords # Extract keywords using extract_keywords_only function which already supports conversation history