From 4219454fab85455de28f0a9412be35c4286b88b8 Mon Sep 17 00:00:00 2001 From: zrguo Date: Sat, 1 Mar 2025 17:45:06 +0800 Subject: [PATCH 1/4] fix format --- lightrag/kg/neo4j_impl.py | 5 +-- lightrag/operate.py | 86 +++++++++++++++++++++++++-------------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/lightrag/kg/neo4j_impl.py b/lightrag/kg/neo4j_impl.py index 0ddc611d..f5c2237a 100644 --- a/lightrag/kg/neo4j_impl.py +++ b/lightrag/kg/neo4j_impl.py @@ -280,10 +280,7 @@ class Neo4JStorage(BaseGraphStorage): MATCH (start:`{entity_name_label_source}`)-[r]->(end:`{entity_name_label_target}`) RETURN properties(r) as edge_properties LIMIT 1 - """.format( - entity_name_label_source=entity_name_label_source, - entity_name_label_target=entity_name_label_target, - ) + """ result = await session.run(query) record = await result.single() diff --git a/lightrag/operate.py b/lightrag/operate.py index 3cf8b455..b52d1ef6 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -141,17 +141,18 @@ async def _handle_single_entity_extraction( if len(record_attributes) < 4 or record_attributes[0] != '"entity"': return None # add this record as a node in the G - entity_name = clean_str(record_attributes[1].upper()) + entity_name = clean_str(record_attributes[1]).strip('"') if not entity_name.strip(): return None - entity_type = clean_str(record_attributes[2].upper()) - entity_description = clean_str(record_attributes[3]) + entity_type = clean_str(record_attributes[2]).strip('"') + entity_description = clean_str(record_attributes[3]).strip('"') entity_source_id = chunk_key return dict( entity_name=entity_name, entity_type=entity_type, description=entity_description, source_id=entity_source_id, + metadata={"created_at": time.time()}, ) @@ -162,14 +163,15 @@ async def _handle_single_relationship_extraction( if len(record_attributes) < 5 or record_attributes[0] != '"relationship"': return None # add this record as edge - source = clean_str(record_attributes[1].upper()) - 
target = clean_str(record_attributes[2].upper()) - edge_description = clean_str(record_attributes[3]) - - edge_keywords = clean_str(record_attributes[4]) + source = clean_str(record_attributes[1]).strip('"') + target = clean_str(record_attributes[2]).strip('"') + edge_description = clean_str(record_attributes[3]).strip('"') + edge_keywords = clean_str(record_attributes[4]).strip('"') edge_source_id = chunk_key weight = ( - float(record_attributes[-1]) if is_float_regex(record_attributes[-1]) else 1.0 + float(record_attributes[-1].strip('"')) + if is_float_regex(record_attributes[-1]) + else 1.0 ) return dict( src_id=source, @@ -547,9 +549,13 @@ async def extract_entities( if entity_vdb is not None: data_for_vdb = { compute_mdhash_id(dp["entity_name"], prefix="ent-"): { - "content": dp["entity_name"] + dp["description"], "entity_name": dp["entity_name"], + "entity_type": dp["entity_type"], + "content": f"{dp['entity_name']}\n{dp['description']}", "source_id": dp["source_id"], + "metadata": { + "created_at": dp.get("metadata", {}).get("created_at", time.time()) + }, } for dp in all_entities_data } @@ -560,11 +566,9 @@ async def extract_entities( compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): { "src_id": dp["src_id"], "tgt_id": dp["tgt_id"], + "keywords": dp["keywords"], + "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}", "source_id": dp["source_id"], - "content": dp["keywords"] - + dp["src_id"] - + dp["tgt_id"] - + dp["description"], "metadata": { "created_at": dp.get("metadata", {}).get("created_at", time.time()) }, @@ -960,7 +964,7 @@ async def mix_kg_vector_query( stream=query_param.stream, ) - # 清理响应内容 + # Clean up response content if isinstance(response, str) and len(response) > len(sys_prompt): response = ( response.replace(sys_prompt, "") @@ -972,7 +976,7 @@ async def mix_kg_vector_query( .strip() ) - # 7. Save cache - 只有在收集完整响应后才缓存 + # 7. 
Save cache - Only cache after collecting complete response await save_to_cache( hashing_kv, CacheData( args_hash=args_hash, content=response, prompt=query, quantized=quantized, min_val=min_val, max_val=max_val, mode=query_param.mode, cache_type="query", ), ) @@ -1128,8 +1132,19 @@ async def _get_node_data( ) # build prompt - entites_section_list = [["id", "entity", "type", "description", "rank"]] + entites_section_list = [ + [ + "id", + "entity", + "type", + "description", + "rank", "created_at", + ] + ] for i, n in enumerate(node_datas): + created_at = n.get("created_at", "UNKNOWN") + if isinstance(created_at, (int, float)): + created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at)) entites_section_list.append( [ i, @@ -1137,6 +1152,7 @@ async def _get_node_data( n.get("entity_type", "UNKNOWN"), n.get("description", "UNKNOWN"), n["rank"], + created_at, ] ) entities_context = list_of_list_to_csv(entites_section_list) @@ -1401,6 +1417,10 @@ async def _get_edge_data( entites_section_list = [["id", "entity", "type", "description", "rank"]] for i, n in enumerate(use_entities): + created_at = n.get("created_at", "Unknown") + # Convert timestamp to readable format + if isinstance(created_at, (int, float)): + created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at)) entites_section_list.append( [ i, @@ -1408,6 +1428,7 @@ async def _get_edge_data( n.get("entity_type", "UNKNOWN"), n.get("description", "UNKNOWN"), n["rank"], + created_at, ] ) entities_context = list_of_list_to_csv(entites_section_list) @@ -1766,6 +1787,8 @@ async def kg_query_with_keywords( system_prompt=sys_prompt, stream=query_param.stream, ) + + # Clean up response content if isinstance(response, str) and len(response) > len(sys_prompt): response = ( response.replace(sys_prompt, "") @@ -1777,18 +1800,19 @@ async def kg_query_with_keywords( .strip() ) - # Save to cache - await save_to_cache( - hashing_kv, - CacheData( - args_hash=args_hash, - content=response, - prompt=query, - quantized=quantized, - min_val=min_val, - max_val=max_val, - mode=query_param.mode, - cache_type="query", - ), - ) + # 7. 
Save cache - 只有在收集完整响应后才缓存 + await save_to_cache( + hashing_kv, + CacheData( + args_hash=args_hash, + content=response, + prompt=query, + quantized=quantized, + min_val=min_val, + max_val=max_val, + mode=query_param.mode, + cache_type="query", + ), + ) + return response From a923ee89b0e54bce341a1b6eb51b2bde58254ef8 Mon Sep 17 00:00:00 2001 From: zrguo Date: Sat, 1 Mar 2025 17:47:53 +0800 Subject: [PATCH 2/4] Update __init__.py --- lightrag/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/__init__.py b/lightrag/__init__.py index 250c2ae8..2d660928 100644 --- a/lightrag/__init__.py +++ b/lightrag/__init__.py @@ -1,5 +1,5 @@ from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam -__version__ = "1.2.2" +__version__ = "1.2.3" __author__ = "Zirui Guo" __url__ = "https://github.com/HKUDS/LightRAG" From a8f4385c0517a724eed5ee6ce1ba510affcf1c3a Mon Sep 17 00:00:00 2001 From: zrguo Date: Sat, 1 Mar 2025 18:30:58 +0800 Subject: [PATCH 3/4] Add clear_cache --- README.md | 34 ++++++++++++++++++++++++++++++ lightrag/lightrag.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/README.md b/README.md index 84b35654..86211b35 100644 --- a/README.md +++ b/README.md @@ -751,6 +751,40 @@ rag.delete_by_entity("Project Gutenberg") rag.delete_by_doc_id("doc_id") ``` +## Cache + +
+ Clear Cache + +You can clear the LLM response cache with different modes: + +```python +# Clear all cache +await rag.aclear_cache() + +# Clear local mode cache +await rag.aclear_cache(modes=["local"]) + +# Clear extraction cache +await rag.aclear_cache(modes=["default"]) + +# Clear multiple modes +await rag.aclear_cache(modes=["local", "global", "hybrid"]) + +# Synchronous version +rag.clear_cache(modes=["local"]) +``` + +Valid modes are: +- `"default"`: Extraction cache +- `"naive"`: Naive search cache +- `"local"`: Local search cache +- `"global"`: Global search cache +- `"hybrid"`: Hybrid search cache +- `"mix"`: Mix search cache + +
+ ## LightRAG init parameters
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index ea87d188..0f08f20e 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -1588,3 +1588,52 @@ class LightRAG: f"Storage implementation '{storage_name}' requires the following " f"environment variables: {', '.join(missing_vars)}" ) + + async def aclear_cache(self, modes: list[str] | None = None) -> None: + """Clear cache data from the LLM response cache storage. + + Args: + modes (list[str] | None): Modes of cache to clear. Options: ["default", "naive", "local", "global", "hybrid", "mix"]. + "default" represents extraction cache. + If None, clears all cache. + + Example: + # Clear all cache + await rag.aclear_cache() + + # Clear local mode cache + await rag.aclear_cache(modes=["local"]) + + # Clear extraction cache + await rag.aclear_cache(modes=["default"]) + """ + if not self.llm_response_cache: + logger.warning("No cache storage configured") + return + + valid_modes = ["default", "naive", "local", "global", "hybrid", "mix"] + + # Validate input + if modes and not all(mode in valid_modes for mode in modes): + raise ValueError(f"Invalid mode. 
Valid modes are: {valid_modes}") + + try: + # Reset the cache storage for specified mode + if modes: + await self.llm_response_cache.delete(modes) + logger.info(f"Cleared cache for modes: {modes}") + else: + # Clear all modes + await self.llm_response_cache.delete(valid_modes) + logger.info("Cleared all cache") + + await self.llm_response_cache.index_done_callback() + + except Exception as e: + logger.error(f"Error while clearing cache: {e}") + + def clear_cache(self, modes: list[str] | None = None) -> None: + """Synchronous version of aclear_cache.""" + return always_get_an_event_loop().run_until_complete( + self.aclear_cache(modes) + ) From 9aa438cf7978a64aa5110501903bb7d95deba102 Mon Sep 17 00:00:00 2001 From: zrguo Date: Sat, 1 Mar 2025 18:35:12 +0800 Subject: [PATCH 4/4] fix linting --- lightrag/lightrag.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 0f08f20e..15338874 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -1593,17 +1593,17 @@ class LightRAG: """Clear cache data from the LLM response cache storage. Args: - modes (list[str] | None): Modes of cache to clear. Options: ["default", "naive", "local", "global", "hybrid", "mix"]. + modes (list[str] | None): Modes of cache to clear. Options: ["default", "naive", "local", "global", "hybrid", "mix"]. "default" represents extraction cache. If None, clears all cache. 
Example: # Clear all cache await rag.aclear_cache() - + # Clear local mode cache await rag.aclear_cache(modes=["local"]) - + # Clear extraction cache await rag.aclear_cache(modes=["default"]) """ @@ -1620,8 +1620,8 @@ class LightRAG: try: # Reset the cache storage for specified mode if modes: - await self.llm_response_cache.delete(modes) - logger.info(f"Cleared cache for modes: {modes}") + await self.llm_response_cache.delete(modes) + logger.info(f"Cleared cache for modes: {modes}") else: # Clear all modes await self.llm_response_cache.delete(valid_modes) @@ -1634,6 +1634,4 @@ class LightRAG: def clear_cache(self, modes: list[str] | None = None) -> None: """Synchronous version of aclear_cache.""" - return always_get_an_event_loop().run_until_complete( - self.aclear_cache(modes) - ) + return always_get_an_event_loop().run_until_complete(self.aclear_cache(modes))