Add an example script to copy the LLM cache from one storage solution to another

2025-01-16 07:56:13 +08:00
parent 2b7d253117
commit d1ba8c5db5
1 changed files with 35 additions and 4 deletions
--- a/examples/copy_llm_cache_to_another_storage.py
+++ b/examples/copy_llm_cache_to_another_storage.py
@@ -1,3 +1,9 @@
+"""
+Sometimes you need to switch storage solutions but still want to save LLM tokens and time.
+This handy script helps you copy the LLM caches from one storage solution to another.
+(Not all storage implementations are supported.)
+"""
+
 import asyncio
 import logging
 import os
@@ -8,7 +14,7 @@ from lightrag.storage import JsonKVStorage

 load_dotenv()
 ROOT_DIR = os.environ.get("ROOT_DIR")
-WORKING_DIR = f"{ROOT_DIR}/dickens-pg"
+WORKING_DIR = f"{ROOT_DIR}/dickens"

 logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)

@@ -24,12 +30,12 @@ postgres_db = PostgreSQLDB(
        "port": 15432,
        "user": "rag",
        "password": "rag",
-        "database": "r1",
+        "database": "r2",
    }
 )


-async def main():
+async def copy_from_postgres_to_json():
    await postgres_db.initdb()

    from_llm_response_cache = PGKVStorage(
@@ -62,5 +68,30 @@ async def main():
    print("Mission accomplished!")


+async def copy_from_json_to_postgres():  # copy the "llm_response_cache" namespace from JsonKVStorage to PGKVStorage
+    await postgres_db.initdb()
+
+    from_llm_response_cache = JsonKVStorage(  # source side of the copy
+        namespace="llm_response_cache",
+        global_config={"working_dir": WORKING_DIR},
+        embedding_func=None,
+    )
+
+    to_llm_response_cache = PGKVStorage(  # destination side, backed by the module-level postgres_db
+        namespace="llm_response_cache",
+        global_config={"embedding_batch_num": 6},
+        embedding_func=None,
+        db=postgres_db,
+    )
+
+    for mode in await from_llm_response_cache.all_keys():  # each top-level key is handled as a "mode" (presumably a cache mode; confirm)
+        print(f"Copying {mode}")
+        caches = await from_llm_response_cache.get_by_id(mode)  # NOTE(review): assumed to be a dict; None would fail on .items() below
+        for k, v in caches.items():
+            item = {mode: {k: v}}  # re-wrap a single entry under its mode key so each upsert carries one entry
+            print(f"\tCopying {item}")
+            await to_llm_response_cache.upsert(item)  # NOTE(review): no completion message is printed after the loop
+
+
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(copy_from_json_to_postgres())