Fix "Too many open files" error when using the Redis vector DB
Enhance RedisKVStorage: implement connection pooling and error handling. Refactor async methods to use context managers for Redis operations, improving resource management and error logging. Add batch processing for key operations to optimize performance.
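
The hunks below touch the query-side code; the RedisKVStorage changes the message describes are not part of this excerpt. A minimal sketch of the described pattern, assuming redis-py >= 5 and its redis.asyncio API: the class name comes from the commit message, while MAX_CONNECTIONS, _get_redis_connection, and the method bodies are illustrative assumptions, not the actual implementation.

```python
# Sketch only: pooled, context-managed Redis access as the commit describes.
from __future__ import annotations

import logging
from contextlib import asynccontextmanager

from redis.asyncio import ConnectionPool, Redis

logger = logging.getLogger(__name__)

MAX_CONNECTIONS = 50  # assumed cap; bounds open sockets, hence open files


class RedisKVStorage:
    def __init__(self, redis_url: str = "redis://localhost:6379"):
        # One shared pool instead of a fresh connection per operation, so
        # descriptors are reused rather than leaked.
        self._pool = ConnectionPool.from_url(
            redis_url, max_connections=MAX_CONNECTIONS, decode_responses=True
        )

    @asynccontextmanager
    async def _get_redis_connection(self):
        # Context manager: the client is always closed (its connection
        # returned to the pool), even when an operation raises.
        redis = Redis(connection_pool=self._pool)
        try:
            yield redis
        except Exception as e:
            logger.error(f"Redis operation failed: {e}")
            raise
        finally:
            await redis.aclose()

    async def get_by_id(self, key: str) -> str | None:
        async with self._get_redis_connection() as redis:
            return await redis.get(key)

    async def get_by_ids(self, keys: list[str]) -> list[str | None]:
        # Batched reads: one pipeline round trip instead of len(keys) calls.
        async with self._get_redis_connection() as redis:
            async with redis.pipeline(transaction=False) as pipe:
                for k in keys:
                    pipe.get(k)
                return await pipe.execute()
```

Bounding the pool (and batching, below) is what addresses "Too many open files": every Redis connection holds a file descriptor, and unpooled per-call connections accumulate until the process hits its limit.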
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import traceback
 import json
 import re
 import os
@@ -994,6 +995,7 @@ async def mix_kg_vector_query(
 
         except Exception as e:
             logger.error(f"Error in get_kg_context: {str(e)}")
+            traceback.print_exc()
             return None
 
     async def get_vector_context():
@@ -1382,9 +1384,16 @@ async def _find_most_related_text_unit_from_entities(
                 all_text_units_lookup[c_id] = index
                 tasks.append((c_id, index, this_edges))
 
-    results = await asyncio.gather(
-        *[text_chunks_db.get_by_id(c_id) for c_id, _, _ in tasks]
-    )
+    # Process in batches of 25 tasks at a time to avoid overwhelming resources
+    batch_size = 25
+    results = []
+
+    for i in range(0, len(tasks), batch_size):
+        batch_tasks = tasks[i:i + batch_size]
+        batch_results = await asyncio.gather(
+            *[text_chunks_db.get_by_id(c_id) for c_id, _, _ in batch_tasks]
+        )
+        results.extend(batch_results)
 
     for (c_id, index, this_edges), data in zip(tasks, results):
         all_text_units_lookup[c_id] = {