feat(lightrag): 添加查询时使用 embedding 缓存的功能

- 在 LightRAG 类中添加 embedding_cache_config配置项
- 实现基于 embedding 相似度的缓存查询和存储
- 添加量化和反量化函数,用于压缩 embedding 数据
- 新增示例演示 embedding 缓存的使用
This commit is contained in:
magicyuan876
2024-12-06 08:17:20 +08:00
parent 645890aff6
commit d48c6e4588
5 changed files with 431 additions and 34 deletions

View File

@@ -307,3 +307,72 @@ def process_combine_contexts(hl, ll):
combined_sources_result = "\n".join(combined_sources_result)
return combined_sources_result
async def get_best_cached_response(
    hashing_kv, current_embedding, similarity_threshold=0.95
):
    """Return the cached response whose stored embedding best matches.

    Every entry in ``hashing_kv`` is scanned: its quantized embedding is
    decoded from hex, dequantized, and scored against ``current_embedding``
    with cosine similarity. The response of the top-scoring entry is
    returned only when its score exceeds ``similarity_threshold``;
    otherwise — or if anything goes wrong — ``None`` is returned.
    """
    try:
        candidate_keys = await hashing_kv.all_keys()
        top_score = 0
        top_response = None

        # Fetch and score each cached entry individually.
        for cache_key in candidate_keys:
            entry = await hashing_kv.get_by_id(cache_key)
            if entry is None or "embedding" not in entry:
                continue

            # Rebuild the uint8 array from its hex string, then undo quantization.
            packed = np.frombuffer(
                bytes.fromhex(entry["embedding"]), dtype=np.uint8
            ).reshape(entry["embedding_shape"])
            restored = dequantize_embedding(
                packed,
                entry["embedding_min"],
                entry["embedding_max"],
            )

            score = cosine_similarity(current_embedding, restored)
            if score > top_score:
                top_score = score
                top_response = entry["return"]

        if top_score > similarity_threshold:
            return top_response
        return None
    except Exception as e:
        logger.warning(f"Error in get_best_cached_response: {e}")
        return None
def cosine_similarity(v1, v2):
    """Calculate cosine similarity between two vectors.

    Args:
        v1: First vector (array-like of numbers).
        v2: Second vector, same length as ``v1``.

    Returns:
        float: Cosine similarity in [-1, 1]. Returns 0.0 when either
        vector has zero magnitude, instead of producing NaN/inf from a
        division by zero.
    """
    dot_product = np.dot(v1, v2)
    norm1 = np.linalg.norm(v1)
    norm2 = np.linalg.norm(v2)
    # Guard the zero-vector case: the original expression would divide by zero.
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot_product / (norm1 * norm2)
def quantize_embedding(embedding: np.ndarray, bits=8) -> tuple:
    """Quantize an embedding to unsigned integers of the given bit width.

    Args:
        embedding: Float array to compress.
        bits: Target bit width; 8 maps values onto the 0-255 range.
            NOTE(review): the output dtype is fixed to uint8, so bits > 8
            would overflow — confirm callers only use the default.

    Returns:
        tuple: ``(quantized, min_val, max_val)`` where ``min_val`` and
        ``max_val`` are kept so the caller can reconstruct the floats
        with ``dequantize_embedding``.
    """
    embedding = np.asarray(embedding)
    # Calculate min/max values for reconstruction
    min_val = embedding.min()
    max_val = embedding.max()
    if max_val == min_val:
        # Constant vector: the scale below would divide by zero. Map every
        # component to level 0; dequantization restores min_val exactly.
        return np.zeros_like(embedding, dtype=np.uint8), min_val, max_val
    # Quantize to the 0 .. 2**bits - 1 range
    scale = (2**bits - 1) / (max_val - min_val)
    quantized = np.round((embedding - min_val) * scale).astype(np.uint8)
    return quantized, min_val, max_val
def dequantize_embedding(
    quantized: np.ndarray, min_val: float, max_val: float, bits=8
) -> np.ndarray:
    """Invert quantize_embedding: map integer levels back to float32.

    Each integer level is multiplied by the step size between adjacent
    levels and shifted by ``min_val`` to recover an approximation of the
    original embedding values.
    """
    step = (max_val - min_val) / (2**bits - 1)
    restored = quantized * step + min_val
    return restored.astype(np.float32)