feat(lightrag): 添加查询时使用 embedding 缓存的功能

- 在 LightRAG 类中添加 embedding_cache_config配置项
- 实现基于 embedding 相似度的缓存查询和存储
- 添加量化和反量化函数,用于压缩 embedding 数据
- 新增示例演示 embedding 缓存的使用
This commit is contained in:
magicyuan876
2024-12-06 08:17:20 +08:00
parent 645890aff6
commit d48c6e4588
5 changed files with 431 additions and 34 deletions

View File

@@ -307,3 +307,72 @@ def process_combine_contexts(hl, ll):
combined_sources_result = "\n".join(combined_sources_result)
return combined_sources_result
async def get_best_cached_response(
    hashing_kv, current_embedding, similarity_threshold=0.95
):
    """Return the cached response whose stored embedding best matches.

    Every entry in ``hashing_kv`` is scanned: its quantized embedding is
    decoded from hex, dequantized, and scored against ``current_embedding``
    with cosine similarity. The response of the top-scoring entry is
    returned only when its score exceeds ``similarity_threshold``;
    otherwise — or if anything goes wrong — ``None`` is returned.
    """
    try:
        candidate_keys = await hashing_kv.all_keys()
        top_score = 0
        top_response = None

        # Fetch and score each cached entry individually.
        for cache_key in candidate_keys:
            entry = await hashing_kv.get_by_id(cache_key)
            if entry is None or "embedding" not in entry:
                continue

            # Rebuild the uint8 array from its hex string, then undo quantization.
            packed = np.frombuffer(
                bytes.fromhex(entry["embedding"]), dtype=np.uint8
            ).reshape(entry["embedding_shape"])
            restored = dequantize_embedding(
                packed,
                entry["embedding_min"],
                entry["embedding_max"],
            )

            score = cosine_similarity(current_embedding, restored)
            if score > top_score:
                top_score = score
                top_response = entry["return"]

        if top_score > similarity_threshold:
            return top_response
        return None
    except Exception as e:
        logger.warning(f"Error in get_best_cached_response: {e}")
        return None
def cosine_similarity(v1, v2):
    """Calculate cosine similarity between two vectors.

    Args:
        v1: First vector (array-like of numbers).
        v2: Second vector, same length as ``v1``.

    Returns:
        float: Cosine similarity in [-1, 1]. Returns 0.0 when either
        vector has zero magnitude, instead of producing NaN/inf from a
        division by zero.
    """
    dot_product = np.dot(v1, v2)
    norm1 = np.linalg.norm(v1)
    norm2 = np.linalg.norm(v2)
    # Guard the zero-vector case: the original expression would divide by zero.
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot_product / (norm1 * norm2)
def quantize_embedding(embedding: np.ndarray, bits=8) -> tuple:
    """Quantize an embedding to unsigned integers of the given bit width.

    Args:
        embedding: Float array to compress.
        bits: Target bit width; 8 maps values onto the 0-255 range.
            NOTE(review): the output dtype is fixed to uint8, so bits > 8
            would overflow — confirm callers only use the default.

    Returns:
        tuple: ``(quantized, min_val, max_val)`` where ``min_val`` and
        ``max_val`` are kept so the caller can reconstruct the floats
        with ``dequantize_embedding``.
    """
    embedding = np.asarray(embedding)
    # Calculate min/max values for reconstruction
    min_val = embedding.min()
    max_val = embedding.max()
    if max_val == min_val:
        # Constant vector: the scale below would divide by zero. Map every
        # component to level 0; dequantization restores min_val exactly.
        return np.zeros_like(embedding, dtype=np.uint8), min_val, max_val
    # Quantize to the 0 .. 2**bits - 1 range
    scale = (2**bits - 1) / (max_val - min_val)
    quantized = np.round((embedding - min_val) * scale).astype(np.uint8)
    return quantized, min_val, max_val
def dequantize_embedding(
    quantized: np.ndarray, min_val: float, max_val: float, bits=8
) -> np.ndarray:
    """Invert quantize_embedding: map integer levels back to float32.

    Each integer level is multiplied by the step size between adjacent
    levels and shifted by ``min_val`` to recover an approximation of the
    original embedding values.
    """
    step = (max_val - min_val) / (2**bits - 1)
    restored = quantized * step + min_val
    return restored.astype(np.float32)