feat(lightrag): 添加查询时使用 embedding 缓存功能
- 在 LightRAG 类中添加 embedding_cache_config配置项 - 实现基于 embedding 相似度的缓存查询和存储 - 添加量化和反量化函数,用于压缩 embedding 数据 - 新增示例演示 embedding 缓存的使用
This commit is contained in:
@@ -307,3 +307,72 @@ def process_combine_contexts(hl, ll):
|
||||
combined_sources_result = "\n".join(combined_sources_result)
|
||||
|
||||
return combined_sources_result
|
||||
|
||||
|
||||
async def get_best_cached_response(
    hashing_kv, current_embedding, similarity_threshold=0.95
):
    """Scan every cached entry in *hashing_kv* and return the stored response
    whose embedding is most similar to *current_embedding*.

    Cached embeddings are stored hex-encoded as quantized uint8 buffers
    together with their shape and min/max values, so each one is decoded
    and dequantized before the cosine comparison.

    Returns None when no entry clears *similarity_threshold* or when any
    error occurs — the lookup is best-effort and must never break a query.
    """
    try:
        keys = await hashing_kv.all_keys()
        best_score = 0
        best_response = None

        for key in keys:
            entry = await hashing_kv.get_by_id(key)
            # Skip entries that are missing or were stored without an embedding.
            if entry is None or "embedding" not in entry:
                continue

            # Rebuild the quantized uint8 array from its hex representation.
            packed = bytes.fromhex(entry["embedding"])
            quantized = np.frombuffer(packed, dtype=np.uint8).reshape(
                entry["embedding_shape"]
            )
            candidate = dequantize_embedding(
                quantized,
                entry["embedding_min"],
                entry["embedding_max"],
            )

            score = cosine_similarity(current_embedding, candidate)
            if score > best_score:
                best_score = score
                best_response = entry["return"]

        # Only hand back the best match when it clears the threshold.
        return best_response if best_score > similarity_threshold else None

    except Exception as e:
        # Best-effort cache: log the problem and fall through to a cache miss.
        logger.warning(f"Error in get_best_cached_response: {e}")
        return None
|
||||
|
||||
|
||||
def cosine_similarity(v1, v2):
    """Calculate cosine similarity between two vectors.

    Args:
        v1: First vector (array-like of numbers).
        v2: Second vector, same length as ``v1``.

    Returns:
        float: Cosine similarity in [-1, 1]; 0.0 when either vector has
        zero magnitude. The original formula divided by zero in that case,
        propagating NaN into downstream threshold comparisons.
    """
    dot_product = np.dot(v1, v2)
    norm1 = np.linalg.norm(v1)
    norm2 = np.linalg.norm(v2)
    # Guard against zero-length vectors: treat them as completely dissimilar
    # instead of dividing by zero.
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot_product / (norm1 * norm2)
|
||||
|
||||
|
||||
def quantize_embedding(embedding: np.ndarray, bits=8) -> tuple:
    """Quantize an embedding to unsigned integers of the given bit width.

    Args:
        embedding: Float array (or array-like) of embedding values.
        bits: Bit width of the quantized representation (default 8,
            i.e. values mapped onto the 0-255 range).

    Returns:
        tuple: ``(quantized, min_val, max_val)`` where ``quantized`` is a
        uint8 array and ``min_val``/``max_val`` are the original extremes
        needed by ``dequantize_embedding`` to reconstruct the values.
    """
    embedding = np.asarray(embedding)

    # Keep min/max so the values can be reconstructed later.
    min_val = embedding.min()
    max_val = embedding.max()

    # A constant embedding has zero range; the scale formula below would
    # divide by zero. Every value quantizes to level 0 and dequantizes
    # back to min_val, which is exact for this degenerate case.
    if max_val == min_val:
        return np.zeros_like(embedding, dtype=np.uint8), min_val, max_val

    # Map [min_val, max_val] linearly onto the 0 .. 2**bits - 1 levels.
    scale = (2**bits - 1) / (max_val - min_val)
    quantized = np.round((embedding - min_val) * scale).astype(np.uint8)

    return quantized, min_val, max_val
|
||||
|
||||
|
||||
def dequantize_embedding(
    quantized: np.ndarray, min_val: float, max_val: float, bits=8
) -> np.ndarray:
    """Map a quantized uint array back to float32 values in [min_val, max_val].

    Inverse of ``quantize_embedding``: each integer level is rescaled by the
    step size ``(max_val - min_val) / (2**bits - 1)`` and shifted by
    ``min_val``.
    """
    step = (max_val - min_val) / (2**bits - 1)
    restored = quantized * step + min_val
    return restored.astype(np.float32)
|
||||
|
Reference in New Issue
Block a user