diff --git a/examples/test_faiss.py b/examples/test_faiss.py
index e73c0bfc..ab0ef9f7 100644
--- a/examples/test_faiss.py
+++ b/examples/test_faiss.py
@@ -8,7 +8,6 @@
 from sentence_transformers import SentenceTransformer
 from openai import AzureOpenAI
 from lightrag import LightRAG, QueryParam
 from lightrag.utils import EmbeddingFunc
-from lightrag.kg.faiss_impl import FaissVectorDBStorage
 # Configure Logging
 logging.basicConfig(level=logging.INFO)
@@ -20,14 +19,10 @@
 AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
 AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
 AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+
 async def llm_model_func(
-    prompt,
-    system_prompt=None,
-    history_messages=[],
-    keyword_extraction=False,
-    **kwargs
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
 ) -> str:
-
     # Create a client for AzureOpenAI
     client = AzureOpenAI(
         api_key=AZURE_OPENAI_API_KEY,
@@ -56,12 +51,12 @@ async def llm_model_func(


 async def embedding_func(texts: list[str]) -> np.ndarray:
-    model = SentenceTransformer('all-MiniLM-L6-v2')
+    model = SentenceTransformer("all-MiniLM-L6-v2")
     embeddings = model.encode(texts, convert_to_numpy=True)
     return embeddings

+
 def main():
-
     WORKING_DIR = "./dickens"

     # Initialize LightRAG with the LLM model function and embedding function
@@ -76,7 +71,7 @@ def main():
         vector_storage="FaissVectorDBStorage",
         vector_db_storage_cls_kwargs={
             "cosine_better_than_threshold": 0.3  # Your desired threshold
-        }
+        },
     )

     # Insert the custom chunks into LightRAG
@@ -101,4 +96,4 @@


 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py
index 1688e507..fc6aa779 100644
--- a/lightrag/kg/faiss_impl.py
+++ b/lightrag/kg/faiss_impl.py
@@ -22,6 +22,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
     A Faiss-based Vector DB Storage for LightRAG.
     Uses cosine similarity by storing normalized vectors in a Faiss index with inner product search.
     """
+
     cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))

     def __post_init__(self):
@@ -46,7 +47,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
         # For demonstration, we use a simple IndexFlatIP.
         self._index = faiss.IndexFlatIP(self._dim)

-        # Keep a local store for metadata, IDs, etc. 
+        # Keep a local store for metadata, IDs, etc.
         # Maps → metadata (including your original ID).
         self._id_to_meta = {}

@@ -93,7 +94,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        pbar = tqdm_async(total=len(batches), desc="Generating embeddings", unit="batch")
+        pbar = tqdm_async(
+            total=len(batches), desc="Generating embeddings", unit="batch"
+        )

         async def wrapped_task(batch):
             result = await self.embedding_func(batch)
@@ -200,7 +203,9 @@ class FaissVectorDBStorage(BaseVectorStorage):

         if to_remove:
             self._remove_faiss_ids(to_remove)
-            logger.info(f"Successfully deleted {len(to_remove)} vectors from {self.namespace}")
+            logger.info(
+                f"Successfully deleted {len(to_remove)} vectors from {self.namespace}"
+            )

     async def delete_entity(self, entity_name: str):
         """
@@ -288,7 +293,7 @@ class FaissVectorDBStorage(BaseVectorStorage):

     def _load_faiss_index(self):
         """
-        Load the Faiss index + metadata from disk if it exists, 
+        Load the Faiss index + metadata from disk if it exists,
         and rebuild in-memory structures so we can query.
         """
        if not os.path.exists(self._faiss_index_file):