From 045993f7d29138fd9c11a06bf80933867a646609 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 3 May 2025 11:17:49 +0800 Subject: [PATCH] Remove deprecated search_by_prefix --- lightrag/kg/chroma_impl.py | 36 ------------------- lightrag/kg/faiss_impl.py | 21 ------------ lightrag/kg/milvus_impl.py | 25 -------------- lightrag/kg/mongo_impl.py | 29 ---------------- lightrag/kg/nano_vector_db_impl.py | 20 ----------- lightrag/kg/postgres_impl.py | 35 ------------------- lightrag/kg/qdrant_impl.py | 40 ---------------------- lightrag/kg/tidb_impl.py | 55 ------------------------------ 8 files changed, 261 deletions(-) diff --git a/lightrag/kg/chroma_impl.py b/lightrag/kg/chroma_impl.py index 125f51d4..c3927a19 100644 --- a/lightrag/kg/chroma_impl.py +++ b/lightrag/kg/chroma_impl.py @@ -243,42 +243,6 @@ class ChromaVectorDBStorage(BaseVectorStorage): logger.error(f"Error while deleting vectors from {self.namespace}: {e}") raise - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - try: - # Get all records from the collection - # Since ChromaDB doesn't directly support prefix search on IDs, - # we'll get all records and filter in Python - results = self._collection.get( - include=["metadatas", "documents", "embeddings"] - ) - - matching_records = [] - - # Filter records where ID starts with the prefix - for i, record_id in enumerate(results["ids"]): - if record_id.startswith(prefix): - matching_records.append( - { - "id": record_id, - "content": results["documents"][i], - "vector": results["embeddings"][i], - **results["metadatas"][i], - } - ) - - logger.debug( - f"Found {len(matching_records)} records with prefix '{prefix}'" - ) - return matching_records - except Exception as e: logger.error(f"Error during prefix search in ChromaDB: {str(e)}") raise diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py index 6cf07f41..f2afde2e 100644 --- a/lightrag/kg/faiss_impl.py +++ b/lightrag/kg/faiss_impl.py @@ -385,27 +385,6 @@ class FaissVectorDBStorage(BaseVectorStorage): return True # Return success - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - matching_records = [] - - # Search for records with IDs starting with the prefix - for faiss_id, meta in self._id_to_meta.items(): - if "__id__" in meta and meta["__id__"].startswith(prefix): - # Create a copy of all metadata and add "id" field - record = {**meta, "id": meta["__id__"]} - matching_records.append(record) - - logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'") - return matching_records - async def get_by_id(self, id: str) -> dict[str, Any] | None: """Get vector data by its ID diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py index 3b98ed37..00de16c5 100644 --- a/lightrag/kg/milvus_impl.py +++ b/lightrag/kg/milvus_impl.py @@ -221,31 +221,6 @@ class MilvusVectorDBStorage(BaseVectorStorage): except Exception as e: logger.error(f"Error while deleting vectors from {self.namespace}: {e}") - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - try: - # Use Milvus query with expression to find IDs with the given prefix - expression = f'id like "{prefix}%"' - results = self._client.query( - collection_name=self.namespace, - filter=expression, - output_fields=list(self.meta_fields) + ["id"], - ) - - logger.debug(f"Found {len(results)} records with prefix '{prefix}'") - return results - - except Exception as e: - logger.error(f"Error searching for records with prefix '{prefix}': {e}") - return [] - async def get_by_id(self, id: str) -> dict[str, Any] | None: """Get vector data by its ID diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index 7e5cef9a..d49a36b7 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -1149,35 +1149,6 @@ class MongoVectorDBStorage(BaseVectorStorage): except PyMongoError as e: logger.error(f"Error deleting relations for {entity_name}: {str(e)}") - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - try: - # Use MongoDB regex to find documents where _id starts with the prefix - cursor = self._data.find({"_id": {"$regex": f"^{prefix}"}}) - matching_records = await cursor.to_list(length=None) - - # Format results, ensuring created_at is included - results = [ - { - **doc, - "id": doc["_id"], - "created_at": doc.get("created_at"), # Include created_at field - } - for doc in matching_records - ] - - logger.debug( - f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}" - ) - return results - except PyMongoError as e: logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}") return [] diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index 7db01a55..fa56a214 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -259,26 +259,6 @@ class NanoVectorDBStorage(BaseVectorStorage): return True # Return success - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - storage = await self.client_storage - matching_records = [] - - # Search for records with IDs starting with the prefix - for record in storage["data"]: - if "__id__" in record and record["__id__"].startswith(prefix): - matching_records.append({**record, "id": record["__id__"]}) - - logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'") - return matching_records - async def get_by_id(self, id: str) -> dict[str, Any] | None: """Get vector data by its ID diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 85506f47..026d3f6e 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -800,41 +800,6 @@ class PGVectorStorage(BaseVectorStorage): except Exception as e: logger.error(f"Error deleting relations for entity {entity_name}: {e}") - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - table_name = namespace_to_table_name(self.namespace) - if not table_name: - logger.error(f"Unknown namespace for prefix search: {self.namespace}") - return [] - - search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2" - params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"} - - try: - results = await self.db.query(search_sql, params, multirows=True) - logger.debug(f"Found {len(results)} records with prefix '{prefix}'") - - # Format results to match the expected return format - formatted_results = [] - for record in results: - formatted_record = dict(record) - # Ensure id field is available (for consistency with NanoVectorDB implementation) - if "id" not in formatted_record: - formatted_record["id"] = record["id"] - formatted_results.append(formatted_record) - - return formatted_results - except Exception as e: - logger.error(f"Error during prefix search for '{prefix}': {e}") - return [] - async def get_by_id(self, id: str) -> dict[str, Any] | None: """Get vector data by its ID diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index 35c3d308..885a23ca 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -249,46 +249,6 @@ class QdrantVectorDBStorage(BaseVectorStorage): except Exception as e: logger.error(f"Error deleting relations for {entity_name}: {e}") - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - try: - # Use scroll method to find records with IDs starting with the prefix - results = self._client.scroll( - collection_name=self.namespace, - scroll_filter=models.Filter( - must=[ - models.FieldCondition( - key="id", match=models.MatchText(text=prefix, prefix=True) - ) - ] - ), - with_payload=True, - with_vectors=False, - limit=1000, # Adjust as needed for your use case - ) - - # Extract matching points - matching_records = results[0] - - # Format the results to match expected return format - formatted_results = [{**point.payload} for point in matching_records] - - logger.debug( - f"Found {len(formatted_results)} records with prefix '{prefix}'" - ) - return formatted_results - - except Exception as e: - logger.error(f"Error searching for prefix '{prefix}': {e}") - return [] - async def get_by_id(self, id: str) -> dict[str, Any] | None: """Get vector data by its ID diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py index 7dd2bf39..71e90b22 100644 --- a/lightrag/kg/tidb_impl.py +++ b/lightrag/kg/tidb_impl.py @@ -642,42 +642,6 @@ class TiDBVectorDBStorage(BaseVectorStorage): except Exception as e: return {"status": "error", "message": str(e)} - async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]: - """Search for records with IDs starting with a specific prefix. - - Args: - prefix: The prefix to search for in record IDs - - Returns: - List of records with matching ID prefixes - """ - # Determine which table to query based on namespace - if self.namespace == NameSpace.VECTOR_STORE_ENTITIES: - sql_template = SQL_TEMPLATES["search_entity_by_prefix"] - elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS: - sql_template = SQL_TEMPLATES["search_relationship_by_prefix"] - elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS: - sql_template = SQL_TEMPLATES["search_chunk_by_prefix"] - else: - logger.warning( - f"Namespace {self.namespace} not supported for prefix search" - ) - return [] - - # Add prefix pattern parameter with % for SQL LIKE - prefix_pattern = f"{prefix}%" - params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace} - - try: - results = await self.db.query(sql_template, params=params, multirows=True) - logger.debug( - f"Found {len(results) if results else 0} records with prefix '{prefix}'" - ) - return results if results else [] - except Exception as e: - logger.error(f"Error searching records with prefix '{prefix}': {e}") - return [] - async def get_by_id(self, id: str) -> dict[str, Any] | None: """Get vector data by its ID @@ -1333,25 +1297,6 @@ SQL_TEMPLATES = { WHERE (source_name = :source AND target_name = :target) AND workspace = :workspace """, - # Search by prefix SQL templates - "search_entity_by_prefix": """ - SELECT entity_id as id, name as entity_name, entity_type, description, content, - UNIX_TIMESTAMP(createtime) as created_at - FROM LIGHTRAG_GRAPH_NODES - WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace - """, - "search_relationship_by_prefix": """ - SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content, - UNIX_TIMESTAMP(createtime) as created_at - FROM LIGHTRAG_GRAPH_EDGES - WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace - """, - "search_chunk_by_prefix": """ - SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id, - UNIX_TIMESTAMP(createtime) as created_at - FROM LIGHTRAG_DOC_CHUNKS - WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace - """, # Drop tables "drop_specifiy_table_workspace": "DELETE FROM {table_name} WHERE workspace = :workspace", }