Remove deprecated search_by_prefix from all vector storage backends

2025-05-03 11:17:49 +08:00
parent 3b4015f6fd
commit 045993f7d2
8 changed files with 0 additions and 261 deletions
--- a/lightrag/kg/chroma_impl.py
+++ b/lightrag/kg/chroma_impl.py
@@ -243,42 +243,6 @@ class ChromaVectorDBStorage(BaseVectorStorage):
            logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
            raise

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        try:
-            # Get all records from the collection
-            # Since ChromaDB doesn't directly support prefix search on IDs,
-            # we'll get all records and filter in Python
-            results = self._collection.get(
-                include=["metadatas", "documents", "embeddings"]
-            )
-
-            matching_records = []
-
-            # Filter records where ID starts with the prefix
-            for i, record_id in enumerate(results["ids"]):
-                if record_id.startswith(prefix):
-                    matching_records.append(
-                        {
-                            "id": record_id,
-                            "content": results["documents"][i],
-                            "vector": results["embeddings"][i],
-                            **results["metadatas"][i],
-                        }
-                    )
-
-            logger.debug(
-                f"Found {len(matching_records)} records with prefix '{prefix}'"
-            )
-            return matching_records
-
        except Exception as e:
            logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
            raise
--- a/lightrag/kg/faiss_impl.py
+++ b/lightrag/kg/faiss_impl.py
@@ -385,27 +385,6 @@ class FaissVectorDBStorage(BaseVectorStorage):

        return True  # Return success

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        matching_records = []
-
-        # Search for records with IDs starting with the prefix
-        for faiss_id, meta in self._id_to_meta.items():
-            if "__id__" in meta and meta["__id__"].startswith(prefix):
-                # Create a copy of all metadata and add "id" field
-                record = {**meta, "id": meta["__id__"]}
-                matching_records.append(record)
-
-        logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
-        return matching_records
-
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
        """Get vector data by its ID

--- a/lightrag/kg/milvus_impl.py
+++ b/lightrag/kg/milvus_impl.py
@@ -221,31 +221,6 @@ class MilvusVectorDBStorage(BaseVectorStorage):
        except Exception as e:
            logger.error(f"Error while deleting vectors from {self.namespace}: {e}")

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        try:
-            # Use Milvus query with expression to find IDs with the given prefix
-            expression = f'id like "{prefix}%"'
-            results = self._client.query(
-                collection_name=self.namespace,
-                filter=expression,
-                output_fields=list(self.meta_fields) + ["id"],
-            )
-
-            logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
-            return results
-
-        except Exception as e:
-            logger.error(f"Error searching for records with prefix '{prefix}': {e}")
-            return []
-
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
        """Get vector data by its ID

--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -1149,35 +1149,6 @@ class MongoVectorDBStorage(BaseVectorStorage):
        except PyMongoError as e:
            logger.error(f"Error deleting relations for {entity_name}: {str(e)}")

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        try:
-            # Use MongoDB regex to find documents where _id starts with the prefix
-            cursor = self._data.find({"_id": {"$regex": f"^{prefix}"}})
-            matching_records = await cursor.to_list(length=None)
-
-            # Format results, ensuring created_at is included
-            results = [
-                {
-                    **doc,
-                    "id": doc["_id"],
-                    "created_at": doc.get("created_at"),  # Include created_at field
-                }
-                for doc in matching_records
-            ]
-
-            logger.debug(
-                f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}"
-            )
-            return results
-
        except PyMongoError as e:
            logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
            return []
--- a/lightrag/kg/nano_vector_db_impl.py
+++ b/lightrag/kg/nano_vector_db_impl.py
@@ -259,26 +259,6 @@ class NanoVectorDBStorage(BaseVectorStorage):

        return True  # Return success

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        storage = await self.client_storage
-        matching_records = []
-
-        # Search for records with IDs starting with the prefix
-        for record in storage["data"]:
-            if "__id__" in record and record["__id__"].startswith(prefix):
-                matching_records.append({**record, "id": record["__id__"]})
-
-        logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
-        return matching_records
-
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
        """Get vector data by its ID

--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -800,41 +800,6 @@ class PGVectorStorage(BaseVectorStorage):
        except Exception as e:
            logger.error(f"Error deleting relations for entity {entity_name}: {e}")

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        table_name = namespace_to_table_name(self.namespace)
-        if not table_name:
-            logger.error(f"Unknown namespace for prefix search: {self.namespace}")
-            return []
-
-        search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2"
-        params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"}
-
-        try:
-            results = await self.db.query(search_sql, params, multirows=True)
-            logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
-
-            # Format results to match the expected return format
-            formatted_results = []
-            for record in results:
-                formatted_record = dict(record)
-                # Ensure id field is available (for consistency with NanoVectorDB implementation)
-                if "id" not in formatted_record:
-                    formatted_record["id"] = record["id"]
-                formatted_results.append(formatted_record)
-
-            return formatted_results
-        except Exception as e:
-            logger.error(f"Error during prefix search for '{prefix}': {e}")
-            return []
-
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
        """Get vector data by its ID

--- a/lightrag/kg/qdrant_impl.py
+++ b/lightrag/kg/qdrant_impl.py
@@ -249,46 +249,6 @@ class QdrantVectorDBStorage(BaseVectorStorage):
        except Exception as e:
            logger.error(f"Error deleting relations for {entity_name}: {e}")

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        try:
-            # Use scroll method to find records with IDs starting with the prefix
-            results = self._client.scroll(
-                collection_name=self.namespace,
-                scroll_filter=models.Filter(
-                    must=[
-                        models.FieldCondition(
-                            key="id", match=models.MatchText(text=prefix, prefix=True)
-                        )
-                    ]
-                ),
-                with_payload=True,
-                with_vectors=False,
-                limit=1000,  # Adjust as needed for your use case
-            )
-
-            # Extract matching points
-            matching_records = results[0]
-
-            # Format the results to match expected return format
-            formatted_results = [{**point.payload} for point in matching_records]
-
-            logger.debug(
-                f"Found {len(formatted_results)} records with prefix '{prefix}'"
-            )
-            return formatted_results
-
-        except Exception as e:
-            logger.error(f"Error searching for prefix '{prefix}': {e}")
-            return []
-
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
        """Get vector data by its ID

--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -642,42 +642,6 @@ class TiDBVectorDBStorage(BaseVectorStorage):
        except Exception as e:
            return {"status": "error", "message": str(e)}

-    async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
-        """Search for records with IDs starting with a specific prefix.
-
-        Args:
-            prefix: The prefix to search for in record IDs
-
-        Returns:
-            List of records with matching ID prefixes
-        """
-        # Determine which table to query based on namespace
-        if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
-            sql_template = SQL_TEMPLATES["search_entity_by_prefix"]
-        elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
-            sql_template = SQL_TEMPLATES["search_relationship_by_prefix"]
-        elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
-            sql_template = SQL_TEMPLATES["search_chunk_by_prefix"]
-        else:
-            logger.warning(
-                f"Namespace {self.namespace} not supported for prefix search"
-            )
-            return []
-
-        # Add prefix pattern parameter with % for SQL LIKE
-        prefix_pattern = f"{prefix}%"
-        params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace}
-
-        try:
-            results = await self.db.query(sql_template, params=params, multirows=True)
-            logger.debug(
-                f"Found {len(results) if results else 0} records with prefix '{prefix}'"
-            )
-            return results if results else []
-        except Exception as e:
-            logger.error(f"Error searching records with prefix '{prefix}': {e}")
-            return []
-
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
        """Get vector data by its ID

@@ -1333,25 +1297,6 @@ SQL_TEMPLATES = {
        WHERE (source_name = :source AND target_name = :target)
        AND workspace = :workspace
    """,
-    # Search by prefix SQL templates
-    "search_entity_by_prefix": """
-        SELECT entity_id as id, name as entity_name, entity_type, description, content,
-               UNIX_TIMESTAMP(createtime) as created_at
-        FROM LIGHTRAG_GRAPH_NODES
-        WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
-    """,
-    "search_relationship_by_prefix": """
-        SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content,
-               UNIX_TIMESTAMP(createtime) as created_at
-        FROM LIGHTRAG_GRAPH_EDGES
-        WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
-    """,
-    "search_chunk_by_prefix": """
-        SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id,
-               UNIX_TIMESTAMP(createtime) as created_at
-        FROM LIGHTRAG_DOC_CHUNKS
-        WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
-    """,
    # Drop tables
    "drop_specifiy_table_workspace": "DELETE FROM {table_name} WHERE workspace = :workspace",
 }