Implement the missing methods.

This commit is contained in:
zrguo
2025-03-04 15:50:53 +08:00
parent de9aeedad7
commit 3a2a636862
11 changed files with 1603 additions and 43 deletions

View File

@@ -15,7 +15,7 @@ from ..base import (
DocStatusStorage,
)
from ..namespace import NameSpace, is_namespace
from ..utils import logger
from ..utils import logger, compute_mdhash_id
from ..types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
import pipmaster as pm
@@ -333,7 +333,7 @@ class MongoGraphStorage(BaseGraphStorage):
Check if there's a direct single-hop edge from source_node_id to target_node_id.
We'll do a $graphLookup with maxDepth=0 from the source node—meaning
Look up zero expansions. Actually, for a direct edge check, we can do maxDepth=1
"Look up zero expansions." Actually, for a direct edge check, we can do maxDepth=1
and then see if the target node is in the "reachableNodes" at depth=0.
But typically for a direct edge, we might just do a find_one.
@@ -795,6 +795,52 @@ class MongoGraphStorage(BaseGraphStorage):
# Mongo handles persistence automatically
pass
async def remove_nodes(self, nodes: list[str]) -> None:
"""Delete multiple nodes
Args:
nodes: List of node IDs to be deleted
"""
logger.info(f"Deleting {len(nodes)} nodes")
if not nodes:
return
# 1. Remove all edges referencing these nodes (remove from edges array of other nodes)
await self.collection.update_many(
{},
{"$pull": {"edges": {"target": {"$in": nodes}}}}
)
# 2. Delete the node documents
await self.collection.delete_many({"_id": {"$in": nodes}})
logger.debug(f"Successfully deleted nodes: {nodes}")
async def remove_edges(self, edges: list[tuple[str, str]]) -> None:
"""Delete multiple edges
Args:
edges: List of edges to be deleted, each edge is a (source, target) tuple
"""
logger.info(f"Deleting {len(edges)} edges")
if not edges:
return
update_tasks = []
for source, target in edges:
# Remove edge pointing to target from source node's edges array
update_tasks.append(
self.collection.update_one(
{"_id": source},
{"$pull": {"edges": {"target": target}}}
)
)
if update_tasks:
await asyncio.gather(*update_tasks)
logger.debug(f"Successfully deleted edges: {edges}")
@final
@dataclass
@@ -932,11 +978,66 @@ class MongoVectorDBStorage(BaseVectorStorage):
# Mongo handles persistence automatically
pass
async def delete(self, ids: list[str]) -> None:
"""Delete vectors with specified IDs
Args:
ids: List of vector IDs to be deleted
"""
logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
if not ids:
return
try:
result = await self._data.delete_many({"_id": {"$in": ids}})
logger.debug(f"Successfully deleted {result.deleted_count} vectors from {self.namespace}")
except PyMongoError as e:
logger.error(f"Error while deleting vectors from {self.namespace}: {str(e)}")
async def delete_entity(self, entity_name: str) -> None:
raise NotImplementedError
"""Delete an entity by its name
Args:
entity_name: Name of the entity to delete
"""
try:
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
result = await self._data.delete_one({"_id": entity_id})
if result.deleted_count > 0:
logger.debug(f"Successfully deleted entity {entity_name}")
else:
logger.debug(f"Entity {entity_name} not found in storage")
except PyMongoError as e:
logger.error(f"Error deleting entity {entity_name}: {str(e)}")
async def delete_entity_relation(self, entity_name: str) -> None:
raise NotImplementedError
"""Delete all relations associated with an entity
Args:
entity_name: Name of the entity whose relations should be deleted
"""
try:
# Find relations where entity appears as source or target
relations_cursor = self._data.find(
{"$or": [{"src_id": entity_name}, {"tgt_id": entity_name}]}
)
relations = await relations_cursor.to_list(length=None)
if not relations:
logger.debug(f"No relations found for entity {entity_name}")
return
# Extract IDs of relations to delete
relation_ids = [relation["_id"] for relation in relations]
logger.debug(f"Found {len(relation_ids)} relations for entity {entity_name}")
# Delete the relations
result = await self._data.delete_many({"_id": {"$in": relation_ids}})
logger.debug(f"Deleted {result.deleted_count} relations for {entity_name}")
except PyMongoError as e:
logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):