Merge branch 'HKUDS:main' into main

Authored by Saifeddine ALOUI on 2025-02-18 10:25:31 +01:00; committed by GitHub.

5 changed files with 24 additions and 60 deletions. The merge removes the drop() operation from the KV storage base class and its JSON, Mongo, and Redis backends, and simplifies _build_query_context to take the low- and high-level keyword strings as two separate parameters instead of a two-element list.


@@ -135,10 +135,6 @@ class BaseKVStorage(StorageNameSpace, ABC):
     async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
         """Upsert data"""
-    @abstractmethod
-    async def drop(self) -> None:
-        """Drop the storage"""
 @dataclass
 class BaseGraphStorage(StorageNameSpace, ABC):
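This first hunk removes the abstract drop() method from BaseKVStorage, so storage backends are no longer required to offer a clear-all operation. The sketch below is a minimal, runnable illustration of the post-merge contract; the trimmed-down base class and the ToyKVStorage backend are stand-ins invented for this example, not code from the repository.

import asyncio
from abc import ABC, abstractmethod
from typing import Any


class BaseKVStorage(ABC):  # trimmed: the real class also inherits StorageNameSpace
    @abstractmethod
    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
        """Upsert data"""


class ToyKVStorage(BaseKVStorage):  # hypothetical backend, for illustration only
    def __init__(self) -> None:
        self._data: dict[str, dict[str, Any]] = {}

    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
        self._data.update(data)


async def main() -> None:
    kv = ToyKVStorage()         # instantiable without any drop() method
    await kv.upsert({"doc-1": {"content": "hello"}})
    print(hasattr(kv, "drop"))  # False: drop() is no longer part of the contract


asyncio.run(main())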


@@ -50,6 +50,3 @@ class JsonKVStorage(BaseKVStorage):
         for doc_id in ids:
             self._data.pop(doc_id, None)
         await self.index_done_callback()
-    async def drop(self) -> None:
-        self._data = {}
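In the JSON backend, the removed drop() only reset the in-memory dict; unlike the surviving deletion path above, it never called index_done_callback(), so nothing was flushed to disk. The toy file-backed store below reproduces that asymmetry; the class, the delete() method name, and the file handling are simplified assumptions for illustration.

import asyncio
import json
import os
import tempfile


class ToyJsonKV:
    def __init__(self, path: str) -> None:
        self._path = path
        self._data: dict = {}

    async def index_done_callback(self) -> None:
        # persist the in-memory dict, as the JSON backend's callback does
        with open(self._path, "w") as f:
            json.dump(self._data, f)

    async def delete(self, ids: list) -> None:
        for doc_id in ids:
            self._data.pop(doc_id, None)
        await self.index_done_callback()  # deletion is flushed to disk

    async def drop(self) -> None:         # the behavior this merge removes
        self._data = {}                   # cleared in memory only, never flushed


async def main() -> None:
    kv = ToyJsonKV(os.path.join(tempfile.gettempdir(), "toy_kv.json"))
    kv._data = {"a": {"x": 1}, "b": {"x": 2}}
    await kv.delete(["a"])  # file now holds {"b": {"x": 2}}
    await kv.drop()         # memory is {}, file still holds {"b": {"x": 2}}
    print(kv._data, open(kv._path).read())


asyncio.run(main())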


@@ -117,10 +117,6 @@ class MongoKVStorage(BaseKVStorage):
         # Mongo handles persistence automatically
         pass
-    async def drop(self) -> None:
-        """Drop the collection"""
-        await self._data.drop()
 @final
 @dataclass
@@ -202,10 +198,6 @@ class MongoDocStatusStorage(DocStatusStorage):
         # Mongo handles persistence automatically
         pass
-    async def drop(self) -> None:
-        """Drop the collection"""
-        await self._data.drop()
 @final
 @dataclass
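Both Mongo hunks delete the same three lines: drop() simply forwarded to the driver's collection-level drop, so no extra persistence step was needed. Below is a hedged sketch of that call with the Motor async driver; it assumes a reachable MongoDB instance, and the URI, database, and collection names are placeholders.

import asyncio
from motor.motor_asyncio import AsyncIOMotorClient


async def drop_collection() -> None:
    client = AsyncIOMotorClient("mongodb://localhost:27017")  # placeholder URI
    coll = client["lightrag"]["kv_store"]                     # placeholder names
    # Collection.drop() removes the collection and its indexes in one call,
    # which is the equivalent of the removed `await self._data.drop()`.
    await coll.drop()


asyncio.run(drop_collection())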


@@ -61,8 +61,3 @@ class RedisKVStorage(BaseKVStorage):
     async def index_done_callback(self) -> None:
         # Redis handles persistence automatically
         pass
-    async def drop(self) -> None:
-        keys = await self._redis.keys(f"{self.namespace}:*")
-        if keys:
-            await self._redis.delete(*keys)
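The Redis variant enumerated every key in the storage namespace with KEYS and deleted them in one round trip. Below is a sketch of the same clear-all using redis-py's asyncio client (assuming redis-py 5.x, a reachable server, and placeholder URL and namespace); scan_iter() is swapped in for the removed code's KEYS call, which blocks the server on large databases.

import asyncio
import redis.asyncio as aioredis


async def drop_namespace(url: str, namespace: str) -> None:
    r = aioredis.from_url(url)
    # SCAN-based iteration: same result as KEYS, without blocking the server
    keys = [key async for key in r.scan_iter(f"{namespace}:*")]
    if keys:
        await r.delete(*keys)
    await r.aclose()


asyncio.run(drop_namespace("redis://localhost:6379", "kv_store"))  # placeholders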


@@ -640,15 +640,13 @@ async def kg_query(
     )
     query_param.mode = "local"
-    ll_keywords = ", ".join(ll_keywords) if ll_keywords else ""
-    hl_keywords = ", ".join(hl_keywords) if hl_keywords else ""
-    logger.info("Using %s mode for query processing", query_param.mode)
+    ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
+    hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
     # Build context
-    keywords = [ll_keywords, hl_keywords]
     context = await _build_query_context(
-        keywords,
+        ll_keywords_str,
+        hl_keywords_str,
         knowledge_graph_inst,
         entities_vdb,
         relationships_vdb,
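In kg_query, the joined keyword strings gain _str suffixes so they no longer rebind the incoming ll_keywords/hl_keywords lists, the mode log line goes away, and _build_query_context now receives the two strings directly instead of a packed list. The join step itself is unchanged; a short demonstration with sample keywords:

ll_keywords = ["alice", "bob"]  # sample extracted low-level keywords
hl_keywords: list[str] = []     # high-level extraction found nothing

ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
print(repr(ll_keywords_str), repr(hl_keywords_str))  # 'alice, bob' ''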
@@ -873,7 +871,8 @@ async def mix_kg_vector_query(
         # Build knowledge graph context
         context = await _build_query_context(
-            [ll_keywords_str, hl_keywords_str],
+            ll_keywords_str,
+            hl_keywords_str,
             knowledge_graph_inst,
             entities_vdb,
             relationships_vdb,
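mix_kg_vector_query already had the joined strings, so only its call site changes: the list literal becomes two positional arguments. With a toy stand-in for _build_query_context (the storage parameters the real function takes are omitted), the before and after look like this:

import asyncio


async def _build_query_context(ll_keywords: str, hl_keywords: str) -> str:
    # toy stand-in: the real function also takes graph, vector, and KV storages
    return f"low: {ll_keywords} | high: {hl_keywords}"


async def main() -> None:
    ll_keywords_str, hl_keywords_str = "alice, bob", "relationships"
    # before: await _build_query_context([ll_keywords_str, hl_keywords_str], ...)
    context = await _build_query_context(ll_keywords_str, hl_keywords_str)
    print(context)


asyncio.run(main())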
@@ -1013,18 +1012,14 @@ async def mix_kg_vector_query(
 async def _build_query_context(
-    query: list,
+    ll_keywords: str,
+    hl_keywords: str,
     knowledge_graph_inst: BaseGraphStorage,
     entities_vdb: BaseVectorStorage,
     relationships_vdb: BaseVectorStorage,
     text_chunks_db: BaseKVStorage,
     query_param: QueryParam,
 ):
-    # ll_entities_context, ll_relations_context, ll_text_units_context = "", "", ""
-    # hl_entities_context, hl_relations_context, hl_text_units_context = "", "", ""
-    ll_keywords, hl_keywords = query[0], query[1]
     if query_param.mode == "local":
         entities_context, relations_context, text_units_context = await _get_node_data(
             ll_keywords,
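On the callee side, the list parameter disappears along with the query[0], query[1] unpacking and two stale commented-out lines. Explicit string parameters fail earlier and are visible to type checkers; a toy contrast of the two signatures (storage parameters omitted):

import asyncio


async def build_old(query: list):
    # a short list only fails here, deep inside the function, at run time
    ll_keywords, hl_keywords = query[0], query[1]
    return ll_keywords, hl_keywords


async def build_new(ll_keywords: str, hl_keywords: str):
    # a missing argument now fails at the call site, and a type checker can
    # flag a list being passed where a string is expected
    return ll_keywords, hl_keywords


print(asyncio.run(build_new("alice, bob", "relationships")))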
@@ -1081,32 +1076,24 @@ async def _build_query_context(
         return None
     result = f"""
 -----Entities-----
 ```csv
 {entities_context}
 ```
 -----Relationships-----
 ```csv
 {relations_context}
 ```
 -----Sources-----
 ```csv
 {text_units_context}
 ```
-"""
-    contex_tokens = len(encode_string_by_tiktoken(result))
-    entities_tokens = len(encode_string_by_tiktoken(entities_context))
-    relations_tokens = len(encode_string_by_tiktoken(relations_context))
-    text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
-    logger.debug(
-        f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
-    )
+""".strip()
     return result
 async def _get_node_data(
-    query,
+    query: str,
     knowledge_graph_inst: BaseGraphStorage,
     entities_vdb: BaseVectorStorage,
     text_chunks_db: BaseKVStorage,
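Two independent cleanups land in the context assembly: the f-string result is stripped so the prompt no longer begins and ends with a blank line, and the per-section token counts logged through encode_string_by_tiktoken are dropped. The sketch below reproduces both effects with the tiktoken package directly; cl100k_base is a placeholder for whatever encoding the project helper uses.

import tiktoken

entities_context = "id,entity,description\n1,alice,example row"  # sample CSV block

result = f"""
-----Entities-----
```csv
{entities_context}
```
""".strip()                    # drops the leading and trailing newline

print(result.splitlines()[0])  # '-----Entities-----' rather than an empty line

enc = tiktoken.get_encoding("cl100k_base")         # placeholder encoding choice
print("context tokens:", len(enc.encode(result)))  # the total the old debug log reported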
@@ -1760,15 +1747,12 @@ async def kg_query_with_keywords(
     ll_keywords_str = ", ".join(ll_keywords_flat) if ll_keywords_flat else ""
     hl_keywords_str = ", ".join(hl_keywords_flat) if hl_keywords_flat else ""
-    keywords = [ll_keywords_str, hl_keywords_str]
-    logger.info("Using %s mode for query processing", query_param.mode)
     # ---------------------------
     # 3) BUILD CONTEXT
     # ---------------------------
     context = await _build_query_context(
-        keywords,
+        ll_keywords_str,
+        hl_keywords_str,
         knowledge_graph_inst,
         entities_vdb,
         relationships_vdb,
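kg_query_with_keywords gets the same call-site treatment and, like kg_query, loses its mode log line. The *_flat names suggest the keyword lists were flattened from nested per-extraction lists upstream; a small illustration of that assumed shape:

ll_keywords_nested = [["alice", "bob"], ["carol"]]  # assumed upstream shape
ll_keywords_flat = [kw for group in ll_keywords_nested for kw in group]
ll_keywords_str = ", ".join(ll_keywords_flat) if ll_keywords_flat else ""
print(ll_keywords_str)  # alice, bob, carol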