Refactor requirements and code formatting
- Simplified requirements.txt by removing specific version constraints
- Added a comment about extra library installation using pipmaster
- Improved code formatting in base.py, operate.py, and postgres_impl.py
- Cleaned up SQL templates and query method signatures with consistent formatting
This commit is contained in:
@@ -108,8 +108,11 @@ class BaseVectorStorage(StorageNameSpace, ABC):
|
|||||||
embedding_func: EmbeddingFunc
|
embedding_func: EmbeddingFunc
|
||||||
cosine_better_than_threshold: float = field(default=0.2)
|
cosine_better_than_threshold: float = field(default=0.2)
|
||||||
meta_fields: set[str] = field(default_factory=set)
|
meta_fields: set[str] = field(default_factory=set)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def query(self, query: str, top_k: int, ids: list[str] | None = None) -> list[dict[str, Any]]:
|
async def query(
|
||||||
|
self, query: str, top_k: int, ids: list[str] | None = None
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
"""Query the vector storage and retrieve top_k results."""
|
"""Query the vector storage and retrieve top_k results."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@@ -439,7 +439,7 @@ class PGVectorStorage(BaseVectorStorage):
|
|||||||
"content": item["content"],
|
"content": item["content"],
|
||||||
"content_vector": json.dumps(item["__vector__"].tolist()),
|
"content_vector": json.dumps(item["__vector__"].tolist()),
|
||||||
"chunk_id": item["source_id"],
|
"chunk_id": item["source_id"],
|
||||||
#TODO: add document_id
|
# TODO: add document_id
|
||||||
}
|
}
|
||||||
return upsert_sql, data
|
return upsert_sql, data
|
||||||
|
|
||||||
@@ -452,8 +452,8 @@ class PGVectorStorage(BaseVectorStorage):
|
|||||||
"target_id": item["tgt_id"],
|
"target_id": item["tgt_id"],
|
||||||
"content": item["content"],
|
"content": item["content"],
|
||||||
"content_vector": json.dumps(item["__vector__"].tolist()),
|
"content_vector": json.dumps(item["__vector__"].tolist()),
|
||||||
"chunk_id": item["source_id"]
|
"chunk_id": item["source_id"],
|
||||||
#TODO: add document_id
|
# TODO: add document_id
|
||||||
}
|
}
|
||||||
return upsert_sql, data
|
return upsert_sql, data
|
||||||
|
|
||||||
@@ -496,7 +496,9 @@ class PGVectorStorage(BaseVectorStorage):
|
|||||||
await self.db.execute(upsert_sql, data)
|
await self.db.execute(upsert_sql, data)
|
||||||
|
|
||||||
#################### query method ###############
|
#################### query method ###############
|
||||||
async def query(self, query: str, top_k: int, ids: list[str] | None = None) -> list[dict[str, Any]]:
|
async def query(
|
||||||
|
self, query: str, top_k: int, ids: list[str] | None = None
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
embeddings = await self.embedding_func([query])
|
embeddings = await self.embedding_func([query])
|
||||||
embedding = embeddings[0]
|
embedding = embeddings[0]
|
||||||
embedding_string = ",".join(map(str, embedding))
|
embedding_string = ",".join(map(str, embedding))
|
||||||
@@ -507,8 +509,7 @@ class PGVectorStorage(BaseVectorStorage):
|
|||||||
formatted_ids = "NULL"
|
formatted_ids = "NULL"
|
||||||
|
|
||||||
sql = SQL_TEMPLATES[self.base_namespace].format(
|
sql = SQL_TEMPLATES[self.base_namespace].format(
|
||||||
embedding_string=embedding_string,
|
embedding_string=embedding_string, doc_ids=formatted_ids
|
||||||
doc_ids=formatted_ids
|
|
||||||
)
|
)
|
||||||
params = {
|
params = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.db.workspace,
|
||||||
@@ -1672,8 +1673,7 @@ SQL_TEMPLATES = {
|
|||||||
ORDER BY distance DESC
|
ORDER BY distance DESC
|
||||||
LIMIT $3
|
LIMIT $3
|
||||||
""",
|
""",
|
||||||
"entities":
|
"entities": """
|
||||||
'''
|
|
||||||
WITH relevant_chunks AS (
|
WITH relevant_chunks AS (
|
||||||
SELECT id as chunk_id
|
SELECT id as chunk_id
|
||||||
FROM LIGHTRAG_DOC_CHUNKS
|
FROM LIGHTRAG_DOC_CHUNKS
|
||||||
@@ -1689,8 +1689,8 @@ SQL_TEMPLATES = {
|
|||||||
WHERE distance>$2
|
WHERE distance>$2
|
||||||
ORDER BY distance DESC
|
ORDER BY distance DESC
|
||||||
LIMIT $3
|
LIMIT $3
|
||||||
''',
|
""",
|
||||||
'chunks': """
|
"chunks": """
|
||||||
WITH relevant_chunks AS (
|
WITH relevant_chunks AS (
|
||||||
SELECT id as chunk_id
|
SELECT id as chunk_id
|
||||||
FROM LIGHTRAG_DOC_CHUNKS
|
FROM LIGHTRAG_DOC_CHUNKS
|
||||||
@@ -1706,5 +1706,5 @@ SQL_TEMPLATES = {
|
|||||||
WHERE distance>$2
|
WHERE distance>$2
|
||||||
ORDER BY distance DESC
|
ORDER BY distance DESC
|
||||||
LIMIT $3
|
LIMIT $3
|
||||||
"""
|
""",
|
||||||
}
|
}
|
@@ -893,7 +893,9 @@ async def mix_kg_vector_query(
|
|||||||
# Reduce top_k for vector search in hybrid mode since we have structured information from KG
|
# Reduce top_k for vector search in hybrid mode since we have structured information from KG
|
||||||
mix_topk = min(10, query_param.top_k)
|
mix_topk = min(10, query_param.top_k)
|
||||||
# TODO: add ids to the query
|
# TODO: add ids to the query
|
||||||
results = await chunks_vdb.query(augmented_query, top_k=mix_topk, ids = query_param.ids)
|
results = await chunks_vdb.query(
|
||||||
|
augmented_query, top_k=mix_topk, ids=query_param.ids
|
||||||
|
)
|
||||||
if not results:
|
if not results:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -1102,7 +1104,9 @@ async def _get_node_data(
|
|||||||
f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {entities_vdb.cosine_better_than_threshold}"
|
f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {entities_vdb.cosine_better_than_threshold}"
|
||||||
)
|
)
|
||||||
|
|
||||||
results = await entities_vdb.query(query, top_k=query_param.top_k, ids = query_param.ids)
|
results = await entities_vdb.query(
|
||||||
|
query, top_k=query_param.top_k, ids=query_param.ids
|
||||||
|
)
|
||||||
|
|
||||||
if not len(results):
|
if not len(results):
|
||||||
return "", "", ""
|
return "", "", ""
|
||||||
@@ -1357,7 +1361,9 @@ async def _get_edge_data(
|
|||||||
f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
|
f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
|
||||||
)
|
)
|
||||||
|
|
||||||
results = await relationships_vdb.query(keywords, top_k = query_param.top_k, ids = query_param.ids)
|
results = await relationships_vdb.query(
|
||||||
|
keywords, top_k=query_param.top_k, ids=query_param.ids
|
||||||
|
)
|
||||||
|
|
||||||
if not len(results):
|
if not len(results):
|
||||||
return "", "", ""
|
return "", "", ""
|
||||||
@@ -1606,7 +1612,9 @@ async def naive_query(
|
|||||||
if cached_response is not None:
|
if cached_response is not None:
|
||||||
return cached_response
|
return cached_response
|
||||||
|
|
||||||
results = await chunks_vdb.query(query, top_k=query_param.top_k, ids = query_param.ids)
|
results = await chunks_vdb.query(
|
||||||
|
query, top_k=query_param.top_k, ids=query_param.ids
|
||||||
|
)
|
||||||
if not len(results):
|
if not len(results):
|
||||||
return PROMPTS["fail_response"]
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
|
@@ -1,53 +1,3 @@
|
|||||||
aioboto3==14.1.0
|
|
||||||
aiofiles==24.1.0
|
|
||||||
aiohttp==3.11.13
|
|
||||||
ascii_colors==0.5.2
|
|
||||||
asyncpg==0.30.0
|
|
||||||
chromadb==0.6.3
|
|
||||||
community==1.0.0b1
|
|
||||||
docx==0.2.4
|
|
||||||
# faiss
|
|
||||||
fastapi==0.115.11
|
|
||||||
glm==0.4.4
|
|
||||||
graspologic==3.4.1
|
|
||||||
gunicorn==23.0.0
|
|
||||||
httpx==0.28.1
|
|
||||||
imgui_bundle==1.6.2
|
|
||||||
jsonlines==4.0.0
|
|
||||||
llama_index==0.12.22
|
|
||||||
moderngl==5.12.0
|
|
||||||
motor==3.7.0
|
|
||||||
nano_vectordb==0.0.4.3
|
|
||||||
neo4j==5.28.1
|
|
||||||
nest_asyncio==1.6.0
|
|
||||||
networkx==3.4.2
|
|
||||||
numpy
|
|
||||||
openpyxl==3.1.5
|
|
||||||
oracledb==3.0.0
|
|
||||||
Pillow==11.1.0
|
|
||||||
pipmaster==0.4.0
|
|
||||||
protobuf
|
|
||||||
psutil==7.0.0
|
|
||||||
psycopg==3.2.5
|
|
||||||
psycopg_pool==3.2.6
|
|
||||||
pydantic==2.10.6
|
|
||||||
pymilvus==2.5.4
|
|
||||||
pymongo==4.11.2
|
|
||||||
PyPDF2==3.0.1
|
|
||||||
python-dotenv==1.0.1
|
|
||||||
pyvis==0.3.2
|
|
||||||
qdrant_client==1.13.3
|
|
||||||
redis==5.2.1
|
|
||||||
Requests==2.32.3
|
|
||||||
sentence_transformers==3.4.1
|
|
||||||
setuptools==75.8.0
|
|
||||||
SQLAlchemy==2.0.38
|
|
||||||
starlette==0.46.0
|
|
||||||
tenacity==9.0.0
|
|
||||||
tiktoken==0.9.0
|
|
||||||
torch==2.6.0
|
|
||||||
transformers==4.49.0
|
|
||||||
uvicorn==0.34.0
|
|
||||||
aiohttp
|
aiohttp
|
||||||
configparser
|
configparser
|
||||||
future
|
future
|
||||||
@@ -63,3 +13,5 @@ tenacity
|
|||||||
|
|
||||||
# LLM packages
|
# LLM packages
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
|
# Extra libraries are installed when needed using pipmaster
|
||||||
|
Reference in New Issue
Block a user