Refactor requirements and code formatting

- Simplified requirements.txt by removing specific version constraints
- Added comment about extra library installation using pipmaster
- Improved code formatting in base.py, operate.py, and postgres_impl.py
- Cleaned up SQL templates and query method signatures with consistent formatting
2025-03-10 15:39:18 +00:00
parent 3fa6d8757a
commit 92ae895713
4 changed files with 48 additions and 85 deletions
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -108,8 +108,11 @@ class BaseVectorStorage(StorageNameSpace, ABC):
    embedding_func: EmbeddingFunc
    cosine_better_than_threshold: float = field(default=0.2)
    meta_fields: set[str] = field(default_factory=set)
    @abstractmethod
-    async def query(self, query: str, top_k: int, ids: list[str] | None = None) -> list[dict[str, Any]]:
+    async def query(
+        self, query: str, top_k: int, ids: list[str] | None = None
+    ) -> list[dict[str, Any]]:
        """Query the vector storage and retrieve top_k results."""
    @abstractmethod
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -439,7 +439,7 @@ class PGVectorStorage(BaseVectorStorage):
            "content": item["content"],
            "content_vector": json.dumps(item["__vector__"].tolist()),
            "chunk_id": item["source_id"],
-            #TODO: add document_id
+            # TODO: add document_id
        }
        return upsert_sql, data
@@ -452,8 +452,8 @@ class PGVectorStorage(BaseVectorStorage):
            "target_id": item["tgt_id"],
            "content": item["content"],
            "content_vector": json.dumps(item["__vector__"].tolist()),
-            "chunk_id": item["source_id"]
+            "chunk_id": item["source_id"],
-            #TODO: add document_id
+            # TODO: add document_id
        }
        return upsert_sql, data
@@ -496,7 +496,9 @@ class PGVectorStorage(BaseVectorStorage):
            await self.db.execute(upsert_sql, data)
    #################### query method ###############
-    async def query(self, query: str, top_k: int, ids: list[str] | None = None) -> list[dict[str, Any]]:
+    async def query(
+        self, query: str, top_k: int, ids: list[str] | None = None
+    ) -> list[dict[str, Any]]:
        embeddings = await self.embedding_func([query])
        embedding = embeddings[0]
        embedding_string = ",".join(map(str, embedding))
@@ -507,8 +509,7 @@ class PGVectorStorage(BaseVectorStorage):
            formatted_ids = "NULL"
        sql = SQL_TEMPLATES[self.base_namespace].format(
-            embedding_string=embedding_string,
+            embedding_string=embedding_string, doc_ids=formatted_ids
-            doc_ids=formatted_ids
        )
        params = {
            "workspace": self.db.workspace,
@@ -1672,8 +1673,7 @@ SQL_TEMPLATES = {
    ORDER BY distance DESC
    LIMIT $3
    """,
-    "entities": 
+    "entities": """
-    '''
        WITH relevant_chunks AS (
            SELECT id as chunk_id
            FROM LIGHTRAG_DOC_CHUNKS
@@ -1689,8 +1689,8 @@ SQL_TEMPLATES = {
        WHERE distance>$2
        ORDER BY distance DESC
        LIMIT $3
-    ''',
+    """,
-    'chunks': """
+    "chunks": """
        WITH relevant_chunks AS (
            SELECT id as chunk_id
            FROM LIGHTRAG_DOC_CHUNKS
@@ -1706,5 +1706,5 @@ SQL_TEMPLATES = {
            WHERE distance>$2
            ORDER BY distance DESC
            LIMIT $3
-    """
+    """,
 }
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -893,7 +893,9 @@ async def mix_kg_vector_query(
            # Reduce top_k for vector search in hybrid mode since we have structured information from KG
            mix_topk = min(10, query_param.top_k)
            # TODO: add ids to the query
-            results = await chunks_vdb.query(augmented_query, top_k=mix_topk, ids = query_param.ids)
+            results = await chunks_vdb.query(
+                augmented_query, top_k=mix_topk, ids=query_param.ids
+            )
            if not results:
                return None
@@ -1102,7 +1104,9 @@ async def _get_node_data(
        f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {entities_vdb.cosine_better_than_threshold}"
    )
-    results = await entities_vdb.query(query, top_k=query_param.top_k, ids = query_param.ids)
+    results = await entities_vdb.query(
+        query, top_k=query_param.top_k, ids=query_param.ids
+    )
    if not len(results):
        return "", "", ""
@@ -1357,7 +1361,9 @@ async def _get_edge_data(
        f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
    )
-    results = await relationships_vdb.query(keywords, top_k = query_param.top_k, ids = query_param.ids)
+    results = await relationships_vdb.query(
+        keywords, top_k=query_param.top_k, ids=query_param.ids
+    )
    if not len(results):
        return "", "", ""
@@ -1606,7 +1612,9 @@ async def naive_query(
    if cached_response is not None:
        return cached_response
-    results = await chunks_vdb.query(query, top_k=query_param.top_k, ids = query_param.ids)
+    results = await chunks_vdb.query(
+        query, top_k=query_param.top_k, ids=query_param.ids
+    )
    if not len(results):
        return PROMPTS["fail_response"]
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,53 +1,3 @@
-aioboto3==14.1.0
-aiofiles==24.1.0
-aiohttp==3.11.13
-ascii_colors==0.5.2
-asyncpg==0.30.0
-chromadb==0.6.3
-community==1.0.0b1
-docx==0.2.4
-# faiss
-fastapi==0.115.11
-glm==0.4.4
-graspologic==3.4.1
-gunicorn==23.0.0
-httpx==0.28.1
-imgui_bundle==1.6.2
-jsonlines==4.0.0
-llama_index==0.12.22
-moderngl==5.12.0
-motor==3.7.0
-nano_vectordb==0.0.4.3
-neo4j==5.28.1
-nest_asyncio==1.6.0
-networkx==3.4.2
-numpy
-openpyxl==3.1.5
-oracledb==3.0.0
-Pillow==11.1.0
-pipmaster==0.4.0
-protobuf
-psutil==7.0.0
-psycopg==3.2.5
-psycopg_pool==3.2.6
-pydantic==2.10.6
-pymilvus==2.5.4
-pymongo==4.11.2
-PyPDF2==3.0.1
-python-dotenv==1.0.1
-pyvis==0.3.2
-qdrant_client==1.13.3
-redis==5.2.1
-Requests==2.32.3
-sentence_transformers==3.4.1
-setuptools==75.8.0
-SQLAlchemy==2.0.38
-starlette==0.46.0
-tenacity==9.0.0
-tiktoken==0.9.0
-torch==2.6.0
-transformers==4.49.0
-uvicorn==0.34.0
 aiohttp
 configparser
 future
@@ -63,3 +13,5 @@ tenacity
 # LLM packages
 tiktoken
 # Extra libraries are installed when needed using pipmaster