From 0ec61d640769a32b141afcf4ef58c5d5a7e9fe11 Mon Sep 17 00:00:00 2001
From: Roy <arindamroy11235@gmail.com>
Date: Fri, 7 Mar 2025 18:45:28 +0000
Subject: [PATCH] Add ID-based query filtering parameter and pin project dependencies

- Replaced the minimal requirements.txt with an expanded list pinned to exact
  package versions (numpy and protobuf are left unpinned)
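- Added an optional `ids` parameter for filtering query results by IDs, threaded
  through the base, lightrag, and operate modules down to the relationship
  vector query (the storage-side filter itself is still a TODO)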
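- Modified the PostgreSQL entity and relation vector tables to include nullable
  document_id and chunk_id columns
- Extended .gitignore with local installer, requirements, and example/test
  file entries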
---
 .gitignore                   | 21 +++++++++++
 lightrag/base.py             |  5 ++-
 lightrag/kg/postgres_impl.py |  6 +++-
 lightrag/lightrag.py         |  1 +
 lightrag/operate.py          | 11 +++++-
 requirements.txt             | 67 +++++++++++++++++++++++++++---------
 6 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/.gitignore b/.gitignore
index a4afe4ea..4f28427f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,3 +64,24 @@ gui/
 
 # unit-test files
 test_*
+Miniconda3-latest-Linux-x86_64.sh
+requirements_basic.txt
+requirements.txt
+examples/test_chromadb.py
+examples/test_faiss.py
+examples/test_neo4j.py
+.gitignore
+requirements.txt
+examples/test_chromadb.py
+examples/test_faiss.py
+examples/*
+tests/test_lightrag_ollama_chat.py
+requirements.txt
+requirements.txt
+examples/test_chromadb.py
+examples/test_faiss.py
+examples/test_neo4j.py
+tests/test_lightrag_ollama_chat.py
+examples/test_chromadb.py
+examples/test_faiss.py
+examples/test_neo4j.py
diff --git a/lightrag/base.py b/lightrag/base.py
index 5f6a1bf1..e7ab3127 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -81,6 +81,9 @@ class QueryParam:
     history_turns: int = 3
     """Number of complete conversation turns (user-assistant pairs) to consider in the response context."""
 
+    ids: list[str] | None = None
+    """List of ids to filter the results."""
+
 
 @dataclass
 class StorageNameSpace(ABC):
@@ -107,7 +110,7 @@ class BaseVectorStorage(StorageNameSpace, ABC):
     meta_fields: set[str] = field(default_factory=set)
 
     @abstractmethod
-    async def query(self, query: str, top_k: int) -> list[dict[str, Any]]:
+    async def query(self, query: str, top_k: int, ids: list[str] = None) -> list[dict[str, Any]]:
         """Query the vector storage and retrieve top_k results."""
 
     @abstractmethod
diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py
index 54a59f5d..a069cec0 100644
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -492,7 +492,7 @@ class PGVectorStorage(BaseVectorStorage):
             await self.db.execute(upsert_sql, data)
 
     #################### query method ###############
-    async def query(self, query: str, top_k: int) -> list[dict[str, Any]]:
+    async def query(self, query: str, top_k: int, ids: list[str] = None) -> list[dict[str, Any]]:
         embeddings = await self.embedding_func([query])
         embedding = embeddings[0]
         embedding_string = ",".join(map(str, embedding))
@@ -1387,6 +1387,8 @@ TABLES = {
                     content_vector VECTOR,
                     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                     update_time TIMESTAMP,
+                    document_id VARCHAR(255) NULL,
+                    chunk_id VARCHAR(255) NULL,
 	                CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
                     )"""
     },
@@ -1400,6 +1402,8 @@ TABLES = {
                     content_vector VECTOR,
                     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                     update_time TIMESTAMP,
+                    document_id VARCHAR(255) NULL,
+                    chunk_id VARCHAR(255) NULL,
 	                CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
                     )"""
     },
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 0554ab76..ae6fd9dc 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -1243,6 +1243,7 @@ class LightRAG:
                     embedding_func=self.embedding_func,
                 ),
                 system_prompt=system_prompt,
+                ids = param.ids
             )
         elif param.mode == "naive":
             response = await naive_query(
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 30983145..6c0e1e4c 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -602,6 +602,7 @@ async def kg_query(
     global_config: dict[str, str],
     hashing_kv: BaseKVStorage | None = None,
     system_prompt: str | None = None,
+    ids: list[str] | None = None,
 ) -> str | AsyncIterator[str]:
     # Handle cache
     use_model_func = global_config["llm_model_func"]
@@ -649,6 +650,7 @@ async def kg_query(
         relationships_vdb,
         text_chunks_db,
         query_param,
+        ids
     )
 
     if query_param.only_need_context:
@@ -1016,6 +1018,7 @@ async def _build_query_context(
     relationships_vdb: BaseVectorStorage,
     text_chunks_db: BaseKVStorage,
     query_param: QueryParam,
+    ids: list[str] = None,
 ):
     if query_param.mode == "local":
         entities_context, relations_context, text_units_context = await _get_node_data(
@@ -1032,6 +1035,7 @@ async def _build_query_context(
             relationships_vdb,
             text_chunks_db,
             query_param,
+            ids = ids
         )
     else:  # hybrid mode
         ll_data, hl_data = await asyncio.gather(
@@ -1348,11 +1352,16 @@ async def _get_edge_data(
     relationships_vdb: BaseVectorStorage,
     text_chunks_db: BaseKVStorage,
     query_param: QueryParam,
+    ids: list[str] | None = None,
 ):
     logger.info(
         f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
     )
-    results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
+    if ids: 
+        #TODO: add ids to the query
+        results = await relationships_vdb.query(keywords, top_k = query_param.top_k, ids = ids)
+    else:   
+        results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
 
     if not len(results):
         return "", "", ""
diff --git a/requirements.txt b/requirements.txt
index d9a5c68e..088d8843 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,50 @@
-aiohttp
-configparser
-future
-
-# Basic modules
-gensim
-pipmaster
-pydantic
-python-dotenv
-
-setuptools
-tenacity
-
-# LLM packages
-tiktoken
-
-# Extra libraries are installed when needed using pipmaster
+aioboto3==14.1.0
+aiofiles==24.1.0
+aiohttp==3.11.13
+ascii_colors==0.5.2
+asyncpg==0.30.0
+chromadb==0.6.3
+community==1.0.0b1
+docx==0.2.4
+# faiss
+fastapi==0.115.11
+glm==0.4.4
+graspologic==3.4.1
+gunicorn==23.0.0
+httpx==0.28.1
+imgui_bundle==1.6.2
+jsonlines==4.0.0
+llama_index==0.12.22
+moderngl==5.12.0
+motor==3.7.0
+nano_vectordb==0.0.4.3
+neo4j==5.28.1
+nest_asyncio==1.6.0
+networkx==3.4.2
+numpy
+openpyxl==3.1.5
+oracledb==3.0.0
+Pillow==11.1.0
+pipmaster==0.4.0
+protobuf
+psutil==7.0.0
+psycopg==3.2.5
+psycopg_pool==3.2.6
+pydantic==2.10.6
+pymilvus==2.5.4
+pymongo==4.11.2
+PyPDF2==3.0.1
+python-dotenv==1.0.1
+pyvis==0.3.2
+qdrant_client==1.13.3
+redis==5.2.1
+Requests==2.32.3
+sentence_transformers==3.4.1
+setuptools==75.8.0
+SQLAlchemy==2.0.38
+starlette==0.46.0
+tenacity==9.0.0
+tiktoken==0.9.0
+torch==2.6.0
+transformers==4.49.0
+uvicorn==0.34.0