From 0ec61d640769a32b141afcf4ef58c5d5a7e9fe11 Mon Sep 17 00:00:00 2001 From: Roy Date: Fri, 7 Mar 2025 18:45:28 +0000 Subject: [PATCH] Update project dependencies and example test files - Updated requirements.txt with latest package versions - Added support for filtering query results by IDs in base and operate modules - Modified PostgreSQL vector storage to include document and chunk ID fields --- .gitignore | 21 +++++++++++ lightrag/base.py | 5 ++- lightrag/kg/postgres_impl.py | 6 +++- lightrag/lightrag.py | 1 + lightrag/operate.py | 11 +++++- requirements.txt | 67 +++++++++++++++++++++++++++--------- 6 files changed, 91 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index a4afe4ea..4f28427f 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,24 @@ gui/ # unit-test files test_* +Miniconda3-latest-Linux-x86_64.sh +requirements_basic.txt +requirements.txt +examples/test_chromadb.py +examples/test_faiss.py +examples/test_neo4j.py +.gitignore +requirements.txt +examples/test_chromadb.py +examples/test_faiss.py +examples/* +tests/test_lightrag_ollama_chat.py +requirements.txt +requirements.txt +examples/test_chromadb.py +examples/test_faiss.py +examples/test_neo4j.py +tests/test_lightrag_ollama_chat.py +examples/test_chromadb.py +examples/test_faiss.py +examples/test_neo4j.py diff --git a/lightrag/base.py b/lightrag/base.py index 5f6a1bf1..e7ab3127 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -81,6 +81,9 @@ class QueryParam: history_turns: int = 3 """Number of complete conversation turns (user-assistant pairs) to consider in the response context.""" + ids: list[str] | None = None + """List of ids to filter the results.""" + @dataclass class StorageNameSpace(ABC): @@ -107,7 +110,7 @@ class BaseVectorStorage(StorageNameSpace, ABC): meta_fields: set[str] = field(default_factory=set) @abstractmethod - async def query(self, query: str, top_k: int) -> list[dict[str, Any]]: + async def query(self, query: str, top_k: int, ids: list[str] = None) -> list[dict[str, Any]]: """Query the vector storage and retrieve top_k results.""" @abstractmethod diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 54a59f5d..a069cec0 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -492,7 +492,7 @@ class PGVectorStorage(BaseVectorStorage): await self.db.execute(upsert_sql, data) #################### query method ############### - async def query(self, query: str, top_k: int) -> list[dict[str, Any]]: + async def query(self, query: str, top_k: int, ids: list[str] = None) -> list[dict[str, Any]]: embeddings = await self.embedding_func([query]) embedding = embeddings[0] embedding_string = ",".join(map(str, embedding)) @@ -1387,6 +1387,8 @@ TABLES = { content_vector VECTOR, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP, + document_id VARCHAR(255) NULL, + chunk_id VARCHAR(255) NULL, CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id) )""" }, @@ -1400,6 +1402,8 @@ TABLES = { content_vector VECTOR, create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, update_time TIMESTAMP, + document_id VARCHAR(255) NULL, + chunk_id VARCHAR(255) NULL, CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id) )""" }, diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 0554ab76..ae6fd9dc 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -1243,6 +1243,7 @@ class LightRAG: embedding_func=self.embedding_func, ), system_prompt=system_prompt, + ids = param.ids ) elif param.mode == "naive": response = await naive_query( diff --git a/lightrag/operate.py b/lightrag/operate.py index 30983145..6c0e1e4c 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -602,6 +602,7 @@ async def kg_query( global_config: dict[str, str], hashing_kv: BaseKVStorage | None = None, system_prompt: str | None = None, + ids: list[str] | None = None, ) -> str | AsyncIterator[str]: # Handle cache use_model_func = global_config["llm_model_func"] @@ -649,6 +650,7 @@ async def kg_query( relationships_vdb, text_chunks_db, query_param, + ids ) if query_param.only_need_context: @@ -1016,6 +1018,7 @@ async def _build_query_context( relationships_vdb: BaseVectorStorage, text_chunks_db: BaseKVStorage, query_param: QueryParam, + ids: list[str] = None, ): if query_param.mode == "local": entities_context, relations_context, text_units_context = await _get_node_data( @@ -1032,6 +1035,7 @@ async def _build_query_context( relationships_vdb, text_chunks_db, query_param, + ids = ids ) else: # hybrid mode ll_data, hl_data = await asyncio.gather( @@ -1348,11 +1352,16 @@ async def _get_edge_data( relationships_vdb: BaseVectorStorage, text_chunks_db: BaseKVStorage, query_param: QueryParam, + ids: list[str] | None = None, ): logger.info( f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}" ) - results = await relationships_vdb.query(keywords, top_k=query_param.top_k) + if ids: + #TODO: add ids to the query + results = await relationships_vdb.query(keywords, top_k = query_param.top_k, ids = ids) + else: + results = await relationships_vdb.query(keywords, top_k=query_param.top_k) if not len(results): return "", "", "" diff --git a/requirements.txt b/requirements.txt index d9a5c68e..088d8843 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,50 @@ -aiohttp -configparser -future - -# Basic modules -gensim -pipmaster -pydantic -python-dotenv - -setuptools -tenacity - -# LLM packages -tiktoken - -# Extra libraries are installed when needed using pipmaster +aioboto3==14.1.0 +aiofiles==24.1.0 +aiohttp==3.11.13 +ascii_colors==0.5.2 +asyncpg==0.30.0 +chromadb==0.6.3 +community==1.0.0b1 +docx==0.2.4 +# faiss +fastapi==0.115.11 +glm==0.4.4 +graspologic==3.4.1 +gunicorn==23.0.0 +httpx==0.28.1 +imgui_bundle==1.6.2 +jsonlines==4.0.0 +llama_index==0.12.22 +moderngl==5.12.0 +motor==3.7.0 +nano_vectordb==0.0.4.3 +neo4j==5.28.1 +nest_asyncio==1.6.0 +networkx==3.4.2 +numpy +openpyxl==3.1.5 +oracledb==3.0.0 +Pillow==11.1.0 +pipmaster==0.4.0 +protobuf +psutil==7.0.0 +psycopg==3.2.5 +psycopg_pool==3.2.6 +pydantic==2.10.6 +pymilvus==2.5.4 +pymongo==4.11.2 +PyPDF2==3.0.1 +python-dotenv==1.0.1 +pyvis==0.3.2 +qdrant_client==1.13.3 +redis==5.2.1 +Requests==2.32.3 +sentence_transformers==3.4.1 +setuptools==75.8.0 +SQLAlchemy==2.0.38 +starlette==0.46.0 +tenacity==9.0.0 +tiktoken==0.9.0 +torch==2.6.0 +transformers==4.49.0 +uvicorn==0.34.0