Update project dependencies and example test files
- Updated requirements.txt with the latest package versions
- Added support for filtering query results by IDs in the base and operate modules
- Modified PostgreSQL vector storage to include document and chunk ID fields
This commit is contained in:
21
.gitignore
vendored
21
.gitignore
vendored
@@ -64,3 +64,24 @@ gui/
|
|||||||
|
|
||||||
# unit-test files
|
# unit-test files
|
||||||
test_*
|
test_*
|
||||||
|
Miniconda3-latest-Linux-x86_64.sh
|
||||||
|
requirements_basic.txt
|
||||||
|
requirements.txt
|
||||||
|
examples/test_chromadb.py
|
||||||
|
examples/test_faiss.py
|
||||||
|
examples/test_neo4j.py
|
||||||
|
.gitignore
|
||||||
|
requirements.txt
|
||||||
|
examples/test_chromadb.py
|
||||||
|
examples/test_faiss.py
|
||||||
|
examples/*
|
||||||
|
tests/test_lightrag_ollama_chat.py
|
||||||
|
requirements.txt
|
||||||
|
requirements.txt
|
||||||
|
examples/test_chromadb.py
|
||||||
|
examples/test_faiss.py
|
||||||
|
examples/test_neo4j.py
|
||||||
|
tests/test_lightrag_ollama_chat.py
|
||||||
|
examples/test_chromadb.py
|
||||||
|
examples/test_faiss.py
|
||||||
|
examples/test_neo4j.py
|
||||||
|
@@ -81,6 +81,9 @@ class QueryParam:
|
|||||||
history_turns: int = 3
|
history_turns: int = 3
|
||||||
"""Number of complete conversation turns (user-assistant pairs) to consider in the response context."""
|
"""Number of complete conversation turns (user-assistant pairs) to consider in the response context."""
|
||||||
|
|
||||||
|
ids: list[str] | None = None
|
||||||
|
"""List of ids to filter the results."""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class StorageNameSpace(ABC):
|
class StorageNameSpace(ABC):
|
||||||
@@ -107,7 +110,7 @@ class BaseVectorStorage(StorageNameSpace, ABC):
|
|||||||
meta_fields: set[str] = field(default_factory=set)
|
meta_fields: set[str] = field(default_factory=set)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def query(self, query: str, top_k: int) -> list[dict[str, Any]]:
|
async def query(self, query: str, top_k: int, ids: list[str] = None) -> list[dict[str, Any]]:
|
||||||
"""Query the vector storage and retrieve top_k results."""
|
"""Query the vector storage and retrieve top_k results."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@@ -492,7 +492,7 @@ class PGVectorStorage(BaseVectorStorage):
|
|||||||
await self.db.execute(upsert_sql, data)
|
await self.db.execute(upsert_sql, data)
|
||||||
|
|
||||||
#################### query method ###############
|
#################### query method ###############
|
||||||
async def query(self, query: str, top_k: int) -> list[dict[str, Any]]:
|
async def query(self, query: str, top_k: int, ids: list[str] = None) -> list[dict[str, Any]]:
|
||||||
embeddings = await self.embedding_func([query])
|
embeddings = await self.embedding_func([query])
|
||||||
embedding = embeddings[0]
|
embedding = embeddings[0]
|
||||||
embedding_string = ",".join(map(str, embedding))
|
embedding_string = ",".join(map(str, embedding))
|
||||||
@@ -1387,6 +1387,8 @@ TABLES = {
|
|||||||
content_vector VECTOR,
|
content_vector VECTOR,
|
||||||
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
update_time TIMESTAMP,
|
update_time TIMESTAMP,
|
||||||
|
document_id VARCHAR(255) NULL,
|
||||||
|
chunk_id VARCHAR(255) NULL,
|
||||||
CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
|
CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
|
||||||
)"""
|
)"""
|
||||||
},
|
},
|
||||||
@@ -1400,6 +1402,8 @@ TABLES = {
|
|||||||
content_vector VECTOR,
|
content_vector VECTOR,
|
||||||
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
update_time TIMESTAMP,
|
update_time TIMESTAMP,
|
||||||
|
document_id VARCHAR(255) NULL,
|
||||||
|
chunk_id VARCHAR(255) NULL,
|
||||||
CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
|
CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
|
||||||
)"""
|
)"""
|
||||||
},
|
},
|
||||||
|
@@ -1243,6 +1243,7 @@ class LightRAG:
|
|||||||
embedding_func=self.embedding_func,
|
embedding_func=self.embedding_func,
|
||||||
),
|
),
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
|
ids = param.ids
|
||||||
)
|
)
|
||||||
elif param.mode == "naive":
|
elif param.mode == "naive":
|
||||||
response = await naive_query(
|
response = await naive_query(
|
||||||
|
@@ -602,6 +602,7 @@ async def kg_query(
|
|||||||
global_config: dict[str, str],
|
global_config: dict[str, str],
|
||||||
hashing_kv: BaseKVStorage | None = None,
|
hashing_kv: BaseKVStorage | None = None,
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
|
ids: list[str] | None = None,
|
||||||
) -> str | AsyncIterator[str]:
|
) -> str | AsyncIterator[str]:
|
||||||
# Handle cache
|
# Handle cache
|
||||||
use_model_func = global_config["llm_model_func"]
|
use_model_func = global_config["llm_model_func"]
|
||||||
@@ -649,6 +650,7 @@ async def kg_query(
|
|||||||
relationships_vdb,
|
relationships_vdb,
|
||||||
text_chunks_db,
|
text_chunks_db,
|
||||||
query_param,
|
query_param,
|
||||||
|
ids
|
||||||
)
|
)
|
||||||
|
|
||||||
if query_param.only_need_context:
|
if query_param.only_need_context:
|
||||||
@@ -1016,6 +1018,7 @@ async def _build_query_context(
|
|||||||
relationships_vdb: BaseVectorStorage,
|
relationships_vdb: BaseVectorStorage,
|
||||||
text_chunks_db: BaseKVStorage,
|
text_chunks_db: BaseKVStorage,
|
||||||
query_param: QueryParam,
|
query_param: QueryParam,
|
||||||
|
ids: list[str] = None,
|
||||||
):
|
):
|
||||||
if query_param.mode == "local":
|
if query_param.mode == "local":
|
||||||
entities_context, relations_context, text_units_context = await _get_node_data(
|
entities_context, relations_context, text_units_context = await _get_node_data(
|
||||||
@@ -1032,6 +1035,7 @@ async def _build_query_context(
|
|||||||
relationships_vdb,
|
relationships_vdb,
|
||||||
text_chunks_db,
|
text_chunks_db,
|
||||||
query_param,
|
query_param,
|
||||||
|
ids = ids
|
||||||
)
|
)
|
||||||
else: # hybrid mode
|
else: # hybrid mode
|
||||||
ll_data, hl_data = await asyncio.gather(
|
ll_data, hl_data = await asyncio.gather(
|
||||||
@@ -1348,10 +1352,15 @@ async def _get_edge_data(
|
|||||||
relationships_vdb: BaseVectorStorage,
|
relationships_vdb: BaseVectorStorage,
|
||||||
text_chunks_db: BaseKVStorage,
|
text_chunks_db: BaseKVStorage,
|
||||||
query_param: QueryParam,
|
query_param: QueryParam,
|
||||||
|
ids: list[str] | None = None,
|
||||||
):
|
):
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
|
f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
|
||||||
)
|
)
|
||||||
|
if ids:
|
||||||
|
#TODO: add ids to the query
|
||||||
|
results = await relationships_vdb.query(keywords, top_k = query_param.top_k, ids = ids)
|
||||||
|
else:
|
||||||
results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
|
results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
|
||||||
|
|
||||||
if not len(results):
|
if not len(results):
|
||||||
|
@@ -1,17 +1,50 @@
|
|||||||
aiohttp
|
aioboto3==14.1.0
|
||||||
configparser
|
aiofiles==24.1.0
|
||||||
future
|
aiohttp==3.11.13
|
||||||
|
ascii_colors==0.5.2
|
||||||
# Basic modules
|
asyncpg==0.30.0
|
||||||
gensim
|
chromadb==0.6.3
|
||||||
pipmaster
|
community==1.0.0b1
|
||||||
pydantic
|
docx==0.2.4
|
||||||
python-dotenv
|
# faiss
|
||||||
|
fastapi==0.115.11
|
||||||
setuptools
|
glm==0.4.4
|
||||||
tenacity
|
graspologic==3.4.1
|
||||||
|
gunicorn==23.0.0
|
||||||
# LLM packages
|
httpx==0.28.1
|
||||||
tiktoken
|
imgui_bundle==1.6.2
|
||||||
|
jsonlines==4.0.0
|
||||||
# Extra libraries are installed when needed using pipmaster
|
llama_index==0.12.22
|
||||||
|
moderngl==5.12.0
|
||||||
|
motor==3.7.0
|
||||||
|
nano_vectordb==0.0.4.3
|
||||||
|
neo4j==5.28.1
|
||||||
|
nest_asyncio==1.6.0
|
||||||
|
networkx==3.4.2
|
||||||
|
numpy
|
||||||
|
openpyxl==3.1.5
|
||||||
|
oracledb==3.0.0
|
||||||
|
Pillow==11.1.0
|
||||||
|
pipmaster==0.4.0
|
||||||
|
protobuf
|
||||||
|
psutil==7.0.0
|
||||||
|
psycopg==3.2.5
|
||||||
|
psycopg_pool==3.2.6
|
||||||
|
pydantic==2.10.6
|
||||||
|
pymilvus==2.5.4
|
||||||
|
pymongo==4.11.2
|
||||||
|
PyPDF2==3.0.1
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
pyvis==0.3.2
|
||||||
|
qdrant_client==1.13.3
|
||||||
|
redis==5.2.1
|
||||||
|
Requests==2.32.3
|
||||||
|
sentence_transformers==3.4.1
|
||||||
|
setuptools==75.8.0
|
||||||
|
SQLAlchemy==2.0.38
|
||||||
|
starlette==0.46.0
|
||||||
|
tenacity==9.0.0
|
||||||
|
tiktoken==0.9.0
|
||||||
|
torch==2.6.0
|
||||||
|
transformers==4.49.0
|
||||||
|
uvicorn==0.34.0
|
||||||
|
Reference in New Issue
Block a user