Enhance PostgreSQL vector storage with chunk_id support

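- Updated the SQL templates for entity and relationship upserts to include a chunk_id column
- Modified the PGVectorStorage upsert methods to set chunk_id (taken from each item's source_id) when inserting or updating records
- Expanded database schema to track chunk-level metadata
- Changed the DEFAULT_CONFIG defaults in the second file (host, port, model) and translated its sample test queries to English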
This commit is contained in:
Roy
2025-03-07 20:18:01 +00:00
parent 0ec61d6407
commit bbe139cfeb
2 changed files with 14 additions and 11 deletions

View File

@@ -438,6 +438,7 @@ class PGVectorStorage(BaseVectorStorage):
"entity_name": item["entity_name"], "entity_name": item["entity_name"],
"content": item["content"], "content": item["content"],
"content_vector": json.dumps(item["__vector__"].tolist()), "content_vector": json.dumps(item["__vector__"].tolist()),
"chunk_id": item["source_id"],
} }
return upsert_sql, data return upsert_sql, data
@@ -450,6 +451,7 @@ class PGVectorStorage(BaseVectorStorage):
"target_id": item["tgt_id"], "target_id": item["tgt_id"],
"content": item["content"], "content": item["content"],
"content_vector": json.dumps(item["__vector__"].tolist()), "content_vector": json.dumps(item["__vector__"].tolist()),
"chunk_id": item["source_id"]
} }
return upsert_sql, data return upsert_sql, data
@@ -1486,8 +1488,9 @@ SQL_TEMPLATES = {
content_vector=EXCLUDED.content_vector, content_vector=EXCLUDED.content_vector,
update_time = CURRENT_TIMESTAMP update_time = CURRENT_TIMESTAMP
""", """,
"upsert_entity": """INSERT INTO LIGHTRAG_VDB_ENTITY (workspace, id, entity_name, content, content_vector) "upsert_entity": """INSERT INTO LIGHTRAG_VDB_ENTITY (workspace, id, entity_name, content,
VALUES ($1, $2, $3, $4, $5) content_vector, chunk_id)
VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (workspace,id) DO UPDATE ON CONFLICT (workspace,id) DO UPDATE
SET entity_name=EXCLUDED.entity_name, SET entity_name=EXCLUDED.entity_name,
content=EXCLUDED.content, content=EXCLUDED.content,
@@ -1495,8 +1498,8 @@ SQL_TEMPLATES = {
update_time=CURRENT_TIMESTAMP update_time=CURRENT_TIMESTAMP
""", """,
"upsert_relationship": """INSERT INTO LIGHTRAG_VDB_RELATION (workspace, id, source_id, "upsert_relationship": """INSERT INTO LIGHTRAG_VDB_RELATION (workspace, id, source_id,
target_id, content, content_vector) target_id, content, content_vector, chunk_id)
VALUES ($1, $2, $3, $4, $5, $6) VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (workspace,id) DO UPDATE ON CONFLICT (workspace,id) DO UPDATE
SET source_id=EXCLUDED.source_id, SET source_id=EXCLUDED.source_id,
target_id=EXCLUDED.target_id, target_id=EXCLUDED.target_id,

View File

@@ -38,16 +38,16 @@ class McpError(Exception):
DEFAULT_CONFIG = { DEFAULT_CONFIG = {
"server": { "server": {
"host": "localhost", "host": "host.docker.internal",
"port": 9621, "port": 11434,
"model": "lightrag:latest", "model": "llama3.2:latest",
"timeout": 300, "timeout": 300,
"max_retries": 1, "max_retries": 1,
"retry_delay": 1, "retry_delay": 1,
}, },
"test_cases": { "test_cases": {
"basic": {"query": "唐僧有几个徒弟"}, "basic": {"query": "How many disciples did Tang Seng have?"},
"generate": {"query": "电视剧西游记导演是谁"}, "generate": {"query": "Who directed the TV series Journey to the West?"},
}, },
} }
@@ -763,8 +763,8 @@ def parse_args() -> argparse.Namespace:
Configuration file (config.json): Configuration file (config.json):
{ {
"server": { "server": {
"host": "localhost", # Server address "host": "host.docker.internal", # Server address
"port": 9621, # Server port "port": 11434, # Server port
"model": "lightrag:latest" # Default model name "model": "lightrag:latest" # Default model name
}, },
"test_cases": { "test_cases": {