Fixed an issue with converting the AGE query result to a dictionary;
refactored the solution for storing chunk ids.
@@ -432,19 +432,31 @@ class PGVectorStorage(BaseVectorStorage):
     def _upsert_entities(self, item: dict[str, Any]) -> tuple[str, dict[str, Any]]:
         upsert_sql = SQL_TEMPLATES["upsert_entity"]
+        source_id = item["source_id"]
+        if isinstance(source_id, str) and "<SEP>" in source_id:
+            chunk_ids = source_id.split("<SEP>")
+        else:
+            chunk_ids = [source_id]
+
         data: dict[str, Any] = {
             "workspace": self.db.workspace,
             "id": item["__id__"],
             "entity_name": item["entity_name"],
             "content": item["content"],
             "content_vector": json.dumps(item["__vector__"].tolist()),
-            "chunk_id": item["source_id"],
+            "chunk_ids": chunk_ids,
             # TODO: add document_id
         }
         return upsert_sql, data

     def _upsert_relationships(self, item: dict[str, Any]) -> tuple[str, dict[str, Any]]:
         upsert_sql = SQL_TEMPLATES["upsert_relationship"]
+        source_id = item["source_id"]
+        if isinstance(source_id, str) and "<SEP>" in source_id:
+            chunk_ids = source_id.split("<SEP>")
+        else:
+            chunk_ids = [source_id]
+
         data: dict[str, Any] = {
             "workspace": self.db.workspace,
             "id": item["__id__"],
@@ -452,7 +464,7 @@ class PGVectorStorage(BaseVectorStorage):
             "target_id": item["tgt_id"],
             "content": item["content"],
             "content_vector": json.dumps(item["__vector__"].tolist()),
-            "chunk_id": item["source_id"],
+            "chunk_ids": chunk_ids,
             # TODO: add document_id
         }
         return upsert_sql, data
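
Note: both upsert helpers above derive chunk_ids the same way, by splitting the "<SEP>"-joined source_id. A minimal standalone sketch of that split (the _split_source_id helper name is hypothetical and not part of this commit):

GRAPH_FIELD_SEP = "<SEP>"  # assumed to match the "<SEP>" literal used in the diff

def _split_source_id(source_id: str) -> list[str]:
    # Turn a "<SEP>"-joined source_id into a list of chunk ids.
    if isinstance(source_id, str) and GRAPH_FIELD_SEP in source_id:
        return source_id.split(GRAPH_FIELD_SEP)
    return [source_id]

assert _split_source_id("chunk-1<SEP>chunk-2") == ["chunk-1", "chunk-2"]
assert _split_source_id("chunk-1") == ["chunk-1"]
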
@@ -950,10 +962,14 @@ class PGGraphStorage(BaseGraphStorage):
                     vertices.get(edge["end_id"], {}),
                 )
             else:
-                if v is None or (v.count("{") < 1 and v.count("[") < 1):
+                if v is None:
                     d[k] = v
+                elif isinstance(v, str) and (v.count("{") < 1 and v.count("[") < 1):
+                    d[k] = v
+                elif isinstance(v, str):
+                    d[k] = json.loads(v)
                 else:
-                    d[k] = json.loads(v) if isinstance(v, str) else v
+                    d[k] = v

         return d

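
Note: the AGE record conversion above now calls json.loads only on strings that look like a JSON object or array; None, plain strings, and non-string values (which previously could fail on v.count) are passed through unchanged. A standalone sketch of the new branching, for illustration only (decode_age_value is a hypothetical name, not the actual method):

import json
from typing import Any

def decode_age_value(v: Any) -> Any:
    # Mirror of the branching above: only JSON-looking strings are parsed.
    if v is None:
        return v
    elif isinstance(v, str) and (v.count("{") < 1 and v.count("[") < 1):
        return v  # plain string, e.g. an entity name
    elif isinstance(v, str):
        return json.loads(v)  # serialized agtype payload
    else:
        return v  # ints, floats, bools, etc. pass through unchanged

assert decode_age_value('{"label": "Person"}') == {"label": "Person"}
assert decode_age_value("PERSON") == "PERSON"
assert decode_age_value(42) == 42
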
@@ -1556,7 +1572,7 @@ TABLES = {
                     content_vector VECTOR,
                     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                     update_time TIMESTAMP,
-                    chunk_id VARCHAR(255) NULL,
+                    chunk_ids VARCHAR(255)[] NULL,
                     CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
                     )"""
     },
@@ -1570,7 +1586,7 @@ TABLES = {
                     content_vector VECTOR,
                     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                     update_time TIMESTAMP,
-                    chunk_id VARCHAR(255) NULL,
+                    chunk_ids VARCHAR(255)[] NULL,
                     CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
                     )"""
     },
@@ -1654,22 +1670,25 @@ SQL_TEMPLATES = {
                       update_time = CURRENT_TIMESTAMP
                       """,
     "upsert_entity": """INSERT INTO LIGHTRAG_VDB_ENTITY (workspace, id, entity_name, content,
-                      content_vector, chunk_id)
-                      VALUES ($1, $2, $3, $4, $5, $6)
+                      content_vector, chunk_ids)
+                      VALUES ($1, $2, $3, $4, $5, $6::varchar[])
                       ON CONFLICT (workspace,id) DO UPDATE
                       SET entity_name=EXCLUDED.entity_name,
                       content=EXCLUDED.content,
                       content_vector=EXCLUDED.content_vector,
+                      chunk_ids=EXCLUDED.chunk_ids,
                       update_time=CURRENT_TIMESTAMP
                       """,
     "upsert_relationship": """INSERT INTO LIGHTRAG_VDB_RELATION (workspace, id, source_id,
-                      target_id, content, content_vector, chunk_id)
-                      VALUES ($1, $2, $3, $4, $5, $6, $7)
+                      target_id, content, content_vector, chunk_ids)
+                      VALUES ($1, $2, $3, $4, $5, $6, $7::varchar[])
                       ON CONFLICT (workspace,id) DO UPDATE
                       SET source_id=EXCLUDED.source_id,
                       target_id=EXCLUDED.target_id,
                       content=EXCLUDED.content,
-                      content_vector=EXCLUDED.content_vector, update_time = CURRENT_TIMESTAMP
+                      content_vector=EXCLUDED.content_vector,
+                      chunk_ids=EXCLUDED.chunk_ids,
+                      update_time = CURRENT_TIMESTAMP
                       """,
     # SQL for VectorStorage
     # "entities": """SELECT entity_name FROM
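
Note: the ::varchar[] cast and the chunk_ids list parameter rely on the Postgres driver mapping a Python list to an array. A hedged sketch assuming the templates are executed through asyncpg (as this storage layer appears to use); the connection string below is illustrative only:

import asyncio
import asyncpg

async def demo_array_binding() -> None:
    # Hypothetical DSN; replace with a real one.
    conn = await asyncpg.connect("postgresql://user:pass@localhost:5432/lightrag")
    row = await conn.fetchrow(
        "SELECT $1::varchar[] AS chunk_ids",
        ["chunk-1", "chunk-2"],  # Python list is bound as a Postgres varchar[]
    )
    print(list(row["chunk_ids"]))  # ['chunk-1', 'chunk-2']
    await conn.close()

# asyncio.run(demo_array_binding())
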
@@ -1720,8 +1739,8 @@ SQL_TEMPLATES = {
         FROM (
             SELECT r.id, r.source_id, r.target_id, 1 - (r.content_vector <=> '[{embedding_string}]'::vector) as distance
             FROM LIGHTRAG_VDB_RELATION r
+            JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids)
             WHERE r.workspace=$1
-            AND r.chunk_id IN (SELECT chunk_id FROM relevant_chunks)
         ) filtered
         WHERE distance>$2
         ORDER BY distance DESC
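
Note: on the query side, the scalar chunk_id IN (...) filter is replaced by a join on c.chunk_id = ANY(r.chunk_ids), so a relation (or entity, in the next hunk) matches when any of its stored chunk ids is among the relevant chunks. A pure-Python analogue of that filter, for intuition only:

relevant_chunks = {"chunk-2", "chunk-7"}
rows = [
    {"id": "rel-1", "chunk_ids": ["chunk-1", "chunk-2"]},
    {"id": "rel-2", "chunk_ids": ["chunk-3"]},
]
filtered = [r for r in rows if any(c in relevant_chunks for c in r["chunk_ids"])]
assert [r["id"] for r in filtered] == ["rel-1"]
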
@@ -1735,10 +1754,10 @@ SQL_TEMPLATES = {
         )
         SELECT entity_name FROM
             (
-                SELECT id, entity_name, 1 - (content_vector <=> '[{embedding_string}]'::vector) as distance
-                FROM LIGHTRAG_VDB_ENTITY
-                where workspace=$1
-                AND chunk_id IN (SELECT chunk_id FROM relevant_chunks)
+                SELECT e.id, e.entity_name, 1 - (e.content_vector <=> '[{embedding_string}]'::vector) as distance
+                FROM LIGHTRAG_VDB_ENTITY e
+                JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids)
+                WHERE e.workspace=$1
             )
         WHERE distance>$2
         ORDER BY distance DESC