Merge branch 'postgres-doc-ids-filter-fix'

This commit is contained in:
yangdx
2025-04-22 11:28:29 +08:00

View File

@@ -649,17 +649,11 @@ class PGVectorStorage(BaseVectorStorage):
embeddings = await self.embedding_func([query]) embeddings = await self.embedding_func([query])
embedding = embeddings[0] embedding = embeddings[0]
embedding_string = ",".join(map(str, embedding)) embedding_string = ",".join(map(str, embedding))
# Use parameterized document IDs (None means search across all documents)
if ids: sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
formatted_ids = ",".join(f"'{id}'" for id in ids)
else:
formatted_ids = "NULL"
sql = SQL_TEMPLATES[self.namespace].format(
embedding_string=embedding_string, doc_ids=formatted_ids
)
params = { params = {
"workspace": self.db.workspace, "workspace": self.db.workspace,
"doc_ids": ids,
"better_than_threshold": self.cosine_better_than_threshold, "better_than_threshold": self.cosine_better_than_threshold,
"top_k": top_k, "top_k": top_k,
} }
@@ -2137,7 +2131,7 @@ SQL_TEMPLATES = {
WITH relevant_chunks AS ( WITH relevant_chunks AS (
SELECT id as chunk_id SELECT id as chunk_id
FROM LIGHTRAG_DOC_CHUNKS FROM LIGHTRAG_DOC_CHUNKS
WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}]) WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
) )
SELECT source_id as src_id, target_id as tgt_id SELECT source_id as src_id, target_id as tgt_id
FROM ( FROM (
@@ -2146,15 +2140,15 @@ SQL_TEMPLATES = {
JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids) JOIN relevant_chunks c ON c.chunk_id = ANY(r.chunk_ids)
WHERE r.workspace=$1 WHERE r.workspace=$1
) filtered ) filtered
WHERE distance>$2 WHERE distance>$3
ORDER BY distance DESC ORDER BY distance DESC
LIMIT $3 LIMIT $4
""", """,
"entities": """ "entities": """
WITH relevant_chunks AS ( WITH relevant_chunks AS (
SELECT id as chunk_id SELECT id as chunk_id
FROM LIGHTRAG_DOC_CHUNKS FROM LIGHTRAG_DOC_CHUNKS
WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}]) WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
) )
SELECT entity_name FROM SELECT entity_name FROM
( (
@@ -2163,26 +2157,26 @@ SQL_TEMPLATES = {
JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids) JOIN relevant_chunks c ON c.chunk_id = ANY(e.chunk_ids)
WHERE e.workspace=$1 WHERE e.workspace=$1
) as chunk_distances ) as chunk_distances
WHERE distance>$2 WHERE distance>$3
ORDER BY distance DESC ORDER BY distance DESC
LIMIT $3 LIMIT $4
""", """,
"chunks": """ "chunks": """
WITH relevant_chunks AS ( WITH relevant_chunks AS (
SELECT id as chunk_id SELECT id as chunk_id
FROM LIGHTRAG_DOC_CHUNKS FROM LIGHTRAG_DOC_CHUNKS
WHERE {doc_ids} IS NULL OR full_doc_id = ANY(ARRAY[{doc_ids}]) WHERE $2::varchar[] IS NULL OR full_doc_id = ANY($2::varchar[])
) )
SELECT id, content, file_path FROM SELECT id, content, file_path FROM
( (
SELECT id, content, file_path, 1 - (content_vector <=> '[{embedding_string}]'::vector) as distance SELECT id, content, file_path, 1 - (content_vector <=> '[{embedding_string}]'::vector) as distance
FROM LIGHTRAG_DOC_CHUNKS FROM LIGHTRAG_DOC_CHUNKS
where workspace=$1 WHERE workspace=$1
AND id IN (SELECT chunk_id FROM relevant_chunks) AND id IN (SELECT chunk_id FROM relevant_chunks)
) as chunk_distances ) as chunk_distances
WHERE distance>$2 WHERE distance>$3
ORDER BY distance DESC ORDER BY distance DESC
LIMIT $3 LIMIT $4
""", """,
# DROP tables # DROP tables
"drop_specifiy_table_workspace": """ "drop_specifiy_table_workspace": """