fixed filtering

This commit is contained in:
Yannick Stephan
2025-02-09 13:16:21 +01:00
parent bf89dc18b7
commit 263a301179

View File

@@ -396,7 +396,6 @@ class LightRAG:
""" """
await self.apipeline_process_documents(string_or_strings) await self.apipeline_process_documents(string_or_strings)
await self.apipeline_process_chunks(split_by_character, split_by_character_only) await self.apipeline_process_chunks(split_by_character, split_by_character_only)
await self.apipeline_process_extract_graph()
def insert_custom_chunks(self, full_text: str, text_chunks: list[str]): def insert_custom_chunks(self, full_text: str, text_chunks: list[str]):
loop = always_get_an_event_loop() loop = always_get_an_event_loop()
@@ -544,8 +543,10 @@ class LightRAG:
return return
# If included in text_chunks is all processed, return # If included in text_chunks is all processed, return
new_docs_ids = await self.text_chunks.filter_keys(to_process_doc_keys) new_docs = await self.doc_status.get_by_ids(to_process_doc_keys)
new_docs = await self.doc_status.get_by_ids(list(new_docs_ids))
text_chunks_new_docs_ids = await self.text_chunks.filter_keys(to_process_doc_keys)
full_docs_new_docs_ids = await self.full_docs.filter_keys(to_process_doc_keys)
if not new_docs: if not new_docs:
logger.info("All documents have been processed or are duplicates") logger.info("All documents have been processed or are duplicates")
@@ -582,12 +583,19 @@ class LightRAG:
self.tiktoken_model_name, self.tiktoken_model_name,
) )
} }
await self.chunks_vdb.upsert(chunks)
# Update status with chunks information # Update status with chunks information
await self._process_entity_relation_graph(chunks) await self._process_entity_relation_graph(chunks)
await self.chunks_vdb.upsert(chunks)
await self.text_chunks.upsert(chunks) if not doc_id in full_docs_new_docs_ids:
await self.full_docs.upsert(
{doc_id: {"content": doc["content"]}}
)
if not doc_id in text_chunks_new_docs_ids:
await self.text_chunks.upsert(chunks)
doc_status.update( doc_status.update(
{ {
"status": DocStatus.PROCESSED, "status": DocStatus.PROCESSED,