Clean up code: rename batch variables and store doc_id (not the id/status tuple) as full_doc_id

This commit is contained in:
Yannick Stephan
2025-02-09 20:18:38 +01:00
parent 07e3d2b24f
commit 5e3100221c

View File

@@ -540,7 +540,7 @@ class LightRAG:
# 2. split docs into chunks, insert chunks, update doc status
batch_size = self.addon_params.get("insert_batch_size", 10)
batch_docs_list = [
docs_batches = [
list(to_process_docs.items())[i : i + batch_size]
for i in range(0, len(to_process_docs), batch_size)
]
@@ -548,12 +548,12 @@ class LightRAG:
# 3. iterate over batches
tasks: dict[str, list[Coroutine[Any, Any, None]]] = {}
logger.info(f"Number of batches to process: {len(batch_docs_list)}.")
logger.info(f"Number of batches to process: {len(docs_batches)}.")
for batch_idx, ids_doc_processing_status in enumerate(batch_docs_list):
for batch_idx, docs_batch in enumerate(docs_batches):
# 4. iterate over batch
for id_doc_processing_status in ids_doc_processing_status:
doc_id, status_doc = id_doc_processing_status
for doc_id_processing_status in docs_batch:
doc_id, status_doc = doc_id_processing_status
# Update status in processing
await self.doc_status.upsert(
{
@@ -570,7 +570,7 @@ class LightRAG:
chunks: dict[str, Any] = {
compute_mdhash_id(dp["content"], prefix="chunk-"): {
**dp,
"full_doc_id": id_doc_processing_status,
"full_doc_id": doc_id,
}
for dp in self.chunking_func(
status_doc.content,
@@ -627,7 +627,7 @@ class LightRAG:
}
)
continue
logger.info(f"Completed batch {batch_idx + 1} of {len(batch_docs_list)}.")
logger.info(f"Completed batch {batch_idx + 1} of {len(docs_batches)}.")
async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
try: