Clean up code: rename batch loop variables and store doc_id (not the (doc_id, status_doc) tuple) as full_doc_id

This commit is contained in:
Yannick Stephan
2025-02-09 20:18:38 +01:00
parent 07e3d2b24f
commit 5e3100221c

View File

@@ -540,7 +540,7 @@ class LightRAG:
# 2. split docs into chunks, insert chunks, update doc status # 2. split docs into chunks, insert chunks, update doc status
batch_size = self.addon_params.get("insert_batch_size", 10) batch_size = self.addon_params.get("insert_batch_size", 10)
batch_docs_list = [ docs_batches = [
list(to_process_docs.items())[i : i + batch_size] list(to_process_docs.items())[i : i + batch_size]
for i in range(0, len(to_process_docs), batch_size) for i in range(0, len(to_process_docs), batch_size)
] ]
@@ -548,12 +548,12 @@ class LightRAG:
# 3. iterate over batches # 3. iterate over batches
tasks: dict[str, list[Coroutine[Any, Any, None]]] = {} tasks: dict[str, list[Coroutine[Any, Any, None]]] = {}
logger.info(f"Number of batches to process: {len(batch_docs_list)}.") logger.info(f"Number of batches to process: {len(docs_batches)}.")
for batch_idx, ids_doc_processing_status in enumerate(batch_docs_list): for batch_idx, docs_batch in enumerate(docs_batches):
# 4. iterate over batch # 4. iterate over batch
for id_doc_processing_status in ids_doc_processing_status: for doc_id_processing_status in docs_batch:
doc_id, status_doc = id_doc_processing_status doc_id, status_doc = doc_id_processing_status
# Update status in processing # Update status in processing
await self.doc_status.upsert( await self.doc_status.upsert(
{ {
@@ -570,7 +570,7 @@ class LightRAG:
chunks: dict[str, Any] = { chunks: dict[str, Any] = {
compute_mdhash_id(dp["content"], prefix="chunk-"): { compute_mdhash_id(dp["content"], prefix="chunk-"): {
**dp, **dp,
"full_doc_id": id_doc_processing_status, "full_doc_id": doc_id,
} }
for dp in self.chunking_func( for dp in self.chunking_func(
status_doc.content, status_doc.content,
@@ -627,7 +627,7 @@ class LightRAG:
} }
) )
continue continue
logger.info(f"Completed batch {batch_idx + 1} of {len(batch_docs_list)}.") logger.info(f"Completed batch {batch_idx + 1} of {len(docs_batches)}.")
async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None: async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
try: try: