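Cleaned code: renamed the batch loop variables (`batch_docs_list` → `docs_batches`) and set each chunk's `full_doc_id` to `doc_id`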
This commit is contained in:
@@ -540,7 +540,7 @@ class LightRAG:
|
|||||||
|
|
||||||
# 2. split docs into chunks, insert chunks, update doc status
|
# 2. split docs into chunks, insert chunks, update doc status
|
||||||
batch_size = self.addon_params.get("insert_batch_size", 10)
|
batch_size = self.addon_params.get("insert_batch_size", 10)
|
||||||
batch_docs_list = [
|
docs_batches = [
|
||||||
list(to_process_docs.items())[i : i + batch_size]
|
list(to_process_docs.items())[i : i + batch_size]
|
||||||
for i in range(0, len(to_process_docs), batch_size)
|
for i in range(0, len(to_process_docs), batch_size)
|
||||||
]
|
]
|
||||||
@@ -548,12 +548,12 @@ class LightRAG:
|
|||||||
# 3. iterate over batches
|
# 3. iterate over batches
|
||||||
tasks: dict[str, list[Coroutine[Any, Any, None]]] = {}
|
tasks: dict[str, list[Coroutine[Any, Any, None]]] = {}
|
||||||
|
|
||||||
logger.info(f"Number of batches to process: {len(batch_docs_list)}.")
|
logger.info(f"Number of batches to process: {len(docs_batches)}.")
|
||||||
|
|
||||||
for batch_idx, ids_doc_processing_status in enumerate(batch_docs_list):
|
for batch_idx, docs_batch in enumerate(docs_batches):
|
||||||
# 4. iterate over batch
|
# 4. iterate over batch
|
||||||
for id_doc_processing_status in ids_doc_processing_status:
|
for doc_id_processing_status in docs_batch:
|
||||||
doc_id, status_doc = id_doc_processing_status
|
doc_id, status_doc = doc_id_processing_status
|
||||||
# Update status in processing
|
# Update status in processing
|
||||||
await self.doc_status.upsert(
|
await self.doc_status.upsert(
|
||||||
{
|
{
|
||||||
@@ -570,7 +570,7 @@ class LightRAG:
|
|||||||
chunks: dict[str, Any] = {
|
chunks: dict[str, Any] = {
|
||||||
compute_mdhash_id(dp["content"], prefix="chunk-"): {
|
compute_mdhash_id(dp["content"], prefix="chunk-"): {
|
||||||
**dp,
|
**dp,
|
||||||
"full_doc_id": id_doc_processing_status,
|
"full_doc_id": doc_id,
|
||||||
}
|
}
|
||||||
for dp in self.chunking_func(
|
for dp in self.chunking_func(
|
||||||
status_doc.content,
|
status_doc.content,
|
||||||
@@ -627,7 +627,7 @@ class LightRAG:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
logger.info(f"Completed batch {batch_idx + 1} of {len(batch_docs_list)}.")
|
logger.info(f"Completed batch {batch_idx + 1} of {len(docs_batches)}.")
|
||||||
|
|
||||||
async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
|
async def _process_entity_relation_graph(self, chunk: dict[str, Any]) -> None:
|
||||||
try:
|
try:
|
||||||
|
Reference in New Issue
Block a user