From bfd280450abb5ce36d6f30a93e0b02b5776cfdc3 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Sun, 9 Feb 2025 21:48:19 +0100 Subject: [PATCH] cleaned code --- lightrag/lightrag.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 5f711335..d92b5ea4 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -1,6 +1,5 @@ import asyncio import os -from collections.abc import Coroutine from dataclasses import asdict, dataclass, field from datetime import datetime from functools import partial @@ -508,19 +507,6 @@ class LightRAG: 3. Process each chunk for entity and relation extraction 4. Update the document status """ - - async def insert_full_doc(doc_id: str, content: str): - # Check if document is already processed - doc = await self.full_docs.get_by_id(doc_id) - if not doc: - await self.full_docs.upsert({doc_id: {"content": content}}) - - async def insert_text_chunks(doc_id: str, chunks: dict[str, Any]): - # Check if chunks are already processed - doc = await self.text_chunks.get_by_id(doc_id) - if not doc: - await self.text_chunks.upsert(chunks) - # 1. get all pending and failed documents to_process_docs: dict[str, DocProcessingStatus] = {} @@ -578,12 +564,12 @@ class LightRAG: } # Process document (text chunks and full docs) in parallel - tasks: list[Coroutine[Any, Any, None]] = [] - tasks.append(self.chunks_vdb.upsert(chunks)) - tasks.append(self._process_entity_relation_graph(chunks)) - tasks.append(insert_full_doc(doc_id, status_doc.content)) - tasks.append(insert_text_chunks(doc_id, chunks)) - + tasks = [ + self.chunks_vdb.upsert(chunks), + self._process_entity_relation_graph(chunks), + self.full_docs.upsert({doc_id: {"content": status_doc.content}}), + self.text_chunks.upsert(chunks), + ] try: await asyncio.gather(*tasks) await self.doc_status.upsert(