cleaned code

This commit is contained in:
Yannick Stephan
2025-02-09 21:48:19 +01:00
parent a47e09c69e
commit bfd280450a

View File

@@ -1,6 +1,5 @@
import asyncio import asyncio
import os import os
from collections.abc import Coroutine
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from datetime import datetime from datetime import datetime
from functools import partial from functools import partial
@@ -508,19 +507,6 @@ class LightRAG:
3. Process each chunk for entity and relation extraction 3. Process each chunk for entity and relation extraction
4. Update the document status 4. Update the document status
""" """
async def insert_full_doc(doc_id: str, content: str):
# Check if document is already processed
doc = await self.full_docs.get_by_id(doc_id)
if not doc:
await self.full_docs.upsert({doc_id: {"content": content}})
async def insert_text_chunks(doc_id: str, chunks: dict[str, Any]):
# Check if chunks are already processed
doc = await self.text_chunks.get_by_id(doc_id)
if not doc:
await self.text_chunks.upsert(chunks)
# 1. get all pending and failed documents # 1. get all pending and failed documents
to_process_docs: dict[str, DocProcessingStatus] = {} to_process_docs: dict[str, DocProcessingStatus] = {}
@@ -578,12 +564,12 @@ class LightRAG:
} }
# Process document (text chunks and full docs) in parallel # Process document (text chunks and full docs) in parallel
tasks: list[Coroutine[Any, Any, None]] = [] tasks = [
tasks.append(self.chunks_vdb.upsert(chunks)) self.chunks_vdb.upsert(chunks),
tasks.append(self._process_entity_relation_graph(chunks)) self._process_entity_relation_graph(chunks),
tasks.append(insert_full_doc(doc_id, status_doc.content)) self.full_docs.upsert({doc_id: {"content": status_doc.content}}),
tasks.append(insert_text_chunks(doc_id, chunks)) self.text_chunks.upsert(chunks),
]
try: try:
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
await self.doc_status.upsert( await self.doc_status.upsert(