Merge branch 'clear-text-before-insert' into simplify-cli-arguments

This commit is contained in:
yangdx
2025-02-23 17:06:39 +08:00
4 changed files with 6 additions and 7 deletions

View File

@@ -181,6 +181,8 @@ def create_app(args):
  "Skip document scanning(another scanning is active)"
  )
+ ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
  yield
  finally:

View File

@@ -161,7 +161,7 @@ class DocumentManager:
  """Scan input directory for new files"""
  new_files = []
  for ext in self.supported_extensions:
- logging.info(f"Scanning for {ext} files in {self.input_dir}")
+ logging.debug(f"Scanning for {ext} files in {self.input_dir}")
  for file_path in self.input_dir.rglob(f"*{ext}"):
  if file_path not in self.indexed_files:
  new_files.append(file_path)

View File

@@ -492,7 +492,5 @@ def display_splash_screen(args: argparse.Namespace) -> None:
  Make sure to include the X-API-Key header in all your requests.
  """)
- ASCIIColors.green("Server is ready to accept connections! 🚀\n")
  # Ensure splash output flush to system log
  sys.stdout.flush()

View File

@@ -581,7 +581,7 @@ class LightRAG:
  await self._insert_done()
  async def apipeline_enqueue_documents(
- self, input: str | list[str], ids: list[str] | None
+ self, input: str | list[str], ids: list[str] | None = None
  ) -> None:
  """
  Pipeline for Processing Documents
@@ -595,9 +595,6 @@ class LightRAG:
  if isinstance(input, str):
  input = [input]
- # Clean input text and remove duplicates
- input = list(set(self.clean_text(doc) for doc in input))
  # 1. Validate ids if provided or generate MD5 hash IDs
  if ids is not None:
  # Check if the number of IDs matches the number of documents
@@ -611,6 +608,8 @@ class LightRAG:
  # Generate contents dict of IDs provided by user and documents
  contents = {id_: doc for id_, doc in zip(ids, input)}
  else:
+ # Clean input text and remove duplicates
+ input = list(set(self.clean_text(doc) for doc in input))
  # Generate contents dict of MD5 hash IDs and documents
  contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}