Merge branch 'clear-text-before-insert' into simplify-cli-arguments
This commit is contained in:
@@ -181,6 +181,8 @@ def create_app(args):
|
|||||||
"Skip document scanning(another scanning is active)"
|
"Skip document scanning(another scanning is active)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ASCIIColors.green("\nServer is ready to accept connections! 🚀\n")
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
@@ -161,7 +161,7 @@ class DocumentManager:
|
|||||||
"""Scan input directory for new files"""
|
"""Scan input directory for new files"""
|
||||||
new_files = []
|
new_files = []
|
||||||
for ext in self.supported_extensions:
|
for ext in self.supported_extensions:
|
||||||
logging.info(f"Scanning for {ext} files in {self.input_dir}")
|
logging.debug(f"Scanning for {ext} files in {self.input_dir}")
|
||||||
for file_path in self.input_dir.rglob(f"*{ext}"):
|
for file_path in self.input_dir.rglob(f"*{ext}"):
|
||||||
if file_path not in self.indexed_files:
|
if file_path not in self.indexed_files:
|
||||||
new_files.append(file_path)
|
new_files.append(file_path)
|
||||||
|
@@ -492,7 +492,5 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
|||||||
Make sure to include the X-API-Key header in all your requests.
|
Make sure to include the X-API-Key header in all your requests.
|
||||||
""")
|
""")
|
||||||
|
|
||||||
ASCIIColors.green("Server is ready to accept connections! 🚀\n")
|
|
||||||
|
|
||||||
# Ensure splash output flush to system log
|
# Ensure splash output flush to system log
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
@@ -581,7 +581,7 @@ class LightRAG:
|
|||||||
await self._insert_done()
|
await self._insert_done()
|
||||||
|
|
||||||
async def apipeline_enqueue_documents(
|
async def apipeline_enqueue_documents(
|
||||||
self, input: str | list[str], ids: list[str] | None
|
self, input: str | list[str], ids: list[str] | None = None
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Pipeline for Processing Documents
|
Pipeline for Processing Documents
|
||||||
@@ -595,9 +595,6 @@ class LightRAG:
|
|||||||
if isinstance(input, str):
|
if isinstance(input, str):
|
||||||
input = [input]
|
input = [input]
|
||||||
|
|
||||||
# Clean input text and remove duplicates
|
|
||||||
input = list(set(self.clean_text(doc) for doc in input))
|
|
||||||
|
|
||||||
# 1. Validate ids if provided or generate MD5 hash IDs
|
# 1. Validate ids if provided or generate MD5 hash IDs
|
||||||
if ids is not None:
|
if ids is not None:
|
||||||
# Check if the number of IDs matches the number of documents
|
# Check if the number of IDs matches the number of documents
|
||||||
@@ -611,6 +608,8 @@ class LightRAG:
|
|||||||
# Generate contents dict of IDs provided by user and documents
|
# Generate contents dict of IDs provided by user and documents
|
||||||
contents = {id_: doc for id_, doc in zip(ids, input)}
|
contents = {id_: doc for id_, doc in zip(ids, input)}
|
||||||
else:
|
else:
|
||||||
|
# Clean input text and remove duplicates
|
||||||
|
input = list(set(self.clean_text(doc) for doc in input))
|
||||||
# Generate contents dict of MD5 hash IDs and documents
|
# Generate contents dict of MD5 hash IDs and documents
|
||||||
contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
|
contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user