diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index f9ab2333..efcded4c 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -687,7 +687,7 @@ class LightRAG: return update_storage = True - logger.info(f"[New Docs] inserting {len(new_docs)} docs") + logger.info(f"Inserting {len(new_docs)} docs") inserting_chunks: dict[str, Any] = {} for chunk_text in text_chunks: @@ -914,6 +914,7 @@ class LightRAG: if storage_inst is not None ] await asyncio.gather(*tasks) + logger.info("All Insert done") def insert_custom_kg(self, custom_kg: dict[str, Any]) -> None: loop = always_get_an_event_loop() diff --git a/lightrag/operate.py b/lightrag/operate.py index 3ada34ab..9552f2ed 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -491,11 +491,9 @@ async def extract_entities( already_processed += 1 already_entities += len(maybe_nodes) already_relations += len(maybe_edges) - now_ticks = PROMPTS["process_tickers"][ - already_processed % len(PROMPTS["process_tickers"]) - ] + logger.debug( - f"{now_ticks} Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r", + f"Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r", ) return dict(maybe_nodes), dict(maybe_edges) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index a25ab672..c6cbf7db 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -9,15 +9,14 @@ PROMPTS["DEFAULT_LANGUAGE"] = "English" PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>" PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##" PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>" -PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"] -PROMPTS["entity_extraction"] = """-Goal- +PROMPTS["entity_extraction"] = """---Goal--- Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities. Use {language} as output language. --Steps- +---Steps--- 1. Identify all entities. For each identified entity, extract the following information: - entity_name: Name of the entity, use same language as input text. If English, capitalized the name. - entity_type: One of the following types: [{entity_types}] @@ -41,18 +40,17 @@ Format the content-level key words as ("content_keywords"{tuple_delimiter}