cleaned code

This commit is contained in:
Yannick Stephan
2025-02-19 22:07:25 +01:00
parent 05914213e2
commit 8958046b74
3 changed files with 12 additions and 15 deletions

View File

@@ -687,7 +687,7 @@ class LightRAG:
return
update_storage = True
logger.info(f"[New Docs] inserting {len(new_docs)} docs")
logger.info(f"Inserting {len(new_docs)} docs")
inserting_chunks: dict[str, Any] = {}
for chunk_text in text_chunks:
@@ -914,6 +914,7 @@ class LightRAG:
if storage_inst is not None
]
await asyncio.gather(*tasks)
logger.info("All Insert done")
def insert_custom_kg(self, custom_kg: dict[str, Any]) -> None:
loop = always_get_an_event_loop()

View File

@@ -491,11 +491,9 @@ async def extract_entities(
already_processed += 1
already_entities += len(maybe_nodes)
already_relations += len(maybe_edges)
now_ticks = PROMPTS["process_tickers"][
already_processed % len(PROMPTS["process_tickers"])
]
logger.debug(
f"{now_ticks} Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r",
f"Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r",
)
return dict(maybe_nodes), dict(maybe_edges)

View File

@@ -9,15 +9,14 @@ PROMPTS["DEFAULT_LANGUAGE"] = "English"
PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
PROMPTS["process_tickers"] = ["", "", "", "", "", "", "", "", "", ""]
PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"]
PROMPTS["entity_extraction"] = """-Goal-
PROMPTS["entity_extraction"] = """---Goal---
Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
Use {language} as output language.
-Steps-
---Steps---
1. Identify all entities. For each identified entity, extract the following information:
- entity_name: Name of the entity, use same language as input text. If English, capitalize the name.
- entity_type: One of the following types: [{entity_types}]
@@ -41,18 +40,17 @@ Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_
5. When finished, output {completion_delimiter}
######################
-Examples-
---Examples---
######################
{examples}
#############################
-Real Data-
---Real Data---
######################
Entity_types: {entity_types}
Text: {input_text}
######################
Output:
"""
Output:"""
PROMPTS["entity_extraction_examples"] = [
"""Example 1:
@@ -137,7 +135,7 @@ Make sure it is written in third person, and include the entity names so we the
Use {language} as output language.
#######
-Data-
---Data---
Entities: {entity_name}
Description List: {description_list}
#######
@@ -205,12 +203,12 @@ Given the query and conversation history, list both high-level and low-level key
- "low_level_keywords" for specific entities or details
######################
-Examples-
---Examples---
######################
{examples}
#############################
-Real Data-
---Real Data---
######################
Conversation History:
{history}