Merge pull request #882 from YanSte/cleaning

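Cleaned code: simplified the document-insert log message and added an "All Insert done" log, removed the unused `process_tickers` prefix from the entity-extraction debug log (and its `PROMPTS` entry), normalized prompt section headers from `-Name-` to `---Name---`, and dropped the trailing newline after `Output:` in the entity-extraction prompt.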
This commit is contained in:
Yannick Stephan
2025-02-19 22:08:47 +01:00
committed by GitHub
3 changed files with 12 additions and 15 deletions

View File

@@ -688,7 +688,7 @@ class LightRAG:
return return
update_storage = True update_storage = True
logger.info(f"[New Docs] inserting {len(new_docs)} docs") logger.info(f"Inserting {len(new_docs)} docs")
inserting_chunks: dict[str, Any] = {} inserting_chunks: dict[str, Any] = {}
for chunk_text in text_chunks: for chunk_text in text_chunks:
@@ -915,6 +915,7 @@ class LightRAG:
if storage_inst is not None if storage_inst is not None
] ]
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
logger.info("All Insert done")
def insert_custom_kg(self, custom_kg: dict[str, Any]) -> None: def insert_custom_kg(self, custom_kg: dict[str, Any]) -> None:
loop = always_get_an_event_loop() loop = always_get_an_event_loop()

View File

@@ -491,11 +491,9 @@ async def extract_entities(
already_processed += 1 already_processed += 1
already_entities += len(maybe_nodes) already_entities += len(maybe_nodes)
already_relations += len(maybe_edges) already_relations += len(maybe_edges)
now_ticks = PROMPTS["process_tickers"][
already_processed % len(PROMPTS["process_tickers"])
]
logger.debug( logger.debug(
f"{now_ticks} Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r", f"Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r",
) )
return dict(maybe_nodes), dict(maybe_edges) return dict(maybe_nodes), dict(maybe_edges)

View File

@@ -9,15 +9,14 @@ PROMPTS["DEFAULT_LANGUAGE"] = "English"
PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>" PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##" PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>" PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
PROMPTS["process_tickers"] = ["", "", "", "", "", "", "", "", "", ""]
PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"] PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"]
PROMPTS["entity_extraction"] = """-Goal- PROMPTS["entity_extraction"] = """---Goal---
Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities. Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
Use {language} as output language. Use {language} as output language.
-Steps- ---Steps---
1. Identify all entities. For each identified entity, extract the following information: 1. Identify all entities. For each identified entity, extract the following information:
- entity_name: Name of the entity, use same language as input text. If English, capitalized the name. - entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
- entity_type: One of the following types: [{entity_types}] - entity_type: One of the following types: [{entity_types}]
@@ -41,18 +40,17 @@ Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_
5. When finished, output {completion_delimiter} 5. When finished, output {completion_delimiter}
###################### ######################
-Examples- ---Examples---
###################### ######################
{examples} {examples}
############################# #############################
-Real Data- ---Real Data---
###################### ######################
Entity_types: {entity_types} Entity_types: {entity_types}
Text: {input_text} Text: {input_text}
###################### ######################
Output: Output:"""
"""
PROMPTS["entity_extraction_examples"] = [ PROMPTS["entity_extraction_examples"] = [
"""Example 1: """Example 1:
@@ -137,7 +135,7 @@ Make sure it is written in third person, and include the entity names so we the
Use {language} as output language. Use {language} as output language.
####### #######
-Data- ---Data---
Entities: {entity_name} Entities: {entity_name}
Description List: {description_list} Description List: {description_list}
####### #######
@@ -205,12 +203,12 @@ Given the query and conversation history, list both high-level and low-level key
- "low_level_keywords" for specific entities or details - "low_level_keywords" for specific entities or details
###################### ######################
-Examples- ---Examples---
###################### ######################
{examples} {examples}
############################# #############################
-Real Data- ---Real Data---
###################### ######################
Conversation History: Conversation History:
{history} {history}