From 58bdc739f567ee9bd489257d848ce9afc6ae52d8 Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Wed, 27 Nov 2024 18:48:10 +0800
Subject: [PATCH 1/2] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 893969f9..4181e98a 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@
-
+
From ba256144236deaf1bb8c46badd134263410a39ad Mon Sep 17 00:00:00 2001
From: Sebastian Schramm
Date: Thu, 28 Nov 2024 14:28:29 +0100
Subject: [PATCH 2/2] Fix templating of language in prompts
---
lightrag/operate.py | 9 ++++++++-
lightrag/prompt.py | 6 +++---
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/lightrag/operate.py b/lightrag/operate.py
index c761519f..94cd412b 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -59,6 +59,9 @@ async def _handle_entity_relation_summary(
llm_max_tokens = global_config["llm_model_max_token_size"]
tiktoken_model_name = global_config["tiktoken_model_name"]
summary_max_tokens = global_config["entity_summary_to_max_tokens"]
+ language = global_config["addon_params"].get(
+ "language", PROMPTS["DEFAULT_LANGUAGE"]
+ )
tokens = encode_string_by_tiktoken(description, model_name=tiktoken_model_name)
if len(tokens) < summary_max_tokens: # No need for summary
@@ -70,6 +73,7 @@ async def _handle_entity_relation_summary(
context_base = dict(
entity_name=entity_or_relation_name,
description_list=use_description.split(GRAPH_FIELD_SEP),
+ language=language,
)
use_prompt = prompt_template.format(**context_base)
logger.debug(f"Trigger summary: {entity_or_relation_name}")
@@ -444,6 +448,9 @@ async def kg_query(
)
else:
examples = "\n".join(PROMPTS["keywords_extraction_examples"])
+ language = global_config["addon_params"].get(
+ "language", PROMPTS["DEFAULT_LANGUAGE"]
+ )
# Set mode
if query_param.mode not in ["local", "global", "hybrid"]:
@@ -453,7 +460,7 @@ async def kg_query(
# LLM generate keywords
use_model_func = global_config["llm_model_func"]
kw_prompt_temp = PROMPTS["keywords_extraction"]
- kw_prompt = kw_prompt_temp.format(query=query, examples=examples)
+ kw_prompt = kw_prompt_temp.format(query=query, examples=examples, language=language)
result = await use_model_func(kw_prompt)
logger.info("kw_prompt result:")
print(result)
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 0d4e599d..5e71c081 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -33,7 +33,7 @@ Format each relationship as ("relationship"{tuple_delimiter}{tupl
3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document.
Format the content-level key words as ("content_keywords"{tuple_delimiter})
-4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+4. Return output in {language} as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
5. When finished, output {completion_delimiter}
@@ -131,7 +131,7 @@ Given one or two entities, and a list of descriptions, all related to the same e
Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
Make sure it is written in third person, and include the entity names so we the have full context.
-Use Chinese as output language.
+Use {language} as output language.
#######
-Data-
@@ -178,7 +178,7 @@ Add sections and commentary to the response as appropriate for the length and fo
PROMPTS["keywords_extraction"] = """---Role---
You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
-Use Chinese as output language.
+Use {language} as output language.
---Goal---