From b1cf41d24251c19ed95c6906da6571e9aeb02a8b Mon Sep 17 00:00:00 2001
From: jin <52519003+jin38324@users.noreply.github.com>
Date: Mon, 11 Nov 2024 15:19:42 +0800
Subject: [PATCH] fix bug

---
 .gitignore          |  1 +
 lightrag/base.py    |  2 +-
 lightrag/operate.py | 10 +++++++---
 lightrag/prompt.py  |  2 +-
 test.py             |  2 +-
 5 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index def738b2..65aaaa02 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ local_neo4jWorkDir/
 neo4jWorkDir/
 ignore_this.txt
 .venv/
+*.ignore.*
diff --git a/lightrag/base.py b/lightrag/base.py
index 97524472..b88acae2 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -84,7 +84,7 @@ class BaseKVStorage(Generic[T], StorageNameSpace):
 
 @dataclass
 class BaseGraphStorage(StorageNameSpace):
-    embedding_func: EmbeddingFunc
+    embedding_func: EmbeddingFunc = None
 
     async def has_node(self, node_id: str) -> bool:
         raise NotImplementedError
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 04725d6a..3fcc80c8 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -16,6 +16,7 @@ from .utils import (
     split_string_by_multi_markers,
     truncate_list_by_token_size,
     process_combine_contexts,
+    locate_json_string_body_from_string
 )
 from .base import (
     BaseGraphStorage,
@@ -403,9 +404,10 @@ async def local_query(
     kw_prompt_temp = PROMPTS["keywords_extraction"]
     kw_prompt = kw_prompt_temp.format(query=query)
     result = await use_model_func(kw_prompt)
+    json_text = locate_json_string_body_from_string(result)
 
     try:
-        keywords_data = json.loads(result)
+        keywords_data = json.loads(json_text)
         keywords = keywords_data.get("low_level_keywords", [])
         keywords = ", ".join(keywords)
     except json.JSONDecodeError:
@@ -670,9 +672,10 @@ async def global_query(
     kw_prompt_temp = PROMPTS["keywords_extraction"]
     kw_prompt = kw_prompt_temp.format(query=query)
     result = await use_model_func(kw_prompt)
+    json_text = locate_json_string_body_from_string(result)
 
     try:
-        keywords_data = json.loads(result)
+        keywords_data = json.loads(json_text)
         keywords = keywords_data.get("high_level_keywords", [])
         keywords = ", ".join(keywords)
     except json.JSONDecodeError:
@@ -911,8 +914,9 @@ async def hybrid_query(
     kw_prompt = kw_prompt_temp.format(query=query)
 
     result = await use_model_func(kw_prompt)
+    json_text = locate_json_string_body_from_string(result)
     try:
-        keywords_data = json.loads(result)
+        keywords_data = json.loads(json_text)
         hl_keywords = keywords_data.get("high_level_keywords", [])
         ll_keywords = keywords_data.get("low_level_keywords", [])
         hl_keywords = ", ".join(hl_keywords)
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index e0713859..5de116b3 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -14,7 +14,7 @@ Given a text document that is potentially relevant to this activity and a list o
 
 -Steps-
 1. Identify all entities. For each identified entity, extract the following information:
-- entity_name: Name of the entity, capitalized
+- entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
 - entity_type: One of the following types: [{entity_types}]
 - entity_description: Comprehensive description of the entity's attributes and activities
 Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
diff --git a/test.py b/test.py
index 35c03afe..c5d7fec0 100644
--- a/test.py
+++ b/test.py
@@ -18,7 +18,7 @@ rag = LightRAG(
     # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
 )
 
-with open("./book.txt") as f:
+with open("./dickens/book.txt", "r", encoding="utf-8") as f:
     rag.insert(f.read())
 
 # Perform naive search
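Note on the lightrag/operate.py change: the keywords_extraction prompt asks the model for JSON, but chat models often wrap that JSON in prose or a code fence, so the previous `json.loads(result)` on the raw reply could fall straight into the `JSONDecodeError` branch. The patch routes the reply through `locate_json_string_body_from_string` (imported from `.utils`) before parsing. That helper's implementation is not shown in this patch; the sketch below is only a minimal assumption of what it might do (pull the outermost `{...}` span with a regex), together with a usage example mirroring the patched call sites.

```python
import json
import re
from typing import Optional


def locate_json_string_body_from_string(content: str) -> Optional[str]:
    """Return the outermost {...} span found in an LLM reply, or None.

    Hypothetical sketch: models frequently wrap the requested JSON in prose
    or ```json fences, which makes json.loads() on the raw reply fail.
    """
    match = re.search(r"\{.*\}", content, re.DOTALL)
    return match.group(0) if match else None


# Example reply with the JSON wrapped in a code fence, as some models return it.
reply = '```json\n{"high_level_keywords": ["graph RAG"], "low_level_keywords": ["entity"]}\n```'
json_text = locate_json_string_body_from_string(reply)
keywords_data = json.loads(json_text)  # succeeds where json.loads(reply) would raise
print(keywords_data["high_level_keywords"])
```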