commit b1cf41d242 (parent b1a0397d18)
Author: jin
Date:   2024-11-11 15:19:42 +08:00

5 changed files with 11 additions and 6 deletions

.gitignore

@@ -10,3 +10,4 @@ local_neo4jWorkDir/
 neo4jWorkDir/
 ignore_this.txt
 .venv/
+*.ignore.*


@@ -84,7 +84,7 @@ class BaseKVStorage(Generic[T], StorageNameSpace):
 @dataclass
 class BaseGraphStorage(StorageNameSpace):
-    embedding_func: EmbeddingFunc
+    embedding_func: EmbeddingFunc = None

     async def has_node(self, node_id: str) -> bool:
         raise NotImplementedError
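
Giving embedding_func a default of None makes the field optional, so graph-storage backends that never embed anything can be constructed without passing an embedding function. A minimal sketch of that effect on a dataclass field, using simplified stand-ins for EmbeddingFunc and StorageNameSpace (the real classes live in LightRAG and are not reproduced here):

from dataclasses import dataclass
from typing import Callable, Optional

# Simplified stand-in; LightRAG's EmbeddingFunc wraps an async embedding callable.
EmbeddingFunc = Callable

@dataclass
class StorageNameSpace:
    namespace: str = "demo"

@dataclass
class BaseGraphStorage(StorageNameSpace):
    # With a default of None the field no longer has to be supplied at construction time.
    embedding_func: Optional[EmbeddingFunc] = None

# Before the change this raised TypeError (missing embedding_func); now it works.
storage = BaseGraphStorage()
print(storage.embedding_func)  # None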


@@ -16,6 +16,7 @@ from .utils import (
     split_string_by_multi_markers,
     truncate_list_by_token_size,
     process_combine_contexts,
+    locate_json_string_body_from_string
 )
 from .base import (
     BaseGraphStorage,
@@ -403,9 +404,10 @@ async def local_query(
     kw_prompt_temp = PROMPTS["keywords_extraction"]
     kw_prompt = kw_prompt_temp.format(query=query)
     result = await use_model_func(kw_prompt)
+    json_text = locate_json_string_body_from_string(result)
     try:
-        keywords_data = json.loads(result)
+        keywords_data = json.loads(json_text)
         keywords = keywords_data.get("low_level_keywords", [])
         keywords = ", ".join(keywords)
     except json.JSONDecodeError:
@@ -670,9 +672,10 @@ async def global_query(
     kw_prompt_temp = PROMPTS["keywords_extraction"]
     kw_prompt = kw_prompt_temp.format(query=query)
     result = await use_model_func(kw_prompt)
+    json_text = locate_json_string_body_from_string(result)
     try:
-        keywords_data = json.loads(result)
+        keywords_data = json.loads(json_text)
         keywords = keywords_data.get("high_level_keywords", [])
         keywords = ", ".join(keywords)
     except json.JSONDecodeError:
@@ -911,8 +914,9 @@ async def hybrid_query(
     kw_prompt = kw_prompt_temp.format(query=query)
     result = await use_model_func(kw_prompt)
+    json_text = locate_json_string_body_from_string(result)
     try:
-        keywords_data = json.loads(result)
+        keywords_data = json.loads(json_text)
         hl_keywords = keywords_data.get("high_level_keywords", [])
         ll_keywords = keywords_data.get("low_level_keywords", [])
         hl_keywords = ", ".join(hl_keywords)
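
All three query paths now pass the model's reply through locate_json_string_body_from_string before json.loads, so keyword extraction no longer fails when the model wraps its JSON answer in extra prose. A minimal sketch of what such a helper can do, assuming a regex grab of the outermost {...} block; the actual implementation in LightRAG's utils may differ:

import json
import re
from typing import Optional

def locate_json_string_body_from_string(content: str) -> Optional[str]:
    # Assumed behavior: return the first-to-last-brace span so surrounding
    # chatter does not break json.loads; None if no braces are found.
    match = re.search(r"\{.*\}", content, re.DOTALL)
    return match.group(0) if match else None

# Example: an LLM reply with prose around the JSON body.
result = (
    'Sure! Here are the keywords:\n'
    '{"high_level_keywords": ["graph RAG"], "low_level_keywords": ["entity"]}\n'
    'Hope that helps.'
)
json_text = locate_json_string_body_from_string(result)
try:
    keywords_data = json.loads(json_text)
except (json.JSONDecodeError, TypeError):
    keywords_data = {}
print(keywords_data.get("high_level_keywords", []))  # ['graph RAG']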


@@ -14,7 +14,7 @@ Given a text document that is potentially relevant to this activity and a list o
 -Steps-
 1. Identify all entities. For each identified entity, extract the following information:
-- entity_name: Name of the entity, capitalized
+- entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
 - entity_type: One of the following types: [{entity_types}]
 - entity_description: Comprehensive description of the entity's attributes and activities
 Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>


@@ -18,7 +18,7 @@ rag = LightRAG(
     # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
 )

-with open("./book.txt") as f:
+with open("./dickens/book.txt", "r", encoding="utf-8") as f:
     rag.insert(f.read())

 # Perform naive search
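
The trailing comment introduces the query step that follows in the README. A minimal sketch of that call, assuming the QueryParam interface shown in the project README (rag is the LightRAG instance configured above):

from lightrag import QueryParam  # assumption: importable from the package root, as in the README

# Perform naive search over the inserted book text.
answer = rag.query(
    "What are the top themes in this story?",
    param=QueryParam(mode="naive"),  # other documented modes: "local", "global", "hybrid"
)
print(answer)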