Merge pull request #443 from magicyuan876/main
fix(lightrag): fix the chunk-handling logic for chunks that yield only entities and no relationships
@@ -594,7 +594,7 @@ if __name__ == "__main__":
 | **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector database (currently not used) | |
 | **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
-| **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese"}`: sets example limit and output language | `example_number: all examples, language: English` |
+| **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"]}`: sets example limit and output language | `example_number: all examples, language: English` |
 | **convert\_response\_to\_json\_func** | `callable` | Not used | `convert_response_to_json` |
 | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
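For context, a minimal sketch of how the parameters documented above (including the `entity_types` key that this PR adds to the `addon_params` example) might be passed when constructing `LightRAG`; the working directory is a placeholder and the other values are taken from the table, not mandated by this PR:

```python
from lightrag import LightRAG

# Sketch only: "./rag_storage" is a placeholder; LLM/embedding functions are
# left at their defaults for brevity.
rag = LightRAG(
    working_dir="./rag_storage",
    addon_params={
        "example_number": 1,
        "language": "Simplified Chinese",
        # With this PR, extraction entity types can be overridden here instead
        # of always falling back to PROMPTS["DEFAULT_ENTITY_TYPES"].
        "entity_types": ["organization", "person", "geo", "event"],
    },
    embedding_cache_config={
        "enabled": True,
        "similarity_threshold": 0.95,
        "use_llm_check": False,
    },
)
```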
@@ -30,6 +30,7 @@ from .utils import (
     wrap_embedding_func_with_attrs,
     locate_json_string_body_from_string,
     safe_unicode_decode,
+    logger,
 )

 import sys
@@ -69,6 +70,11 @@ async def openai_complete_if_cache(
     messages.extend(history_messages)
     messages.append({"role": "user", "content": prompt})

+    # Log the input sent to the LLM
+    logger.debug("===== Query Input to LLM =====")
+    logger.debug(f"Query: {prompt}")
+    logger.debug(f"System prompt: {system_prompt}")
+    logger.debug("Full context:")
     if "response_format" in kwargs:
         response = await openai_async_client.beta.chat.completions.parse(
             model=model, messages=messages, **kwargs
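The `logger.debug(...)` calls added above only produce output when debug logging is enabled. A minimal sketch of turning it on, assuming the shared `logger` imported from `.utils` is the standard-library logger named `"lightrag"` (adjust the name if your version differs):

```python
import logging

# Assumption: LightRAG's shared logger is logging.getLogger("lightrag").
# Attach a root handler, then lower the package logger's level so the new
# debug lines in openai_complete_if_cache become visible.
logging.basicConfig(format="%(levelname)s: %(message)s")
logging.getLogger("lightrag").setLevel(logging.DEBUG)
```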
@@ -260,6 +260,9 @@ async def extract_entities(
     language = global_config["addon_params"].get(
         "language", PROMPTS["DEFAULT_LANGUAGE"]
     )
+    entity_types = global_config["addon_params"].get(
+        "entity_types", PROMPTS["DEFAULT_ENTITY_TYPES"]
+    )
     example_number = global_config["addon_params"].get("example_number", None)
     if example_number and example_number < len(PROMPTS["entity_extraction_examples"]):
         examples = "\n".join(
@@ -272,7 +275,7 @@ async def extract_entities(
         tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
         record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
         completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
-        entity_types=",".join(PROMPTS["DEFAULT_ENTITY_TYPES"]),
+        entity_types=",".join(entity_types),
         language=language,
     )
     # add example's format
@@ -283,7 +286,7 @@ async def extract_entities(
         tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
         record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
         completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
-        entity_types=",".join(PROMPTS["DEFAULT_ENTITY_TYPES"]),
+        entity_types=",".join(entity_types),
         examples=examples,
         language=language,
     )
@@ -412,15 +415,17 @@ async def extract_entities(
     ):
         all_relationships_data.append(await result)

-    if not len(all_entities_data):
-        logger.warning("Didn't extract any entities, maybe your LLM is not working")
-        return None
-    if not len(all_relationships_data):
+    if not len(all_entities_data) and not len(all_relationships_data):
         logger.warning(
-            "Didn't extract any relationships, maybe your LLM is not working"
+            "Didn't extract any entities and relationships, maybe your LLM is not working"
         )
         return None

+    if not len(all_entities_data):
+        logger.warning("Didn't extract any entities")
+    if not len(all_relationships_data):
+        logger.warning("Didn't extract any relationships")
+
     if entity_vdb is not None:
         data_for_vdb = {
             compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
@@ -630,6 +635,13 @@ async def _build_query_context(
             text_chunks_db,
             query_param,
         )
+        if (
+            hl_entities_context == ""
+            and hl_relations_context == ""
+            and hl_text_units_context == ""
+        ):
+            logger.warn("No high level context found. Switching to local mode.")
+            query_param.mode = "local"
     if query_param.mode == "hybrid":
         entities_context, relations_context, text_units_context = combine_contexts(
             [hl_entities_context, ll_entities_context],
@@ -865,7 +877,7 @@ async def _get_edge_data(
     results = await relationships_vdb.query(keywords, top_k=query_param.top_k)

     if not len(results):
-        return None
+        return "", "", ""

     edge_datas = await asyncio.gather(
         *[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results]
@@ -8,7 +8,7 @@ PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
 PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
 PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]

-PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
+PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"]

 PROMPTS["entity_extraction"] = """-Goal-
 Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
@@ -268,14 +268,19 @@ PROMPTS[
 Question 1: {original_prompt}
 Question 2: {cached_prompt}

-Please evaluate:
+Please evaluate the following two points and provide a similarity score between 0 and 1 directly:
 1. Whether these two questions are semantically similar
 2. Whether the answer to Question 2 can be used to answer Question 1
-
-Please provide a similarity score between 0 and 1, where:
-0: Completely unrelated or answer cannot be reused
+Similarity score criteria:
+0: Completely unrelated or answer cannot be reused, including but not limited to:
+- The questions have different topics
+- The locations mentioned in the questions are different
+- The times mentioned in the questions are different
+- The specific individuals mentioned in the questions are different
+- The specific events mentioned in the questions are different
+- The background information in the questions is different
+- The key conditions in the questions are different
 1: Identical and answer can be directly reused
 0.5: Partially related and answer needs modification to be used

 Return only a number between 0-1, without any additional content.
 """