From 0a41cc8a9aa33bb3d91f8ea6290e3423721eda9f Mon Sep 17 00:00:00 2001 From: Magic_yuan <317617749@qq.com> Date: Wed, 11 Dec 2024 12:45:10 +0800 Subject: [PATCH] =?UTF-8?q?feat(llm,=20prompt):=E5=A2=9E=E5=8A=A0=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E8=BE=93=E5=87=BA=E5=B9=B6=E6=89=A9=E5=B1=95=E5=AE=9E?= =?UTF-8?q?=E4=BD=93=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 llm.py 中添加了日志输出,用于调试和记录 LLM 查询输入 - 在 prompt.py 中增加了 "category" 实体类型,扩展了实体提取的范围 --- lightrag/llm.py | 7 ++++++- lightrag/prompt.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lightrag/llm.py b/lightrag/llm.py index d725ea85..e0277248 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -29,7 +29,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM from .utils import ( wrap_embedding_func_with_attrs, locate_json_string_body_from_string, - safe_unicode_decode, + safe_unicode_decode, logger, ) import sys @@ -69,6 +69,11 @@ async def openai_complete_if_cache( messages.extend(history_messages) messages.append({"role": "user", "content": prompt}) + # 添加日志输出 + logger.debug("===== Query Input to LLM =====") + logger.debug(f"Query: {prompt}") + logger.debug(f"System prompt: {system_prompt}") + logger.debug("Full context:") if "response_format" in kwargs: response = await openai_async_client.beta.chat.completions.parse( model=model, messages=messages, **kwargs diff --git a/lightrag/prompt.py b/lightrag/prompt.py index b62f02b5..d5674f15 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -8,7 +8,7 @@ PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##" PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>" PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] -PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"] +PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"] PROMPTS["entity_extraction"] = """-Goal- Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.