From 0a41cc8a9aa33bb3d91f8ea6290e3423721eda9f Mon Sep 17 00:00:00 2001
From: Magic_yuan <317617749@qq.com>
Date: Wed, 11 Dec 2024 12:45:10 +0800
Subject: [PATCH] =?UTF-8?q?feat(llm,=20prompt):=E5=A2=9E=E5=8A=A0=E6=97=A5?=
 =?UTF-8?q?=E5=BF=97=E8=BE=93=E5=87=BA=E5=B9=B6=E6=89=A9=E5=B1=95=E5=AE=9E?=
 =?UTF-8?q?=E4=BD=93=E7=B1=BB=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 在 llm.py 中添加了日志输出，用于调试和记录 LLM 查询输入
- 在 prompt.py 中增加了 "category" 实体类型，扩展了实体提取的范围
---
 lightrag/llm.py    | 7 ++++++-
 lightrag/prompt.py | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/lightrag/llm.py b/lightrag/llm.py
index d725ea85..e0277248 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -29,7 +29,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from .utils import (
     wrap_embedding_func_with_attrs,
     locate_json_string_body_from_string,
-    safe_unicode_decode,
+    safe_unicode_decode, logger,
 )
 
 import sys
@@ -69,6 +69,11 @@ async def openai_complete_if_cache(
     messages.extend(history_messages)
     messages.append({"role": "user", "content": prompt})
 
+    # 添加日志输出
+    logger.debug("===== Query Input to LLM =====")
+    logger.debug(f"Query: {prompt}")
+    logger.debug(f"System prompt: {system_prompt}")
+    logger.debug("Full context:")
     if "response_format" in kwargs:
         response = await openai_async_client.beta.chat.completions.parse(
             model=model, messages=messages, **kwargs
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index b62f02b5..d5674f15 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -8,7 +8,7 @@ PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
 PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
 PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
 
-PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event"]
+PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"]
 
 PROMPTS["entity_extraction"] = """-Goal-
 Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.