Merge pull request #1545 from danielaskdd/change-naive-context-to-json

Optimize the query context format for mix and naive mode queries
This commit is contained in:
Daniel.y
2025-05-08 04:30:01 +08:00
committed by GitHub
8 changed files with 232 additions and 588 deletions
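
In short, the query context handed to the LLM is now assembled as JSON-style lists of dictionaries instead of the old list-of-list tables, the separate `mix_kg_vector_query` path is folded into `kg_query`/`_build_query_context` via a new `chunks_vdb` parameter, and the `hl_keywords`/`ll_keywords` request fields are removed from the API and WebUI. A rough sketch of the new context shape (field names follow the diff; the values below are invented for illustration):

```python
# Illustrative only: approximate shape of the reworked query context.
entities_context = [
    {
        "id": 1,
        "entity": "LightRAG",
        "type": "SOFTWARE",
        "description": "Retrieval-augmented generation library",
        "rank": 3,
        "created_at": "2025-05-08 04:30:01",
        "file_path": "docs/intro.md",
    }
]
relations_context = [
    {
        "id": 1,
        "entity1": "LightRAG",
        "entity2": "knowledge graph",
        "description": "LightRAG builds a knowledge graph from document chunks",
        "keywords": "graph, indexing",
        "weight": 1.0,
        "rank": 2,
        "created_at": "2025-05-08 04:30:01",
        "file_path": "docs/intro.md",
    }
]
text_units_context = [
    {"id": 1, "content": "LightRAG splits documents into chunks ...", "file_path": "docs/intro.md"}
]
```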

View File

@@ -1 +1 @@
__api_version__ = "0168" __api_version__ = "0169"

View File

@@ -67,16 +67,6 @@ class QueryRequest(BaseModel):
description="Maximum number of tokens allocated for entity descriptions in local retrieval.", description="Maximum number of tokens allocated for entity descriptions in local retrieval.",
) )
hl_keywords: Optional[List[str]] = Field(
default=None,
description="List of high-level keywords to prioritize in retrieval.",
)
ll_keywords: Optional[List[str]] = Field(
default=None,
description="List of low-level keywords to refine retrieval focus.",
)
conversation_history: Optional[List[Dict[str, Any]]] = Field( conversation_history: Optional[List[Dict[str, Any]]] = Field(
default=None, default=None,
description="Stores past conversation history to maintain context. Format: [{'role': 'user/assistant', 'content': 'message'}].", description="Stores past conversation history to maintain context. Format: [{'role': 'user/assistant', 'content': 'message'}].",
@@ -93,20 +83,6 @@ class QueryRequest(BaseModel):
def query_strip_after(cls, query: str) -> str: def query_strip_after(cls, query: str) -> str:
return query.strip() return query.strip()
@field_validator("hl_keywords", mode="after")
@classmethod
def hl_keywords_strip_after(cls, hl_keywords: List[str] | None) -> List[str] | None:
if hl_keywords is None:
return None
return [keyword.strip() for keyword in hl_keywords]
@field_validator("ll_keywords", mode="after")
@classmethod
def ll_keywords_strip_after(cls, ll_keywords: List[str] | None) -> List[str] | None:
if ll_keywords is None:
return None
return [keyword.strip() for keyword in ll_keywords]
@field_validator("conversation_history", mode="after") @field_validator("conversation_history", mode="after")
@classmethod @classmethod
def conversation_history_role_check( def conversation_history_role_check(

View File

@@ -53,7 +53,6 @@ from .operate import (
extract_entities, extract_entities,
merge_nodes_and_edges, merge_nodes_and_edges,
kg_query, kg_query,
mix_kg_vector_query,
naive_query, naive_query,
query_with_keywords, query_with_keywords,
) )
@@ -1437,8 +1436,10 @@ class LightRAG:
""" """
# If a custom model is provided in param, temporarily update global config # If a custom model is provided in param, temporarily update global config
global_config = asdict(self) global_config = asdict(self)
# Save original query for vector search
param.original_query = query
if param.mode in ["local", "global", "hybrid"]: if param.mode in ["local", "global", "hybrid", "mix"]:
response = await kg_query( response = await kg_query(
query.strip(), query.strip(),
self.chunk_entity_relation_graph, self.chunk_entity_relation_graph,
@@ -1447,30 +1448,17 @@ class LightRAG:
self.text_chunks, self.text_chunks,
param, param,
global_config, global_config,
hashing_kv=self.llm_response_cache,  # Directly use llm_response_cache
hashing_kv=self.llm_response_cache,
system_prompt=system_prompt, system_prompt=system_prompt,
chunks_vdb=self.chunks_vdb,
) )
elif param.mode == "naive": elif param.mode == "naive":
response = await naive_query( response = await naive_query(
query.strip(), query.strip(),
self.chunks_vdb, self.chunks_vdb,
self.text_chunks,
param, param,
global_config, global_config,
hashing_kv=self.llm_response_cache,  # Directly use llm_response_cache
hashing_kv=self.llm_response_cache,
system_prompt=system_prompt,
)
elif param.mode == "mix":
response = await mix_kg_vector_query(
query.strip(),
self.chunk_entity_relation_graph,
self.entities_vdb,
self.relationships_vdb,
self.chunks_vdb,
self.text_chunks,
param,
global_config,
hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
system_prompt=system_prompt, system_prompt=system_prompt,
) )
elif param.mode == "bypass": elif param.mode == "bypass":

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
from functools import partial from functools import partial
import asyncio import asyncio
import traceback
import json import json
import re import re
import os import os
@@ -26,7 +25,6 @@ from .utils import (
CacheData, CacheData,
get_conversation_turns, get_conversation_turns,
use_llm_func_with_cache, use_llm_func_with_cache,
list_of_list_to_json,
) )
from .base import ( from .base import (
BaseGraphStorage, BaseGraphStorage,
@@ -859,6 +857,7 @@ async def kg_query(
global_config: dict[str, str], global_config: dict[str, str],
hashing_kv: BaseKVStorage | None = None, hashing_kv: BaseKVStorage | None = None,
system_prompt: str | None = None, system_prompt: str | None = None,
chunks_vdb: BaseVectorStorage = None,
) -> str | AsyncIterator[str]: ) -> str | AsyncIterator[str]:
if query_param.model_func: if query_param.model_func:
use_model_func = query_param.model_func use_model_func = query_param.model_func
@@ -911,6 +910,7 @@ async def kg_query(
relationships_vdb, relationships_vdb,
text_chunks_db, text_chunks_db,
query_param, query_param,
chunks_vdb,
) )
if query_param.only_need_context: if query_param.only_need_context:
@@ -1110,169 +1110,85 @@ async def extract_keywords_only(
return hl_keywords, ll_keywords return hl_keywords, ll_keywords
async def mix_kg_vector_query(
async def _get_vector_context(
query: str, query: str,
knowledge_graph_inst: BaseGraphStorage,
entities_vdb: BaseVectorStorage,
relationships_vdb: BaseVectorStorage,
chunks_vdb: BaseVectorStorage, chunks_vdb: BaseVectorStorage,
text_chunks_db: BaseKVStorage,
query_param: QueryParam, query_param: QueryParam,
global_config: dict[str, str], tokenizer: Tokenizer,
hashing_kv: BaseKVStorage | None = None, ) -> tuple[list, list, list] | None:
system_prompt: str | None = None,
) -> str | AsyncIterator[str]:
""" """
Hybrid retrieval implementation combining knowledge graph and vector search. Retrieve vector context from the vector database.
This function performs a hybrid search by: This function performs vector search to find relevant text chunks for a query,
1. Extracting semantic information from knowledge graph formats them with file path and creation time information.
2. Retrieving relevant text chunks through vector similarity
3. Combining both results for comprehensive answer generation Args:
query: The query string to search for
chunks_vdb: Vector database containing document chunks
query_param: Query parameters including top_k and ids
tokenizer: Tokenizer for counting tokens
Returns:
Tuple (empty_entities, empty_relations, text_units) for combine_contexts,
compatible with _get_edge_data and _get_node_data format
""" """
# get tokenizer try:
tokenizer: Tokenizer = global_config["tokenizer"] results = await chunks_vdb.query(
query, top_k=query_param.top_k, ids=query_param.ids
)
if not results:
return [], [], []
if query_param.model_func: valid_chunks = []
use_model_func = query_param.model_func for result in results:
else: if "content" in result:
use_model_func = global_config["llm_model_func"] # Directly use content from chunks_vdb.query result
# Apply higher priority (5) to query relation LLM function chunk_with_time = {
use_model_func = partial(use_model_func, _priority=5) "content": result["content"],
"created_at": result.get("created_at", None),
"file_path": result.get("file_path", "unknown_source"),
}
valid_chunks.append(chunk_with_time)
# 1. Cache handling if not valid_chunks:
args_hash = compute_args_hash("mix", query, cache_type="query") return [], [], []
cached_response, quantized, min_val, max_val = await handle_cache(
hashing_kv, args_hash, query, "mix", cache_type="query"
)
if cached_response is not None:
return cached_response
# Process conversation history maybe_trun_chunks = truncate_list_by_token_size(
history_context = "" valid_chunks,
if query_param.conversation_history: key=lambda x: x["content"],
history_context = get_conversation_turns( max_token_size=query_param.max_token_for_text_unit,
query_param.conversation_history, query_param.history_turns tokenizer=tokenizer,
) )
# 2. Execute knowledge graph and vector searches in parallel logger.debug(
async def get_kg_context(): f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
try: )
hl_keywords, ll_keywords = await get_keywords_from_query( logger.info(
query, query_param, global_config, hashing_kv f"Vector query: {len(maybe_trun_chunks)} chunks, top_k: {query_param.top_k}"
)
if not hl_keywords and not ll_keywords:
logger.warning("Both high-level and low-level keywords are empty")
return None
# Convert keyword lists to strings
ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
# Set query mode based on available keywords
if not ll_keywords_str and not hl_keywords_str:
return None
elif not ll_keywords_str:
query_param.mode = "global"
elif not hl_keywords_str:
query_param.mode = "local"
else:
query_param.mode = "hybrid"
# Build knowledge graph context
context = await _build_query_context(
ll_keywords_str,
hl_keywords_str,
knowledge_graph_inst,
entities_vdb,
relationships_vdb,
text_chunks_db,
query_param,
)
return context
except Exception as e:
logger.error(f"Error in get_kg_context: {str(e)}")
traceback.print_exc()
return None
# 3. Execute both retrievals in parallel
kg_context, vector_context = await asyncio.gather(
get_kg_context(), _get_vector_context(query, chunks_vdb, query_param, tokenizer)
)
# 4. Merge contexts
if kg_context is None and vector_context is None:
return PROMPTS["fail_response"]
if query_param.only_need_context:
context_str = f"""
\r\n\r\n=====Knowledge Graph Context=====\r\n\r\n
{kg_context if kg_context else "No relevant knowledge graph information found"}
\r\n\r\n=====Vector Context=====\r\n\r\n
{vector_context if vector_context else "No relevant text information found"}
""".strip()
return context_str
# 5. Construct hybrid prompt
sys_prompt = (
system_prompt if system_prompt else PROMPTS["mix_rag_response"]
).format(
kg_context=kg_context
if kg_context
else "No relevant knowledge graph information found",
vector_context=vector_context
if vector_context
else "No relevant text information found",
response_type=query_param.response_type,
history=history_context,
)
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(tokenizer.encode(query + sys_prompt))
logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
# 6. Generate response
response = await use_model_func(
query,
system_prompt=sys_prompt,
stream=query_param.stream,
)
# Clean up response content
if isinstance(response, str) and len(response) > len(sys_prompt):
response = (
response.replace(sys_prompt, "")
.replace("user", "")
.replace("model", "")
.replace(query, "")
.replace("<system>", "")
.replace("</system>", "")
.strip()
) )
if hashing_kv.global_config.get("enable_llm_cache"): if not maybe_trun_chunks:
# 7. Save cache - Only cache after collecting complete response return [], [], []
await save_to_cache(
hashing_kv, # Create empty entities and relations contexts
CacheData( entities_context = []
args_hash=args_hash, relations_context = []
content=response,
prompt=query, # Create text_units_context directly as a list of dictionaries
quantized=quantized, text_units_context = []
min_val=min_val, for i, chunk in enumerate(maybe_trun_chunks):
max_val=max_val, text_units_context.append(
mode="mix", {
cache_type="query", "id": i + 1,
), "content": chunk["content"],
"file_path": chunk["file_path"],
}
) )
return response return entities_context, relations_context, text_units_context
except Exception as e:
logger.error(f"Error in _get_vector_context: {e}")
return [], [], []
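
As a quick usage sketch (assuming an initialized `chunks_vdb`, `query_param`, and `tokenizer`), the new helper always returns a three-tuple so its output can be merged with the graph-derived contexts:

```python
# Hypothetical call site; chunks_vdb, query_param and tokenizer are assumed to exist.
entities_ctx, relations_ctx, text_units_ctx = await _get_vector_context(
    "What is LightRAG?", chunks_vdb, query_param, tokenizer
)
# entities_ctx and relations_ctx are always empty for pure vector retrieval;
# text_units_ctx holds dicts like {"id": 1, "content": "...", "file_path": "..."}.
```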
async def _build_query_context( async def _build_query_context(
@@ -1283,8 +1199,11 @@ async def _build_query_context(
relationships_vdb: BaseVectorStorage, relationships_vdb: BaseVectorStorage,
text_chunks_db: BaseKVStorage, text_chunks_db: BaseKVStorage,
query_param: QueryParam, query_param: QueryParam,
chunks_vdb: BaseVectorStorage = None, # Add chunks_vdb parameter for mix mode
): ):
logger.info(f"Process {os.getpid()} buidling query context...") logger.info(f"Process {os.getpid()} building query context...")
# Handle local and global modes as before
if query_param.mode == "local": if query_param.mode == "local":
entities_context, relations_context, text_units_context = await _get_node_data( entities_context, relations_context, text_units_context = await _get_node_data(
ll_keywords, ll_keywords,
@@ -1301,7 +1220,7 @@ async def _build_query_context(
text_chunks_db, text_chunks_db,
query_param, query_param,
) )
else:  # hybrid mode
else:  # hybrid or mix mode
ll_data = await _get_node_data( ll_data = await _get_node_data(
ll_keywords, ll_keywords,
knowledge_graph_inst, knowledge_graph_inst,
@@ -1329,10 +1248,43 @@ async def _build_query_context(
hl_text_units_context, hl_text_units_context,
) = hl_data ) = hl_data
entities_context, relations_context, text_units_context = combine_contexts( # Initialize vector data with empty lists
[hl_entities_context, ll_entities_context], vector_entities_context, vector_relations_context, vector_text_units_context = (
[hl_relations_context, ll_relations_context], [],
[hl_text_units_context, ll_text_units_context], [],
[],
)
# Only get vector data if in mix mode
if query_param.mode == "mix" and hasattr(query_param, "original_query"):
# Get tokenizer from text_chunks_db
tokenizer = text_chunks_db.global_config.get("tokenizer")
# Get vector context in triple format
vector_data = await _get_vector_context(
query_param.original_query, # We need to pass the original query
chunks_vdb,
query_param,
tokenizer,
)
# If vector_data is not None, unpack it
if vector_data is not None:
(
vector_entities_context,
vector_relations_context,
vector_text_units_context,
) = vector_data
# Combine and deduplicate the entities, relationships, and sources
entities_context = process_combine_contexts(
hl_entities_context, ll_entities_context, vector_entities_context
)
relations_context = process_combine_contexts(
hl_relations_context, ll_relations_context, vector_relations_context
)
text_units_context = process_combine_contexts(
hl_text_units_context, ll_text_units_context, vector_text_units_context
) )
# not necessary to use LLM to generate a response # not necessary to use LLM to generate a response
if not entities_context and not relations_context: if not entities_context and not relations_context:
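
Put together, the hybrid/mix branch of `_build_query_context` now reads roughly as below (condensed sketch of the hunk above):

```python
# Vector results are fetched only in mix mode and merged with the
# high-level and low-level graph contexts.
vector_entities_context, vector_relations_context, vector_text_units_context = [], [], []
if query_param.mode == "mix" and hasattr(query_param, "original_query"):
    tokenizer = text_chunks_db.global_config.get("tokenizer")
    vector_data = await _get_vector_context(
        query_param.original_query, chunks_vdb, query_param, tokenizer
    )
    if vector_data is not None:
        (
            vector_entities_context,
            vector_relations_context,
            vector_text_units_context,
        ) = vector_data

entities_context = process_combine_contexts(
    hl_entities_context, ll_entities_context, vector_entities_context
)
relations_context = process_combine_contexts(
    hl_relations_context, ll_relations_context, vector_relations_context
)
text_units_context = process_combine_contexts(
    hl_text_units_context, ll_text_units_context, vector_text_units_context
)
```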
@@ -1440,17 +1392,7 @@ async def _get_node_data(
) )
# build prompt # build prompt
entites_section_list = [ entities_context = []
[
"id",
"entity",
"type",
"description",
"rank",
"created_at",
"file_path",
]
]
for i, n in enumerate(node_datas): for i, n in enumerate(node_datas):
created_at = n.get("created_at", "UNKNOWN") created_at = n.get("created_at", "UNKNOWN")
if isinstance(created_at, (int, float)): if isinstance(created_at, (int, float)):
@@ -1459,32 +1401,19 @@ async def _get_node_data(
# Get file path from node data # Get file path from node data
file_path = n.get("file_path", "unknown_source") file_path = n.get("file_path", "unknown_source")
entites_section_list.append( entities_context.append(
[ {
i, "id": i + 1,
n["entity_name"], "entity": n["entity_name"],
n.get("entity_type", "UNKNOWN"), "type": n.get("entity_type", "UNKNOWN"),
n.get("description", "UNKNOWN"), "description": n.get("description", "UNKNOWN"),
n["rank"], "rank": n["rank"],
created_at, "created_at": created_at,
file_path, "file_path": file_path,
] }
) )
entities_context = list_of_list_to_json(entites_section_list)
relations_section_list = [ relations_context = []
[
"id",
"source",
"target",
"description",
"keywords",
"weight",
"rank",
"created_at",
"file_path",
]
]
for i, e in enumerate(use_relations): for i, e in enumerate(use_relations):
created_at = e.get("created_at", "UNKNOWN") created_at = e.get("created_at", "UNKNOWN")
# Convert timestamp to readable format # Convert timestamp to readable format
@@ -1494,27 +1423,29 @@ async def _get_node_data(
# Get file path from edge data # Get file path from edge data
file_path = e.get("file_path", "unknown_source") file_path = e.get("file_path", "unknown_source")
relations_section_list.append( relations_context.append(
[ {
i, "id": i + 1,
e["src_tgt"][0], "entity1": e["src_tgt"][0],
e["src_tgt"][1], "entity2": e["src_tgt"][1],
e["description"], "description": e["description"],
e["keywords"], "keywords": e["keywords"],
e["weight"], "weight": e["weight"],
e["rank"], "rank": e["rank"],
created_at, "created_at": created_at,
file_path, "file_path": file_path,
] }
) )
relations_context = list_of_list_to_json(relations_section_list)
text_units_section_list = [["id", "content", "file_path"]] text_units_context = []
for i, t in enumerate(use_text_units): for i, t in enumerate(use_text_units):
text_units_section_list.append( text_units_context.append(
[i, t["content"], t.get("file_path", "unknown_source")] {
"id": i + 1,
"content": t["content"],
"file_path": t.get("file_path", "unknown_source"),
}
) )
text_units_context = list_of_list_to_json(text_units_section_list)
return entities_context, relations_context, text_units_context return entities_context, relations_context, text_units_context
@@ -1757,19 +1688,7 @@ async def _get_edge_data(
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks" f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
) )
relations_section_list = [ relations_context = []
[
"id",
"source",
"target",
"description",
"keywords",
"weight",
"rank",
"created_at",
"file_path",
]
]
for i, e in enumerate(edge_datas): for i, e in enumerate(edge_datas):
created_at = e.get("created_at", "UNKNOWN") created_at = e.get("created_at", "UNKNOWN")
# Convert timestamp to readable format # Convert timestamp to readable format
@@ -1779,24 +1698,21 @@ async def _get_edge_data(
# Get file path from edge data # Get file path from edge data
file_path = e.get("file_path", "unknown_source") file_path = e.get("file_path", "unknown_source")
relations_section_list.append( relations_context.append(
[ {
i, "id": i + 1,
e["src_id"], "entity1": e["src_id"],
e["tgt_id"], "entity2": e["tgt_id"],
e["description"], "description": e["description"],
e["keywords"], "keywords": e["keywords"],
e["weight"], "weight": e["weight"],
e["rank"], "rank": e["rank"],
created_at, "created_at": created_at,
file_path, "file_path": file_path,
] }
) )
relations_context = list_of_list_to_json(relations_section_list)
entites_section_list = [ entities_context = []
["id", "entity", "type", "description", "rank", "created_at", "file_path"]
]
for i, n in enumerate(use_entities): for i, n in enumerate(use_entities):
created_at = n.get("created_at", "UNKNOWN") created_at = n.get("created_at", "UNKNOWN")
# Convert timestamp to readable format # Convert timestamp to readable format
@@ -1806,23 +1722,27 @@ async def _get_edge_data(
# Get file path from node data # Get file path from node data
file_path = n.get("file_path", "unknown_source") file_path = n.get("file_path", "unknown_source")
entites_section_list.append( entities_context.append(
[ {
i, "id": i + 1,
n["entity_name"], "entity": n["entity_name"],
n.get("entity_type", "UNKNOWN"), "type": n.get("entity_type", "UNKNOWN"),
n.get("description", "UNKNOWN"), "description": n.get("description", "UNKNOWN"),
n["rank"], "rank": n["rank"],
created_at, "created_at": created_at,
file_path, "file_path": file_path,
] }
) )
entities_context = list_of_list_to_json(entites_section_list)
text_units_section_list = [["id", "content", "file_path"]] text_units_context = []
for i, t in enumerate(use_text_units): for i, t in enumerate(use_text_units):
text_units_section_list.append([i, t["content"], t.get("file_path", "unknown")]) text_units_context.append(
text_units_context = list_of_list_to_json(text_units_section_list) {
"id": i + 1,
"content": t["content"],
"file_path": t.get("file_path", "unknown"),
}
)
return entities_context, relations_context, text_units_context return entities_context, relations_context, text_units_context
@@ -1938,29 +1858,9 @@ async def _find_related_text_unit_from_relationships(
return all_text_units return all_text_units
def combine_contexts(entities, relationships, sources):
# Function to extract entities, relationships, and sources from context strings
hl_entities, ll_entities = entities[0], entities[1]
hl_relationships, ll_relationships = relationships[0], relationships[1]
hl_sources, ll_sources = sources[0], sources[1]
# Combine and deduplicate the entities
combined_entities = process_combine_contexts(hl_entities, ll_entities)
# Combine and deduplicate the relationships
combined_relationships = process_combine_contexts(
hl_relationships, ll_relationships
)
# Combine and deduplicate the sources
combined_sources = process_combine_contexts(hl_sources, ll_sources)
return combined_entities, combined_relationships, combined_sources
async def naive_query( async def naive_query(
query: str, query: str,
chunks_vdb: BaseVectorStorage, chunks_vdb: BaseVectorStorage,
text_chunks_db: BaseKVStorage,
query_param: QueryParam, query_param: QueryParam,
global_config: dict[str, str], global_config: dict[str, str],
hashing_kv: BaseKVStorage | None = None, hashing_kv: BaseKVStorage | None = None,
@@ -1982,14 +1882,24 @@ async def naive_query(
return cached_response return cached_response
tokenizer: Tokenizer = global_config["tokenizer"] tokenizer: Tokenizer = global_config["tokenizer"]
section = await _get_vector_context(query, chunks_vdb, query_param, tokenizer)
if section is None:
_, _, text_units_context = await _get_vector_context(
query, chunks_vdb, query_param, tokenizer
)
if text_units_context is None or len(text_units_context) == 0:
return PROMPTS["fail_response"] return PROMPTS["fail_response"]
text_units_str = json.dumps(text_units_context, ensure_ascii=False)
if query_param.only_need_context: if query_param.only_need_context:
return section
return f"""
---Document Chunks---
```json
{text_units_str}
```
"""
# Process conversation history # Process conversation history
history_context = "" history_context = ""
if query_param.conversation_history: if query_param.conversation_history:
@@ -1999,7 +1909,7 @@ async def naive_query(
sys_prompt_temp = system_prompt if system_prompt else PROMPTS["naive_rag_response"] sys_prompt_temp = system_prompt if system_prompt else PROMPTS["naive_rag_response"]
sys_prompt = sys_prompt_temp.format( sys_prompt = sys_prompt_temp.format(
content_data=section,
content_data=text_units_str,
response_type=query_param.response_type, response_type=query_param.response_type,
history=history_context, history=history_context,
) )
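
The naive path now serializes the retrieved chunks with `json.dumps` and injects the JSON string into the prompt (or wraps it in a `---Document Chunks---` block when only the context is requested). A minimal sketch of that behaviour, with invented chunk content:

```python
import json

text_units_context = [
    {"id": 1, "content": "LightRAG splits documents into chunks ...", "file_path": "docs/intro.md"}
]
text_units_str = json.dumps(text_units_context, ensure_ascii=False)

# With only_need_context=True the caller gets the chunks wrapped in a
# ---Document Chunks--- header and a fenced JSON block; otherwise
# text_units_str is formatted into the naive_rag_response prompt as {content_data}.
```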
@@ -2056,6 +1966,9 @@ async def kg_query_with_keywords(
query_param: QueryParam, query_param: QueryParam,
global_config: dict[str, str], global_config: dict[str, str],
hashing_kv: BaseKVStorage | None = None, hashing_kv: BaseKVStorage | None = None,
ll_keywords: list[str] = [],
hl_keywords: list[str] = [],
chunks_vdb: BaseVectorStorage | None = None,
) -> str | AsyncIterator[str]: ) -> str | AsyncIterator[str]:
""" """
Refactored kg_query that does NOT extract keywords by itself. Refactored kg_query that does NOT extract keywords by itself.
@@ -2069,9 +1982,6 @@ async def kg_query_with_keywords(
# Apply higher priority (5) to query relation LLM function # Apply higher priority (5) to query relation LLM function
use_model_func = partial(use_model_func, _priority=5) use_model_func = partial(use_model_func, _priority=5)
# ---------------------------
# 1) Handle potential cache for query results
# ---------------------------
args_hash = compute_args_hash(query_param.mode, query, cache_type="query") args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
cached_response, quantized, min_val, max_val = await handle_cache( cached_response, quantized, min_val, max_val = await handle_cache(
hashing_kv, args_hash, query, query_param.mode, cache_type="query" hashing_kv, args_hash, query, query_param.mode, cache_type="query"
@@ -2079,14 +1989,6 @@ async def kg_query_with_keywords(
if cached_response is not None: if cached_response is not None:
return cached_response return cached_response
# ---------------------------
# 2) RETRIEVE KEYWORDS FROM query_param
# ---------------------------
# If these fields don't exist, default to empty lists/strings.
hl_keywords = getattr(query_param, "hl_keywords", []) or []
ll_keywords = getattr(query_param, "ll_keywords", []) or []
# If neither has any keywords, you could handle that logic here. # If neither has any keywords, you could handle that logic here.
if not hl_keywords and not ll_keywords: if not hl_keywords and not ll_keywords:
logger.warning( logger.warning(
@@ -2100,25 +2002,9 @@ async def kg_query_with_keywords(
logger.warning("high_level_keywords is empty, switching to local mode.") logger.warning("high_level_keywords is empty, switching to local mode.")
query_param.mode = "local" query_param.mode = "local"
# Flatten low-level and high-level keywords if needed
ll_keywords_flat = (
[item for sublist in ll_keywords for item in sublist]
if any(isinstance(i, list) for i in ll_keywords)
else ll_keywords
)
hl_keywords_flat = (
[item for sublist in hl_keywords for item in sublist]
if any(isinstance(i, list) for i in hl_keywords)
else hl_keywords
)
# Join the flattened lists
ll_keywords_str = ", ".join(ll_keywords_flat) if ll_keywords_flat else ""
hl_keywords_str = ", ".join(hl_keywords_flat) if hl_keywords_flat else ""
ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
# ---------------------------
# 3) BUILD CONTEXT
# ---------------------------
context = await _build_query_context( context = await _build_query_context(
ll_keywords_str, ll_keywords_str,
hl_keywords_str, hl_keywords_str,
@@ -2127,18 +2013,14 @@ async def kg_query_with_keywords(
relationships_vdb, relationships_vdb,
text_chunks_db, text_chunks_db,
query_param, query_param,
chunks_vdb=chunks_vdb,
) )
if not context: if not context:
return PROMPTS["fail_response"] return PROMPTS["fail_response"]
# If only context is needed, return it
if query_param.only_need_context: if query_param.only_need_context:
return context return context
# ---------------------------
# 4) BUILD THE SYSTEM PROMPT + CALL LLM
# ---------------------------
# Process conversation history # Process conversation history
history_context = "" history_context = ""
if query_param.conversation_history: if query_param.conversation_history:
@@ -2180,7 +2062,6 @@ async def kg_query_with_keywords(
) )
if hashing_kv.global_config.get("enable_llm_cache"): if hashing_kv.global_config.get("enable_llm_cache"):
# 7. Save cache - only cache after the complete response has been collected
await save_to_cache( await save_to_cache(
hashing_kv, hashing_kv,
CacheData( CacheData(
@@ -2198,85 +2079,6 @@ async def kg_query_with_keywords(
return response return response
async def _get_vector_context(
query: str,
chunks_vdb: BaseVectorStorage,
query_param: QueryParam,
tokenizer: Tokenizer,
) -> str | None:
"""
Retrieve vector context from the vector database.
This function performs vector search to find relevant text chunks for a query,
formats them with file path and creation time information, and truncates
the results to fit within token limits.
Args:
query: The query string to search for
chunks_vdb: Vector database containing document chunks
query_param: Query parameters including top_k and ids
tokenizer: Tokenizer for counting tokens
Returns:
Formatted string containing relevant text chunks, or None if no results found
"""
try:
# Reduce top_k for vector search in hybrid mode since we have structured information from KG
mix_topk = (
min(10, query_param.top_k)
if hasattr(query_param, "mode") and query_param.mode == "mix"
else query_param.top_k
)
results = await chunks_vdb.query(query, top_k=mix_topk, ids=query_param.ids)
if not results:
return None
valid_chunks = []
for result in results:
if "content" in result:
# Directly use content from chunks_vdb.query result
chunk_with_time = {
"content": result["content"],
"created_at": result.get("created_at", None),
"file_path": result.get("file_path", None),
}
valid_chunks.append(chunk_with_time)
if not valid_chunks:
return None
maybe_trun_chunks = truncate_list_by_token_size(
valid_chunks,
key=lambda x: x["content"],
max_token_size=query_param.max_token_for_text_unit,
tokenizer=tokenizer,
)
logger.debug(
f"Truncate chunks from {len(valid_chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
)
logger.info(f"Vector query: {len(maybe_trun_chunks)} chunks, top_k: {mix_topk}")
if not maybe_trun_chunks:
return None
# Include time information in content
formatted_chunks = []
for c in maybe_trun_chunks:
chunk_text = "File path: " + c["file_path"] + "\r\n\r\n" + c["content"]
if c["created_at"]:
chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\r\n\r\n{chunk_text}"
formatted_chunks.append(chunk_text)
logger.debug(
f"Truncate chunks from {len(valid_chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
)
return "\r\n\r\n--New Chunk--\r\n\r\n".join(formatted_chunks)
except Exception as e:
logger.error(f"Error in _get_vector_context: {e}")
return None
async def query_with_keywords( async def query_with_keywords(
query: str, query: str,
prompt: str, prompt: str,
@@ -2320,12 +2122,15 @@ async def query_with_keywords(
) )
# Create a new string with the prompt and the keywords # Create a new string with the prompt and the keywords
ll_keywords_str = ", ".join(ll_keywords) keywords_str = ", ".join(ll_keywords + hl_keywords)
hl_keywords_str = ", ".join(hl_keywords) formatted_question = (
formatted_question = f"{prompt}\n\n### Keywords:\nHigh-level: {hl_keywords_str}\nLow-level: {ll_keywords_str}\n\n### Query:\n{query}" f"{prompt}\n\n### Keywords\n\n{keywords_str}\n\n### Query\n\n{query}"
)
param.original_query = query
# Use appropriate query method based on mode # Use appropriate query method based on mode
if param.mode in ["local", "global", "hybrid"]: if param.mode in ["local", "global", "hybrid", "mix"]:
return await kg_query_with_keywords( return await kg_query_with_keywords(
formatted_question, formatted_question,
knowledge_graph_inst, knowledge_graph_inst,
@@ -2335,6 +2140,9 @@ async def query_with_keywords(
param, param,
global_config, global_config,
hashing_kv=hashing_kv, hashing_kv=hashing_kv,
hl_keywords=hl_keywords,
ll_keywords=ll_keywords,
chunks_vdb=chunks_vdb,
) )
elif param.mode == "naive": elif param.mode == "naive":
return await naive_query( return await naive_query(
@@ -2345,17 +2153,5 @@ async def query_with_keywords(
global_config, global_config,
hashing_kv=hashing_kv, hashing_kv=hashing_kv,
) )
elif param.mode == "mix":
return await mix_kg_vector_query(
formatted_question,
knowledge_graph_inst,
entities_vdb,
relationships_vdb,
chunks_vdb,
text_chunks_db,
param,
global_config,
hashing_kv=hashing_kv,
)
else: else:
raise ValueError(f"Unknown mode {param.mode}") raise ValueError(f"Unknown mode {param.mode}")

View File

@@ -311,7 +311,7 @@ When handling content with timestamps:
---Conversation History--- ---Conversation History---
{history} {history}
---Document Chunks---
---Document Chunks(DC)---
{content_data} {content_data}
---Response Rules--- ---Response Rules---
@@ -320,7 +320,7 @@ When handling content with timestamps:
- Use markdown formatting with appropriate section headings - Use markdown formatting with appropriate section headings
- Please respond in the same language as the user's question. - Please respond in the same language as the user's question.
- Ensure the response maintains continuity with the conversation history. - Ensure the response maintains continuity with the conversation history.
- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
- List up to 5 most important reference sources at the end under "References" section. Clearly indicating each source from Document Chunks(DC), and include the file path if available, in the following format: [DC] file_path
- If you don't know the answer, just say so. - If you don't know the answer, just say so.
- Do not include information not provided by the Document Chunks.""" - Do not include information not provided by the Document Chunks."""
@@ -347,41 +347,3 @@ Similarity score criteria:
0.5: Partially related and answer needs modification to be used 0.5: Partially related and answer needs modification to be used
Return only a number between 0-1, without any additional content. Return only a number between 0-1, without any additional content.
""" """
PROMPTS["mix_rag_response"] = """---Role---
You are a helpful assistant responding to user query about Data Sources provided below.
---Goal---
Generate a concise response based on Data Sources and follow Response Rules, considering both the conversation history and the current query. Data sources contain two parts: Knowledge Graph(KG) and Document Chunks(DC). Summarize all information in the provided Data Sources, and incorporating general knowledge relevant to the Data Sources. Do not include information not provided by Data Sources.
When handling information with timestamps:
1. Each piece of information (both relationships and content) has a "created_at" timestamp indicating when we acquired this knowledge
2. When encountering conflicting information, consider both the content/relationship and the timestamp
3. Don't automatically prefer the most recent information - use judgment based on the context
4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps
---Conversation History---
{history}
---Data Sources---
1. From Knowledge Graph(KG):
{kg_context}
2. From Document Chunks(DC):
{vector_context}
---Response Rules---
- Target format and length: {response_type}
- Use markdown formatting with appropriate section headings
- Please respond in the same language as the user's question.
- Ensure the response maintains continuity with the conversation history.
- Organize answer in sections focusing on one main point or aspect of the answer
- Use clear and descriptive section titles that reflect the content
- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] file_path
- If you don't know the answer, just say so. Do not make anything up.
- Do not include information not provided by the Data Sources."""

View File

@@ -719,26 +719,6 @@ def truncate_list_by_token_size(
return list_data return list_data
def list_of_list_to_json(data: list[list[str]]) -> list[dict[str, str]]:
if not data or len(data) <= 1:
return []
header = data[0]
result = []
for row in data[1:]:
if len(row) >= 2:
item = {}
for i, field_name in enumerate(header):
if i < len(row):
item[field_name] = str(row[i])
else:
item[field_name] = ""
result.append(item)
return result
def save_data_to_file(data, file_name): def save_data_to_file(data, file_name):
with open(file_name, "w", encoding="utf-8") as f: with open(file_name, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4) json.dump(data, f, ensure_ascii=False, indent=4)
@@ -804,21 +784,33 @@ def xml_to_json(xml_file):
return None return None
def process_combine_contexts(
hl_context: list[dict[str, str]], ll_context: list[dict[str, str]]
):
def process_combine_contexts(*context_lists):
"""
Combine multiple context lists and remove duplicate content
Args:
*context_lists: Any number of context lists
Returns:
Combined context list with duplicates removed
"""
seen_content = {} seen_content = {}
combined_data = [] combined_data = []
for item in hl_context + ll_context:
content_dict = {k: v for k, v in item.items() if k != "id"}
content_key = tuple(sorted(content_dict.items()))
if content_key not in seen_content:
seen_content[content_key] = item
combined_data.append(item)
# Iterate through all input context lists
for context_list in context_lists:
if not context_list:  # Skip empty lists
continue
for item in context_list:
content_dict = {k: v for k, v in item.items() if k != "id"}
content_key = tuple(sorted(content_dict.items()))
if content_key not in seen_content:
seen_content[content_key] = item
combined_data.append(item)
# Reassign IDs
for i, item in enumerate(combined_data): for i, item in enumerate(combined_data):
item["id"] = str(i) item["id"] = str(i + 1)
return combined_data return combined_data
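
A small usage sketch of the variadic `process_combine_contexts` (inputs invented), showing content-based de-duplication and 1-based ID reassignment:

```python
# The same relation appears in the high-level and vector-derived lists
# and is kept only once; empty lists are skipped.
hl = [{"id": 1, "entity1": "A", "entity2": "B", "description": "A relates to B"}]
vector = [
    {"id": 7, "entity1": "A", "entity2": "B", "description": "A relates to B"},
    {"id": 8, "entity1": "B", "entity2": "C", "description": "B relates to C"},
]

combined = process_combine_contexts(hl, [], vector)
# -> two items, with "id" reassigned to the strings "1" and "2"
```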

View File

@@ -94,10 +94,6 @@ export type QueryRequest = {
max_token_for_global_context?: number max_token_for_global_context?: number
/** Maximum number of tokens allocated for entity descriptions in local retrieval. */ /** Maximum number of tokens allocated for entity descriptions in local retrieval. */
max_token_for_local_context?: number max_token_for_local_context?: number
/** List of high-level keywords to prioritize in retrieval. */
hl_keywords?: string[]
/** List of low-level keywords to refine retrieval focus. */
ll_keywords?: string[]
/** /**
* Stores past conversation history to maintain context. * Stores past conversation history to maintain context.
* Format: [{"role": "user/assistant", "content": "message"}]. * Format: [{"role": "user/assistant", "content": "message"}].

View File

@@ -1,7 +1,6 @@
import { useCallback } from 'react' import { useCallback } from 'react'
import { QueryMode, QueryRequest } from '@/api/lightrag' import { QueryMode, QueryRequest } from '@/api/lightrag'
// Removed unused import for Text component // Removed unused import for Text component
import Input from '@/components/ui/Input'
import Checkbox from '@/components/ui/Checkbox' import Checkbox from '@/components/ui/Checkbox'
import NumberInput from '@/components/ui/NumberInput' import NumberInput from '@/components/ui/NumberInput'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/Card' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/Card'
@@ -242,71 +241,6 @@ export default function QuerySettings() {
</div> </div>
</> </>
{/* Keywords */}
<>
<>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="hl_keywords" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.hlKeywords')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.hlKeywordsTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div>
{/* Removed sr-only label */}
<Input
id="hl_keywords"
type="text"
value={querySettings.hl_keywords?.join(', ')}
onChange={(e) => {
const keywords = e.target.value
.split(',')
.map((k) => k.trim())
.filter((k) => k !== '')
handleChange('hl_keywords', keywords)
}}
placeholder={t('retrievePanel.querySettings.hlkeywordsPlaceHolder')}
/>
</div>
</>
<>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="ll_keywords" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.llKeywords')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.llKeywordsTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div>
{/* Removed sr-only label */}
<Input
id="ll_keywords"
type="text"
value={querySettings.ll_keywords?.join(', ')}
onChange={(e) => {
const keywords = e.target.value
.split(',')
.map((k) => k.trim())
.filter((k) => k !== '')
handleChange('ll_keywords', keywords)
}}
placeholder={t('retrievePanel.querySettings.hlkeywordsPlaceHolder')}
/>
</div>
</>
</>
{/* Toggle Options */} {/* Toggle Options */}
<> <>
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">