Merge pull request #523 from magicyuan876/main
feat(lightrag): Implement mix search mode combining knowledge graph a…
README.md (15 changes)
@@ -106,8 +106,21 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
 # Perform hybrid search
 print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))

+# Perform mix search (Knowledge Graph + Vector Retrieval)
+# Mix mode combines knowledge graph and vector search:
+# - Uses both structured (KG) and unstructured (vector) information
+# - Provides comprehensive answers by analyzing relationships and context
+# - Supports image content through HTML img tags
+# - Allows control over retrieval depth via top_k parameter
+print(rag.query("What are the top themes in this story?", param=QueryParam(
+    mode="mix")))
 ```

 <details>
 <summary> Using Open AI-like APIs </summary>
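The last bullet in the comments added above mentions controlling retrieval depth via the top_k parameter. A minimal sketch of what that could look like, assuming the `rag` instance from the README is already set up; the value 60 and the question text are illustrative, not taken from the diff:

```python
from lightrag import QueryParam

# Hypothetical tuning of retrieval depth for mix mode; top_k is the
# QueryParam field documented further down, the value here is illustrative.
param = QueryParam(mode="mix", top_k=60)
print(rag.query("What are the top themes in this story?", param=param))
```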
@@ -262,7 +275,7 @@ In order to run this experiment on low RAM GPU you should select small model and
 
 ```python
 class QueryParam:
-    mode: Literal["local", "global", "hybrid", "naive"] = "global"
+    mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
     only_need_context: bool = False
     response_type: str = "Multiple Paragraphs"
     # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
@@ -16,7 +16,7 @@ T = TypeVar("T")
 
 @dataclass
 class QueryParam:
-    mode: Literal["local", "global", "hybrid", "naive"] = "global"
+    mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
     only_need_context: bool = False
     only_need_prompt: bool = False
     response_type: str = "Multiple Paragraphs"
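With "mix" now accepted, the existing QueryParam flags combine with it as well. For example, the mix_kg_vector_query function added later in this commit returns the raw contexts as a dict when only_need_context is set. A small sketch, assuming a LightRAG instance named `rag` already exists and that the synchronous query wrapper passes the dict through unchanged:

```python
from lightrag import QueryParam

# Inspect what mix mode retrieved without generating an answer.
# mix_kg_vector_query returns {"kg_context": ..., "vector_context": ...}
# when only_need_context is True (see its diff further down).
contexts = rag.query(
    "What are the top themes in this story?",
    param=QueryParam(mode="mix", only_need_context=True),
)
print(contexts["kg_context"])
print(contexts["vector_context"])
```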
@@ -16,6 +16,7 @@ from .operate import (
     # local_query,global_query,hybrid_query,
     kg_query,
     naive_query,
+    mix_kg_vector_query,
 )

 from .utils import (
@@ -630,6 +631,25 @@ class LightRAG:
                     embedding_func=None,
                 ),
             )
+        elif param.mode == "mix":
+            response = await mix_kg_vector_query(
+                query,
+                self.chunk_entity_relation_graph,
+                self.entities_vdb,
+                self.relationships_vdb,
+                self.chunks_vdb,
+                self.text_chunks,
+                param,
+                asdict(self),
+                hashing_kv=self.llm_response_cache
+                if self.llm_response_cache
+                and hasattr(self.llm_response_cache, "global_config")
+                else self.key_string_value_json_storage_cls(
+                    namespace="llm_response_cache",
+                    global_config=asdict(self),
+                    embedding_func=None,
+                ),
+            )
         else:
             raise ValueError(f"Unknown mode {param.mode}")
         await self._query_done()
@@ -1147,3 +1147,195 @@ async def naive_query(
     )

     return response


+async def mix_kg_vector_query(
+    query,
+    knowledge_graph_inst: BaseGraphStorage,
+    entities_vdb: BaseVectorStorage,
+    relationships_vdb: BaseVectorStorage,
+    chunks_vdb: BaseVectorStorage,
+    text_chunks_db: BaseKVStorage[TextChunkSchema],
+    query_param: QueryParam,
+    global_config: dict,
+    hashing_kv: BaseKVStorage = None,
+) -> str:
+    """
+    Hybrid retrieval implementation combining knowledge graph and vector search.
+
+    This function performs a hybrid search by:
+    1. Extracting semantic information from knowledge graph
+    2. Retrieving relevant text chunks through vector similarity
+    3. Combining both results for comprehensive answer generation
+    """
+    # 1. Cache handling
+    use_model_func = global_config["llm_model_func"]
+    args_hash = compute_args_hash("mix", query)
+    cached_response, quantized, min_val, max_val = await handle_cache(
+        hashing_kv, args_hash, query, "mix"
+    )
+    if cached_response is not None:
+        return cached_response
+
+    # 2. Execute knowledge graph and vector searches in parallel
+    async def get_kg_context():
+        try:
+            # Reuse keyword extraction logic from kg_query
+            example_number = global_config["addon_params"].get("example_number", None)
+            if example_number and example_number < len(
+                PROMPTS["keywords_extraction_examples"]
+            ):
+                examples = "\n".join(
+                    PROMPTS["keywords_extraction_examples"][: int(example_number)]
+                )
+            else:
+                examples = "\n".join(PROMPTS["keywords_extraction_examples"])
+
+            language = global_config["addon_params"].get(
+                "language", PROMPTS["DEFAULT_LANGUAGE"]
+            )
+
+            # Extract keywords using LLM
+            kw_prompt = PROMPTS["keywords_extraction"].format(
+                query=query, examples=examples, language=language
+            )
+            result = await use_model_func(kw_prompt, keyword_extraction=True)
+
+            match = re.search(r"\{.*\}", result, re.DOTALL)
+            if not match:
+                logger.warning(
+                    "No JSON-like structure found in keywords extraction result"
+                )
+                return None
+
+            result = match.group(0)
+            keywords_data = json.loads(result)
+            hl_keywords = keywords_data.get("high_level_keywords", [])
+            ll_keywords = keywords_data.get("low_level_keywords", [])
+
+            if not hl_keywords and not ll_keywords:
+                logger.warning("Both high-level and low-level keywords are empty")
+                return None
+
+            # Convert keyword lists to strings
+            ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
+            hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
+
+            # Set query mode based on available keywords
+            if not ll_keywords_str and not hl_keywords_str:
+                return None
+            elif not ll_keywords_str:
+                query_param.mode = "global"
+            elif not hl_keywords_str:
+                query_param.mode = "local"
+            else:
+                query_param.mode = "hybrid"
+
+            # Build knowledge graph context
+            context = await _build_query_context(
+                [ll_keywords_str, hl_keywords_str],
+                knowledge_graph_inst,
+                entities_vdb,
+                relationships_vdb,
+                text_chunks_db,
+                query_param,
+            )
+
+            return context
+
+        except Exception as e:
+            logger.error(f"Error in get_kg_context: {str(e)}")
+            return None
+
+    async def get_vector_context():
+        # Reuse vector search logic from naive_query
+        try:
+            # Reduce top_k for vector search in hybrid mode since we have structured information from KG
+            mix_topk = min(10, query_param.top_k)
+            results = await chunks_vdb.query(query, top_k=mix_topk)
+            if not results:
+                return None
+
+            chunks_ids = [r["id"] for r in results]
+            chunks = await text_chunks_db.get_by_ids(chunks_ids)
+
+            valid_chunks = [
+                chunk for chunk in chunks if chunk is not None and "content" in chunk
+            ]
+
+            if not valid_chunks:
+                return None
+
+            maybe_trun_chunks = truncate_list_by_token_size(
+                valid_chunks,
+                key=lambda x: x["content"],
+                max_token_size=query_param.max_token_for_text_unit,
+            )
+
+            if not maybe_trun_chunks:
+                return None
+
+            return "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
+        except Exception as e:
+            logger.error(f"Error in get_vector_context: {e}")
+            return None
+
+    # 3. Execute both retrievals in parallel
+    kg_context, vector_context = await asyncio.gather(
+        get_kg_context(), get_vector_context()
+    )
+
+    # 4. Merge contexts
+    if kg_context is None and vector_context is None:
+        return PROMPTS["fail_response"]
+
+    if query_param.only_need_context:
+        return {"kg_context": kg_context, "vector_context": vector_context}
+
+    # 5. Construct hybrid prompt
+    sys_prompt = PROMPTS["mix_rag_response"].format(
+        kg_context=kg_context
+        if kg_context
+        else "No relevant knowledge graph information found",
+        vector_context=vector_context
+        if vector_context
+        else "No relevant text information found",
+        response_type=query_param.response_type,
+    )
+
+    if query_param.only_need_prompt:
+        return sys_prompt
+
+    # 6. Generate response
+    response = await use_model_func(
+        query,
+        system_prompt=sys_prompt,
+        stream=query_param.stream,
+    )
+
+    if isinstance(response, str) and len(response) > len(sys_prompt):
+        response = (
+            response.replace(sys_prompt, "")
+            .replace("user", "")
+            .replace("model", "")
+            .replace(query, "")
+            .replace("<system>", "")
+            .replace("</system>", "")
+            .strip()
+        )
+
+    # 7. Save cache
+    await save_to_cache(
+        hashing_kv,
+        CacheData(
+            args_hash=args_hash,
+            content=response,
+            prompt=query,
+            quantized=quantized,
+            min_val=min_val,
+            max_val=max_val,
+            mode="mix",
+        ),
+    )
+
+    return response
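For reference, the keyword-extraction step inside get_kg_context above expects the LLM reply to contain a JSON object, which the regex then isolates. The two keys below are the ones the code reads; the values are invented examples, not output from the actual prompt:

```python
# Illustrative shape of the parsed keywords_data dict; only these two keys
# are consumed by get_kg_context, and the values here are made up.
keywords_data = {
    "high_level_keywords": ["story themes", "character relationships"],
    "low_level_keywords": ["friendship", "betrayal", "redemption"],
}
```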
@@ -284,3 +284,81 @@ Similarity score criteria:
 0.5: Partially related and answer needs modification to be used
 Return only a number between 0-1, without any additional content.
 """
+
+PROMPTS["mix_rag_response"] = """---Role Definition---
+You are a professional knowledge integration assistant, responsible for answering questions strictly based on provided knowledge graph and text information. You must follow these rules:
+1. Only use provided knowledge graph and text information
+2. Do not use your own knowledge or experience
+3. Do not make any assumptions or speculations
+4. Analyze the language used in the user message and respond in the same language
+5. Include relevant images from the source information using HTML img tags
+
+---Objective---
+Generate comprehensive and accurate answers based on knowledge graph and vector search information.
+First analyze the language of the user's question (Chinese/English/Others), then respond in the same language.
+In the following cases, respond politely with "I apologize, but I am unable to provide a complete answer to this question" in the user's language:
+1. No relevant information found in provided sources
+2. Question is beyond the scope of provided information
+3. Requires knowledge beyond provided information
+4. Requires speculation or assumptions
+
+---Information Sources---
+1. Knowledge Graph Analysis Results (Structured Information):
+{kg_context}
+
+2. Vector Search Results (Original Text):
+{vector_context}
+
+---Response Format---
+Target response format and length requirements: {response_type}
+Response language: Analyze user message language and respond in the same language
+Image inclusion: If source information contains relevant images in HTML img tags, include them in the response
+
+---Guidelines---
+1. Language Recognition and Usage:
+- Carefully analyze the language used in user message
+- If question is in Chinese, respond in Chinese (e.g., "非常抱歉,基于现有信息我无法完整回答这个问题")
+- If question is in English, respond in English
+- If question is in other languages, respond in the same language
+
+2. Information Usage Rules:
+- Must reference both knowledge graph and vector search results
+- Each statement must clearly indicate its source
+- Forbidden to use information outside provided sources
+- If information is insufficient, politely state inability to answer in user's language
+- When relevant images are found in source information, include them using HTML img tags
+
+3. Response Standards:
+- Strictly follow specified format and length requirements
+- Use markdown format for organization
+- Use quotation marks for direct quotes
+- Clearly distinguish between factual statements and sources
+- No speculation or assumptions allowed
+- Preserve and include HTML img tags for relevant images
+- Place images appropriately within the context of the answer
+
+4. Information Integration Requirements:
+- Only integrate directly relevant information
+- No excessive interpretation or reasoning
+- Maintain objectivity, no personal views
+- If information conflicts, note it and prioritize knowledge graph
+- When information is incomplete, clearly state the gaps
+- Include relevant images that support or illustrate the answer
+
+5. Quality Control:
+- Every answer must be traceable to provided sources
+- No vague or uncertain expressions
+- No subjective judgments
+- No filling in information gaps
+- No supplementing with common sense or background knowledge
+- Only include images that are directly relevant to the question
+- Maintain original img tags without modification
+
+Processing Flow:
+1. First identify the language of user message
+2. Analyze provided knowledge graph and vector search information
+3. Identify relevant images in HTML img tags from the sources
+4. Organize and generate response in the same language as user, incorporating relevant images
+5. If unable to answer, express this politely in user's language with an explanation
+
+Remember: It's better to say "I apologize, but I am unable to provide a complete answer to this question" (in the user's language, maintaining politeness) than to use information outside provided sources or make speculations. When including images, only use those that are directly relevant and helpful to the answer."""
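To see how the placeholders in this template are filled, here is the corresponding .format call, mirroring the one in mix_kg_vector_query above; the two context strings are shortened stand-ins for what get_kg_context and get_vector_context actually return:

```python
# Mirrors the sys_prompt construction in mix_kg_vector_query; the context
# arguments would normally come from the KG and vector retrieval steps.
sys_prompt = PROMPTS["mix_rag_response"].format(
    kg_context="...entities and relationships extracted from the graph...",
    vector_context="...chunk text...\n--New Chunk--\n...more chunk text...",
    response_type="Multiple Paragraphs",
)
```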