Refactor: Unify naive context to JSON format

- Merges 'mix' mode query handling into 'hybrid' mode, simplifying the query logic by removing the dedicated `mix_kg_vector_query` function
- Standardizes vector search results by using a JSON string format to build the context
- Fixes a bug in `query_with_keywords`, ensuring that `hl_keywords` and `ll_keywords` are correctly passed to `kg_query_with_keywords`
2025-05-07 17:42:14 +08:00
parent 59771b60df
commit 156244e260
3 changed files with 148 additions and 309 deletions
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -721,19 +721,19 @@ def truncate_list_by_token_size(

 def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
    """Convert a 2D string list (table-like data) into a list of dictionaries.
-    
+
    The first row is treated as header containing field names. Subsequent rows become
    dictionary entries where keys come from header and values from row data.
-    
+
    Args:
        data: 2D string array where first row contains headers and rest are data rows.
              Minimum 2 columns required in data rows (rows with <2 elements are skipped).
-    
+
    Returns:
        List of dictionaries where each dict represents a data row with:
        - Keys: Header values from first row
        - Values: Corresponding row values (empty string if missing)
-    
+
    Example:
        Input: [["Name","Age"], ["Alice","23"], ["Bob"]]
        Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}]
@@ -822,21 +822,33 @@ def xml_to_json(xml_file):
        return None


-def process_combine_contexts(
-    hl_context: list[dict[str, str]], ll_context: list[dict[str, str]]
-):
+def process_combine_contexts(*context_lists):
+    """
+    Combine multiple context lists and remove duplicate content
+
+    Args:
+        *context_lists: Any number of context lists
+
+    Returns:
+        Combined context list with duplicates removed
+    """
    seen_content = {}
    combined_data = []

-    for item in hl_context + ll_context:
-        content_dict = {k: v for k, v in item.items() if k != "id"}
-        content_key = tuple(sorted(content_dict.items()))
-        if content_key not in seen_content:
-            seen_content[content_key] = item
-            combined_data.append(item)
+    # Iterate through all input context lists
+    for context_list in context_lists:
+        if not context_list:  # Skip empty lists
+            continue
+        for item in context_list:
+            content_dict = {k: v for k, v in item.items() if k != "id"}
+            content_key = tuple(sorted(content_dict.items()))
+            if content_key not in seen_content:
+                seen_content[content_key] = item
+                combined_data.append(item)

+    # Reassign IDs
    for i, item in enumerate(combined_data):
-        item["id"] = str(i)
+        item["id"] = str(i + 1)

    return combined_data