From 0b941178482691394a9d2385fdb8b530f9818f84 Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Wed, 19 Feb 2025 11:38:50 +0100 Subject: [PATCH 1/9] Add LlamaIndex LLM implementation module - Implemented LlamaIndex interface for language model interactions - Added async chat completion support - Included embedding generation functionality - Implemented retry mechanisms for API calls - Added configuration and message formatting utilities - Supports OpenAI-style message handling and external settings --- lightrag/llm/llama_index_impl.py | 249 +++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 lightrag/llm/llama_index_impl.py diff --git a/lightrag/llm/llama_index_impl.py b/lightrag/llm/llama_index_impl.py new file mode 100644 index 00000000..f6667c00 --- /dev/null +++ b/lightrag/llm/llama_index_impl.py @@ -0,0 +1,249 @@ +""" +LlamaIndex LLM Interface Module +========================== + +This module provides interfaces for interacting with LlamaIndex's language models, +including text generation and embedding capabilities. + +Author: Lightrag team +Created: 2024-03-19 +License: MIT License + +Version: 1.0.0 + +Change Log: +- 1.0.0 (2024-03-19): Initial release + * Added async chat completion support + * Added embedding generation + * Added stream response capability + * Added support for external settings configuration + * Added OpenAI-style message handling + +Dependencies: + - llama_index + - numpy + - pipmaster + - Python >= 3.10 + +Usage: + from lightrag.llm.llama_index_impl import llama_index_complete, llama_index_embed +""" + +__version__ = "1.0.0" +__author__ = "lightrag Team" +__status__ = "Production" + +import pipmaster as pm +from core.logging_config import setup_logger +from llama_index.core.llms import ( + ChatMessage, + MessageRole, + ChatResponse, +) +from typing import List, Optional + +# Install required dependencies +if not pm.is_installed("llama-index"): + pm.install("llama-index") + +from llama_index.core.embeddings import BaseEmbedding +from llama_index.core.settings import Settings as LlamaIndexSettings +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) +from lightrag.utils import ( + wrap_embedding_func_with_attrs, + locate_json_string_body_from_string, +) +from lightrag.exceptions import ( + APIConnectionError, + RateLimitError, + APITimeoutError, +) +import numpy as np + +logger = setup_logger("lightrag.llm.llama_index_impl") + + +def configure_llama_index(settings: LlamaIndexSettings = None, **kwargs): + """ + Configure LlamaIndex settings. + + Args: + settings: LlamaIndex Settings instance. If None, uses default settings. 
+ **kwargs: Additional settings to override/configure + """ + if settings is None: + settings = LlamaIndexSettings() + + # Update settings with any provided kwargs + for key, value in kwargs.items(): + if hasattr(settings, key): + setattr(settings, key, value) + else: + logger.warning(f"Unknown LlamaIndex setting: {key}") + + # Set as global settings + LlamaIndexSettings.set_global(settings) + return settings + + +def format_chat_messages(messages): + """Format chat messages into LlamaIndex format.""" + formatted_messages = [] + + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + + if role == "system": + formatted_messages.append( + ChatMessage(role=MessageRole.SYSTEM, content=content) + ) + elif role == "assistant": + formatted_messages.append( + ChatMessage(role=MessageRole.ASSISTANT, content=content) + ) + elif role == "user": + formatted_messages.append( + ChatMessage(role=MessageRole.USER, content=content) + ) + else: + logger.warning(f"Unknown role {role}, treating as user message") + formatted_messages.append( + ChatMessage(role=MessageRole.USER, content=content) + ) + + return formatted_messages + + +@retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=60), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), +) +async def llama_index_complete_if_cache( + model: str, + prompt: str, + system_prompt: Optional[str] = None, + history_messages: List[dict] = [], + **kwargs, +) -> str: + """Complete the prompt using LlamaIndex.""" + try: + # Format messages for chat + formatted_messages = [] + + # Add system message if provided + if system_prompt: + formatted_messages.append( + ChatMessage(role=MessageRole.SYSTEM, content=system_prompt) + ) + + # Add history messages + for msg in history_messages: + formatted_messages.append( + ChatMessage( + role=MessageRole.USER + if msg["role"] == "user" + else MessageRole.ASSISTANT, + content=msg["content"], + ) + ) + + # Add current prompt + formatted_messages.append(ChatMessage(role=MessageRole.USER, content=prompt)) + + # Get LLM instance from kwargs + if "llm_instance" not in kwargs: + raise ValueError("llm_instance must be provided in kwargs") + llm = kwargs["llm_instance"] + + # Get response + response: ChatResponse = await llm.achat(messages=formatted_messages) + + # In newer versions, the response is in message.content + content = response.message.content + return content + + except Exception as e: + logger.error(f"Error in llama_index_complete_if_cache: {str(e)}") + raise + + +async def llama_index_complete( + prompt, + system_prompt=None, + history_messages=None, + keyword_extraction=False, + settings: LlamaIndexSettings = None, + **kwargs, +) -> str: + """ + Main completion function for LlamaIndex + + Args: + prompt: Input prompt + system_prompt: Optional system prompt + history_messages: Optional chat history + keyword_extraction: Whether to extract keywords from response + settings: Optional LlamaIndex settings + **kwargs: Additional arguments + """ + if history_messages is None: + history_messages = [] + + keyword_extraction = kwargs.pop("keyword_extraction", None) + result = await llama_index_complete_if_cache( + kwargs.get("llm_instance"), + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + **kwargs, + ) + if keyword_extraction: + return locate_json_string_body_from_string(result) + return result + + +@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) +@retry( + 
stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=60), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), +) +async def llama_index_embed( + texts: list[str], + embed_model: BaseEmbedding = None, + settings: LlamaIndexSettings = None, + **kwargs, +) -> np.ndarray: + """ + Generate embeddings using LlamaIndex + + Args: + texts: List of texts to embed + embed_model: LlamaIndex embedding model + settings: Optional LlamaIndex settings + **kwargs: Additional arguments + """ + if settings: + configure_llama_index(settings) + + if embed_model is None: + raise ValueError("embed_model must be provided") + + # LlamaIndex's embed_query returns a list of floats + embeddings = [] + for text in texts: + embedding = await embed_model.aembed_query(text) + embeddings.append(embedding) + + return np.array(embeddings) From 3b25e32e8d4dc2de95805a50e29092b64fd92254 Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Wed, 19 Feb 2025 12:33:01 +0100 Subject: [PATCH 2/9] Removed verbose module-level documentation --- lightrag/llm/llama_index_impl.py | 35 -------------------------------- 1 file changed, 35 deletions(-) diff --git a/lightrag/llm/llama_index_impl.py b/lightrag/llm/llama_index_impl.py index f6667c00..7eea441a 100644 --- a/lightrag/llm/llama_index_impl.py +++ b/lightrag/llm/llama_index_impl.py @@ -1,38 +1,3 @@ -""" -LlamaIndex LLM Interface Module -========================== - -This module provides interfaces for interacting with LlamaIndex's language models, -including text generation and embedding capabilities. - -Author: Lightrag team -Created: 2024-03-19 -License: MIT License - -Version: 1.0.0 - -Change Log: -- 1.0.0 (2024-03-19): Initial release - * Added async chat completion support - * Added embedding generation - * Added stream response capability - * Added support for external settings configuration - * Added OpenAI-style message handling - -Dependencies: - - llama_index - - numpy - - pipmaster - - Python >= 3.10 - -Usage: - from lightrag.llm.llama_index_impl import llama_index_complete, llama_index_embed -""" - -__version__ = "1.0.0" -__author__ = "lightrag Team" -__status__ = "Production" - import pipmaster as pm from core.logging_config import setup_logger from llama_index.core.llms import ( From 8a06be9395f7209337b2b575d30be220ac3b823f Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Wed, 19 Feb 2025 14:54:12 +0100 Subject: [PATCH 3/9] Add LlamaIndex Wrapper and Example Implementations - Updated README.md with new Wrappers section detailing LlamaIndex integration - Added LlamaIndex wrapper implementation in `lightrag/wrapper/llama_index_impl.py` - Created two example scripts demonstrating LlamaIndex usage: - Direct OpenAI integration - LiteLLM proxy integration - Added wrapper documentation in `lightrag/wrapper/Readme.md` - Included comprehensive usage examples and configuration details --- README.md | 38 ++++ ...g_api_llamaindex_direct_demo_simplified.py | 98 +++++++++ ..._api_llamaindex_litellm_demo_simplified.py | 102 +++++++++ lightrag/wrapper/Readme.md | 177 +++++++++++++++ lightrag/wrapper/__init__.py | 0 lightrag/wrapper/llama_index_impl.py | 207 ++++++++++++++++++ 6 files changed, 622 insertions(+) create mode 100644 examples/lightrag_api_llamaindex_direct_demo_simplified.py create mode 100644 examples/lightrag_api_llamaindex_litellm_demo_simplified.py create mode 100644 lightrag/wrapper/Readme.md create mode 100644 lightrag/wrapper/__init__.py create mode 100644 lightrag/wrapper/llama_index_impl.py diff 
--git a/README.md b/README.md index 97d6279c..432261f7 100644 --- a/README.md +++ b/README.md @@ -312,7 +312,45 @@ rag = LightRAG( In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`. +
+ Wrappers +LightRAG supports integration with various frameworks and model providers through wrappers. These wrappers provide a consistent interface while abstracting away the specifics of each framework. + +### Current Wrappers + +1. **LlamaIndex** (`wrapper/llama_index_impl.py`): + - Integrates with OpenAI and other providers through LlamaIndex + - Supports both direct API access and proxy services like LiteLLM + - Provides consistent interfaces for embeddings and completions + - See [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md) for detailed setup and examples + +### Example Usage + +```python +# Using LlamaIndex with direct OpenAI access +from lightrag import LightRAG +from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.llms.openai import OpenAI + +rag = LightRAG( + working_dir="your/path", + llm_model_func=llm_model_func, # LlamaIndex-compatible completion function + embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function + embedding_dim=1536, + max_token_size=8192, + func=lambda texts: llama_index_embed(texts, embed_model=embed_model) + ), +) +``` + +#### For detailed documentation and examples, see: +- [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md) +- [Direct OpenAI Example](examples/lightrag_api_llamaindex_direct_demo_simplified.py) +- [LiteLLM Proxy Example](examples/lightrag_api_llamaindex_litellm_demo_simplified.py) + +
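The README snippet above leaves `llm_model_func` and `embed_model` undefined; a minimal sketch of both, modeled on the example scripts bundled with this PR (the model names, API-key handling, and the call pattern into `llama_index_complete_if_cache` are illustrative placeholders, not a fixed API), could look like:

```python
import os
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache

# Embedding model reused by the EmbeddingFunc lambda above
# (assumed model; its 1536-dim output matches embedding_dim above)
embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
    api_key=os.environ["OPENAI_API_KEY"],
)

# Completion wrapper that supplies the llm_instance expected by
# llama_index_complete_if_cache through kwargs
async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
    if "llm_instance" not in kwargs:
        kwargs["llm_instance"] = OpenAI(
            model="gpt-4",  # assumed model name
            api_key=os.environ["OPENAI_API_KEY"],
            temperature=0.7,
        )
    return await llama_index_complete_if_cache(
        kwargs["llm_instance"],
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )
```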
Conversation History Support diff --git a/examples/lightrag_api_llamaindex_direct_demo_simplified.py b/examples/lightrag_api_llamaindex_direct_demo_simplified.py new file mode 100644 index 00000000..50dfec96 --- /dev/null +++ b/examples/lightrag_api_llamaindex_direct_demo_simplified.py @@ -0,0 +1,98 @@ +import os +from lightrag import LightRAG, QueryParam +from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from lightrag.utils import EmbeddingFunc +from llama_index.llms.openai import OpenAI +from llama_index.embeddings.openai import OpenAIEmbedding +import asyncio + +# Configure working directory +DEFAULT_RAG_DIR = "index_default" +WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}") +print(f"WORKING_DIR: {WORKING_DIR}") + +# Model configuration +LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4") +print(f"LLM_MODEL: {LLM_MODEL}") +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small") +print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}") +EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192)) +print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}") + +# OpenAI configuration +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-api-key-here") + +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + +# Initialize LLM function +async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): + try: + # Initialize OpenAI if not in kwargs + if 'llm_instance' not in kwargs: + llm_instance = OpenAI( + model=LLM_MODEL, + api_key=OPENAI_API_KEY, + temperature=0.7, + ) + kwargs['llm_instance'] = llm_instance + + response = await llama_index_complete_if_cache( + kwargs['llm_instance'], + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + **kwargs, + ) + return response + except Exception as e: + print(f"LLM request failed: {str(e)}") + raise + +# Initialize embedding function +async def embedding_func(texts): + try: + embed_model = OpenAIEmbedding( + model=EMBEDDING_MODEL, + api_key=OPENAI_API_KEY, + ) + return await llama_index_embed(texts, embed_model=embed_model) + except Exception as e: + print(f"Embedding failed: {str(e)}") + raise + +# Get embedding dimension +async def get_embedding_dim(): + test_text = ["This is a test sentence."] + embedding = await embedding_func(test_text) + embedding_dim = embedding.shape[1] + print(f"embedding_dim={embedding_dim}") + return embedding_dim + +# Initialize RAG instance +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=asyncio.run(get_embedding_dim()), + max_token_size=EMBEDDING_MAX_TOKEN_SIZE, + func=embedding_func, + ), +) + +# Insert example text +with open("./book.txt", "r", encoding="utf-8") as f: + rag.insert(f.read()) + +# Test different query modes +print("\nNaive Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) + +print("\nLocal Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) + +print("\nGlobal Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) + +print("\nHybrid Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) \ No newline at end of file diff --git a/examples/lightrag_api_llamaindex_litellm_demo_simplified.py b/examples/lightrag_api_llamaindex_litellm_demo_simplified.py new file mode 100644 index 
00000000..11bdeba8 --- /dev/null +++ b/examples/lightrag_api_llamaindex_litellm_demo_simplified.py @@ -0,0 +1,102 @@ +import os +from lightrag import LightRAG, QueryParam +from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from lightrag.utils import EmbeddingFunc +from llama_index.llms.litellm import LiteLLM +from llama_index.embeddings.litellm import LiteLLMEmbedding +import asyncio + +# Configure working directory +DEFAULT_RAG_DIR = "index_default" +WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}") +print(f"WORKING_DIR: {WORKING_DIR}") + +# Model configuration +LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o") +print(f"LLM_MODEL: {LLM_MODEL}") +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "embedding-model") +print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}") +EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192)) +print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}") + +# LiteLLM configuration +LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000") +print(f"LITELLM_URL: {LITELLM_URL}") +LITELLM_KEY = os.environ.get("LITELLM_KEY", "sk-1234") + +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + +# Initialize LLM function +async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): + try: + # Initialize LiteLLM if not in kwargs + if 'llm_instance' not in kwargs: + llm_instance = LiteLLM( + model=f"openai/{LLM_MODEL}", # Format: "provider/model_name" + api_base=LITELLM_URL, + api_key=LITELLM_KEY, + temperature=0.7, + ) + kwargs['llm_instance'] = llm_instance + + response = await llama_index_complete_if_cache( + kwargs['llm_instance'], + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + **kwargs, + ) + return response + except Exception as e: + print(f"LLM request failed: {str(e)}") + raise + +# Initialize embedding function +async def embedding_func(texts): + try: + embed_model = LiteLLMEmbedding( + model_name=f"openai/{EMBEDDING_MODEL}", + api_base=LITELLM_URL, + api_key=LITELLM_KEY, + ) + return await llama_index_embed(texts, embed_model=embed_model) + except Exception as e: + print(f"Embedding failed: {str(e)}") + raise + +# Get embedding dimension +async def get_embedding_dim(): + test_text = ["This is a test sentence."] + embedding = await embedding_func(test_text) + embedding_dim = embedding.shape[1] + print(f"embedding_dim={embedding_dim}") + return embedding_dim + +# Initialize RAG instance +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=asyncio.run(get_embedding_dim()), + max_token_size=EMBEDDING_MAX_TOKEN_SIZE, + func=embedding_func, + ), +) + +# Insert example text +with open("./book.txt", "r", encoding="utf-8") as f: + rag.insert(f.read()) + +# Test different query modes +print("\nNaive Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) + +print("\nLocal Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) + +print("\nGlobal Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) + +print("\nHybrid Search:") +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) \ No newline at end of file diff --git a/lightrag/wrapper/Readme.md b/lightrag/wrapper/Readme.md new file mode 100644 index 00000000..ece56458 --- /dev/null +++ b/lightrag/wrapper/Readme.md @@ 
-0,0 +1,177 @@ +## Wrapper Directory + +The `wrapper` directory contains integrations with different frameworks. These wrappers provide a consistent interface to LightRAG while abstracting away the specifics of each framework. + +## Wrapper Directory Structure + +``` +lightrag/ +├── wrapper/ # Wrappers for different model providers and frameworks +│ ├── llama_index_impl.py # LlamaIndex integration for embeddings and completions +│ └── ... # Other framework wrappers +├── kg/ # Knowledge graph implementations +├── utils/ # Utility functions and helpers +└── ... +``` +Current wrappers: + +1. **LlamaIndex** (`wrapper/llama_index.py`): + - Provides integration with OpenAI and other providers through LlamaIndex + - Supports both direct API access and proxy services like LiteLLM + - Handles embeddings and completions with consistent interfaces + - See example implementations: + - [Direct OpenAI Usage](../examples/lightrag_api_llamaindex_direct_demo_simplified.py) + - [LiteLLM Proxy Usage](../examples/lightrag_api_llamaindex_litellm_demo_simplified.py) + +
+ Using LlamaIndex + +LightRAG supports LlamaIndex for embeddings and completions in two ways: direct OpenAI usage or through LiteLLM proxy. + +### Setup + +First, install the required dependencies: +```bash +pip install llama-index-llms-litellm llama-index-embeddings-litellm +``` + +### Standard OpenAI Usage + +```python +from lightrag import LightRAG +from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.llms.openai import OpenAI +from lightrag.utils import EmbeddingFunc + +# Initialize with direct OpenAI access +async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): + try: + # Initialize OpenAI if not in kwargs + if 'llm_instance' not in kwargs: + llm_instance = OpenAI( + model="gpt-4", + api_key="your-openai-key", + temperature=0.7, + ) + kwargs['llm_instance'] = llm_instance + + response = await llama_index_complete_if_cache( + kwargs['llm_instance'], + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + **kwargs, + ) + return response + except Exception as e: + logger.error(f"LLM request failed: {str(e)}") + raise + +# Initialize LightRAG with OpenAI +rag = LightRAG( + working_dir="your/path", + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=1536, + max_token_size=8192, + func=lambda texts: llama_index_embed( + texts, + embed_model=OpenAIEmbedding( + model="text-embedding-3-large", + api_key="your-openai-key" + ) + ), + ), +) +``` + +### Using LiteLLM Proxy + +1. Use any LLM provider through LiteLLM +2. Leverage LlamaIndex's embedding and completion capabilities +3. Maintain consistent configuration across services + +```python +from lightrag import LightRAG +from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from llama_index.llms.litellm import LiteLLM +from llama_index.embeddings.litellm import LiteLLMEmbedding +from lightrag.utils import EmbeddingFunc + +# Initialize with LiteLLM proxy +async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): + try: + # Initialize LiteLLM if not in kwargs + if 'llm_instance' not in kwargs: + llm_instance = LiteLLM( + model=f"openai/{settings.LLM_MODEL}", # Format: "provider/model_name" + api_base=settings.LITELLM_URL, + api_key=settings.LITELLM_KEY, + temperature=0.7, + ) + kwargs['llm_instance'] = llm_instance + + response = await llama_index_complete_if_cache( + kwargs['llm_instance'], + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + **kwargs, + ) + return response + except Exception as e: + logger.error(f"LLM request failed: {str(e)}") + raise + +# Initialize LightRAG with LiteLLM +rag = LightRAG( + working_dir="your/path", + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=1536, + max_token_size=8192, + func=lambda texts: llama_index_embed( + texts, + embed_model=LiteLLMEmbedding( + model_name=f"openai/{settings.EMBEDDING_MODEL}", + api_base=settings.LITELLM_URL, + api_key=settings.LITELLM_KEY, + ) + ), + ), +) +``` + +### Environment Variables + +For OpenAI direct usage: +```bash +OPENAI_API_KEY=your-openai-key +``` + +For LiteLLM proxy: +```bash +# LiteLLM Configuration +LITELLM_URL=http://litellm:4000 +LITELLM_KEY=your-litellm-key + +# Model Configuration +LLM_MODEL=gpt-4 +EMBEDDING_MODEL=text-embedding-3-large +EMBEDDING_MAX_TOKEN_SIZE=8192 +``` + +### Key Differences +1. 
**Direct OpenAI**: + - Simpler setup + - Direct API access + - Requires OpenAI API key + +2. **LiteLLM Proxy**: + - Model provider agnostic + - Centralized API key management + - Support for multiple providers + - Better cost control and monitoring + +
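However the backend is configured (direct OpenAI or the LiteLLM proxy), the resulting `rag` instance is used the same way. A short usage sketch mirroring the bundled example scripts (`./book.txt` and the query text are placeholders):

```python
from lightrag import QueryParam

# Index a document, then run the same question through each retrieval mode
with open("./book.txt", "r", encoding="utf-8") as f:
    rag.insert(f.read())

for mode in ["naive", "local", "global", "hybrid"]:
    print(f"\n{mode.capitalize()} Search:")
    print(
        rag.query(
            "What are the top themes in this story?",
            param=QueryParam(mode=mode),
        )
    )
```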
diff --git a/lightrag/wrapper/__init__.py b/lightrag/wrapper/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lightrag/wrapper/llama_index_impl.py b/lightrag/wrapper/llama_index_impl.py new file mode 100644 index 00000000..f79dade5 --- /dev/null +++ b/lightrag/wrapper/llama_index_impl.py @@ -0,0 +1,207 @@ +import pipmaster as pm +from llama_index.core.llms import ( + ChatMessage, + MessageRole, + ChatResponse, +) +from typing import List, Optional + +# Install required dependencies +if not pm.is_installed("llama-index"): + pm.install("llama-index") + +from llama_index.core.embeddings import BaseEmbedding +from llama_index.core.settings import Settings as LlamaIndexSettings +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) +from lightrag.utils import ( + wrap_embedding_func_with_attrs, + locate_json_string_body_from_string, +) +from lightrag.exceptions import ( + APIConnectionError, + RateLimitError, + APITimeoutError, +) +import numpy as np + + +def configure_llama_index(settings: LlamaIndexSettings = None, **kwargs): + """ + Configure LlamaIndex settings. + + Args: + settings: LlamaIndex Settings instance. If None, uses default settings. + **kwargs: Additional settings to override/configure + """ + if settings is None: + settings = LlamaIndexSettings() + + # Update settings with any provided kwargs + for key, value in kwargs.items(): + if hasattr(settings, key): + setattr(settings, key, value) + else: + logger.warning(f"Unknown LlamaIndex setting: {key}") + + # Set as global settings + LlamaIndexSettings.set_global(settings) + return settings + + +def format_chat_messages(messages): + """Format chat messages into LlamaIndex format.""" + formatted_messages = [] + + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + + if role == "system": + formatted_messages.append( + ChatMessage(role=MessageRole.SYSTEM, content=content) + ) + elif role == "assistant": + formatted_messages.append( + ChatMessage(role=MessageRole.ASSISTANT, content=content) + ) + elif role == "user": + formatted_messages.append( + ChatMessage(role=MessageRole.USER, content=content) + ) + else: + logger.warning(f"Unknown role {role}, treating as user message") + formatted_messages.append( + ChatMessage(role=MessageRole.USER, content=content) + ) + + return formatted_messages + + +@retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=60), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), +) +async def llama_index_complete_if_cache( + model: str, + prompt: str, + system_prompt: Optional[str] = None, + history_messages: List[dict] = [], + **kwargs, +) -> str: + """Complete the prompt using LlamaIndex.""" + try: + # Format messages for chat + formatted_messages = [] + + # Add system message if provided + if system_prompt: + formatted_messages.append( + ChatMessage(role=MessageRole.SYSTEM, content=system_prompt) + ) + + # Add history messages + for msg in history_messages: + formatted_messages.append( + ChatMessage( + role=MessageRole.USER + if msg["role"] == "user" + else MessageRole.ASSISTANT, + content=msg["content"], + ) + ) + + # Add current prompt + formatted_messages.append(ChatMessage(role=MessageRole.USER, content=prompt)) + + # Get LLM instance from kwargs + if "llm_instance" not in kwargs: + raise ValueError("llm_instance must be provided in kwargs") + llm = kwargs["llm_instance"] + + # Get response + response: ChatResponse = 
await llm.achat(messages=formatted_messages) + + # In newer versions, the response is in message.content + content = response.message.content + return content + + except Exception as e: + logger.error(f"Error in llama_index_complete_if_cache: {str(e)}") + raise + + +async def llama_index_complete( + prompt, + system_prompt=None, + history_messages=None, + keyword_extraction=False, + settings: LlamaIndexSettings = None, + **kwargs, +) -> str: + """ + Main completion function for LlamaIndex + + Args: + prompt: Input prompt + system_prompt: Optional system prompt + history_messages: Optional chat history + keyword_extraction: Whether to extract keywords from response + settings: Optional LlamaIndex settings + **kwargs: Additional arguments + """ + if history_messages is None: + history_messages = [] + + keyword_extraction = kwargs.pop("keyword_extraction", None) + result = await llama_index_complete_if_cache( + kwargs.get("llm_instance"), + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + **kwargs, + ) + if keyword_extraction: + return locate_json_string_body_from_string(result) + return result + + +@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) +@retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=60), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), +) +async def llama_index_embed( + texts: list[str], + embed_model: BaseEmbedding = None, + settings: LlamaIndexSettings = None, + **kwargs, +) -> np.ndarray: + """ + Generate embeddings using LlamaIndex + + Args: + texts: List of texts to embed + embed_model: LlamaIndex embedding model + settings: Optional LlamaIndex settings + **kwargs: Additional arguments + """ + if settings: + configure_llama_index(settings) + + if embed_model is None: + raise ValueError("embed_model must be provided") + + # Use _get_text_embeddings for batch processing + embeddings = embed_model._get_text_embeddings(texts) + return np.array(embeddings) From 203fdf2565768da025aadf35eb3b850ed7663f90 Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Wed, 19 Feb 2025 14:55:44 +0100 Subject: [PATCH 4/9] Remove LlamaIndex implementation from llm directory as per @MdNazishArmanShorthillsAI - Deleted `lightrag/llm/llama_index_impl.py` - Reorganization of the LlamaIndex wrapper location --- lightrag/llm/llama_index_impl.py | 214 ------------------------------- 1 file changed, 214 deletions(-) delete mode 100644 lightrag/llm/llama_index_impl.py diff --git a/lightrag/llm/llama_index_impl.py b/lightrag/llm/llama_index_impl.py deleted file mode 100644 index 7eea441a..00000000 --- a/lightrag/llm/llama_index_impl.py +++ /dev/null @@ -1,214 +0,0 @@ -import pipmaster as pm -from core.logging_config import setup_logger -from llama_index.core.llms import ( - ChatMessage, - MessageRole, - ChatResponse, -) -from typing import List, Optional - -# Install required dependencies -if not pm.is_installed("llama-index"): - pm.install("llama-index") - -from llama_index.core.embeddings import BaseEmbedding -from llama_index.core.settings import Settings as LlamaIndexSettings -from tenacity import ( - retry, - stop_after_attempt, - wait_exponential, - retry_if_exception_type, -) -from lightrag.utils import ( - wrap_embedding_func_with_attrs, - locate_json_string_body_from_string, -) -from lightrag.exceptions import ( - APIConnectionError, - RateLimitError, - APITimeoutError, -) -import numpy as np - -logger = setup_logger("lightrag.llm.llama_index_impl") - - -def 
configure_llama_index(settings: LlamaIndexSettings = None, **kwargs): - """ - Configure LlamaIndex settings. - - Args: - settings: LlamaIndex Settings instance. If None, uses default settings. - **kwargs: Additional settings to override/configure - """ - if settings is None: - settings = LlamaIndexSettings() - - # Update settings with any provided kwargs - for key, value in kwargs.items(): - if hasattr(settings, key): - setattr(settings, key, value) - else: - logger.warning(f"Unknown LlamaIndex setting: {key}") - - # Set as global settings - LlamaIndexSettings.set_global(settings) - return settings - - -def format_chat_messages(messages): - """Format chat messages into LlamaIndex format.""" - formatted_messages = [] - - for msg in messages: - role = msg.get("role", "user") - content = msg.get("content", "") - - if role == "system": - formatted_messages.append( - ChatMessage(role=MessageRole.SYSTEM, content=content) - ) - elif role == "assistant": - formatted_messages.append( - ChatMessage(role=MessageRole.ASSISTANT, content=content) - ) - elif role == "user": - formatted_messages.append( - ChatMessage(role=MessageRole.USER, content=content) - ) - else: - logger.warning(f"Unknown role {role}, treating as user message") - formatted_messages.append( - ChatMessage(role=MessageRole.USER, content=content) - ) - - return formatted_messages - - -@retry( - stop=stop_after_attempt(3), - wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type( - (RateLimitError, APIConnectionError, APITimeoutError) - ), -) -async def llama_index_complete_if_cache( - model: str, - prompt: str, - system_prompt: Optional[str] = None, - history_messages: List[dict] = [], - **kwargs, -) -> str: - """Complete the prompt using LlamaIndex.""" - try: - # Format messages for chat - formatted_messages = [] - - # Add system message if provided - if system_prompt: - formatted_messages.append( - ChatMessage(role=MessageRole.SYSTEM, content=system_prompt) - ) - - # Add history messages - for msg in history_messages: - formatted_messages.append( - ChatMessage( - role=MessageRole.USER - if msg["role"] == "user" - else MessageRole.ASSISTANT, - content=msg["content"], - ) - ) - - # Add current prompt - formatted_messages.append(ChatMessage(role=MessageRole.USER, content=prompt)) - - # Get LLM instance from kwargs - if "llm_instance" not in kwargs: - raise ValueError("llm_instance must be provided in kwargs") - llm = kwargs["llm_instance"] - - # Get response - response: ChatResponse = await llm.achat(messages=formatted_messages) - - # In newer versions, the response is in message.content - content = response.message.content - return content - - except Exception as e: - logger.error(f"Error in llama_index_complete_if_cache: {str(e)}") - raise - - -async def llama_index_complete( - prompt, - system_prompt=None, - history_messages=None, - keyword_extraction=False, - settings: LlamaIndexSettings = None, - **kwargs, -) -> str: - """ - Main completion function for LlamaIndex - - Args: - prompt: Input prompt - system_prompt: Optional system prompt - history_messages: Optional chat history - keyword_extraction: Whether to extract keywords from response - settings: Optional LlamaIndex settings - **kwargs: Additional arguments - """ - if history_messages is None: - history_messages = [] - - keyword_extraction = kwargs.pop("keyword_extraction", None) - result = await llama_index_complete_if_cache( - kwargs.get("llm_instance"), - prompt, - system_prompt=system_prompt, - history_messages=history_messages, - **kwargs, - ) 
- if keyword_extraction: - return locate_json_string_body_from_string(result) - return result - - -@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) -@retry( - stop=stop_after_attempt(3), - wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type( - (RateLimitError, APIConnectionError, APITimeoutError) - ), -) -async def llama_index_embed( - texts: list[str], - embed_model: BaseEmbedding = None, - settings: LlamaIndexSettings = None, - **kwargs, -) -> np.ndarray: - """ - Generate embeddings using LlamaIndex - - Args: - texts: List of texts to embed - embed_model: LlamaIndex embedding model - settings: Optional LlamaIndex settings - **kwargs: Additional arguments - """ - if settings: - configure_llama_index(settings) - - if embed_model is None: - raise ValueError("embed_model must be provided") - - # LlamaIndex's embed_query returns a list of floats - embeddings = [] - for text in texts: - embedding = await embed_model.aembed_query(text) - embeddings.append(embedding) - - return np.array(embeddings) From 04604841c9c905ab6f18a6b426897e9d9665cd3e Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Wed, 19 Feb 2025 14:59:49 +0100 Subject: [PATCH 5/9] Add logger import --- lightrag/wrapper/llama_index_impl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lightrag/wrapper/llama_index_impl.py b/lightrag/wrapper/llama_index_impl.py index f79dade5..4e1618b1 100644 --- a/lightrag/wrapper/llama_index_impl.py +++ b/lightrag/wrapper/llama_index_impl.py @@ -5,6 +5,7 @@ from llama_index.core.llms import ( ChatResponse, ) from typing import List, Optional +from lightrag.utils import logger # Install required dependencies if not pm.is_installed("llama-index"): From 277070e03bc7202f015cc8ec18cd37bd0b4e20a8 Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Wed, 19 Feb 2025 15:01:51 +0100 Subject: [PATCH 6/9] Linting and formatting --- ...g_api_llamaindex_direct_demo_simplified.py | 31 ++++++++++++++----- ..._api_llamaindex_litellm_demo_simplified.py | 31 ++++++++++++++----- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/examples/lightrag_api_llamaindex_direct_demo_simplified.py b/examples/lightrag_api_llamaindex_direct_demo_simplified.py index 50dfec96..a1781842 100644 --- a/examples/lightrag_api_llamaindex_direct_demo_simplified.py +++ b/examples/lightrag_api_llamaindex_direct_demo_simplified.py @@ -1,6 +1,9 @@ import os from lightrag import LightRAG, QueryParam -from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from lightrag.wrapper.llama_index_impl import ( + llama_index_complete_if_cache, + llama_index_embed, +) from lightrag.utils import EmbeddingFunc from llama_index.llms.openai import OpenAI from llama_index.embeddings.openai import OpenAIEmbedding @@ -25,20 +28,21 @@ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-api-key-here") if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) + # Initialize LLM function async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): try: # Initialize OpenAI if not in kwargs - if 'llm_instance' not in kwargs: + if "llm_instance" not in kwargs: llm_instance = OpenAI( model=LLM_MODEL, api_key=OPENAI_API_KEY, temperature=0.7, ) - kwargs['llm_instance'] = llm_instance + kwargs["llm_instance"] = llm_instance response = await llama_index_complete_if_cache( - kwargs['llm_instance'], + kwargs["llm_instance"], prompt, system_prompt=system_prompt, history_messages=history_messages, @@ -49,6 +53,7 @@ async def 
llm_model_func(prompt, system_prompt=None, history_messages=[], **kwar print(f"LLM request failed: {str(e)}") raise + # Initialize embedding function async def embedding_func(texts): try: @@ -61,6 +66,7 @@ async def embedding_func(texts): print(f"Embedding failed: {str(e)}") raise + # Get embedding dimension async def get_embedding_dim(): test_text = ["This is a test sentence."] @@ -69,6 +75,7 @@ async def get_embedding_dim(): print(f"embedding_dim={embedding_dim}") return embedding_dim + # Initialize RAG instance rag = LightRAG( working_dir=WORKING_DIR, @@ -86,13 +93,21 @@ with open("./book.txt", "r", encoding="utf-8") as f: # Test different query modes print("\nNaive Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) +) print("\nLocal Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) +) print("\nGlobal Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) +) print("\nHybrid Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) \ No newline at end of file +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) +) diff --git a/examples/lightrag_api_llamaindex_litellm_demo_simplified.py b/examples/lightrag_api_llamaindex_litellm_demo_simplified.py index 11bdeba8..a1ab90db 100644 --- a/examples/lightrag_api_llamaindex_litellm_demo_simplified.py +++ b/examples/lightrag_api_llamaindex_litellm_demo_simplified.py @@ -1,6 +1,9 @@ import os from lightrag import LightRAG, QueryParam -from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from lightrag.wrapper.llama_index_impl import ( + llama_index_complete_if_cache, + llama_index_embed, +) from lightrag.utils import EmbeddingFunc from llama_index.llms.litellm import LiteLLM from llama_index.embeddings.litellm import LiteLLMEmbedding @@ -27,21 +30,22 @@ LITELLM_KEY = os.environ.get("LITELLM_KEY", "sk-1234") if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) + # Initialize LLM function async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs): try: # Initialize LiteLLM if not in kwargs - if 'llm_instance' not in kwargs: + if "llm_instance" not in kwargs: llm_instance = LiteLLM( model=f"openai/{LLM_MODEL}", # Format: "provider/model_name" api_base=LITELLM_URL, api_key=LITELLM_KEY, temperature=0.7, ) - kwargs['llm_instance'] = llm_instance + kwargs["llm_instance"] = llm_instance response = await llama_index_complete_if_cache( - kwargs['llm_instance'], + kwargs["llm_instance"], prompt, system_prompt=system_prompt, history_messages=history_messages, @@ -52,6 +56,7 @@ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwar print(f"LLM request failed: {str(e)}") raise + # Initialize embedding function async def embedding_func(texts): try: @@ -65,6 +70,7 @@ async def embedding_func(texts): print(f"Embedding failed: {str(e)}") raise + # Get embedding dimension async def get_embedding_dim(): test_text = ["This is a test sentence."] @@ -73,6 +79,7 @@ async def get_embedding_dim(): print(f"embedding_dim={embedding_dim}") return embedding_dim + # 
Initialize RAG instance rag = LightRAG( working_dir=WORKING_DIR, @@ -90,13 +97,21 @@ with open("./book.txt", "r", encoding="utf-8") as f: # Test different query modes print("\nNaive Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) +) print("\nLocal Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) +) print("\nGlobal Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) +) print("\nHybrid Search:") -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) \ No newline at end of file +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) +) From 173a806b9a8f7a75b50bf3f9321fb1d71d6863ae Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Thu, 20 Feb 2025 10:22:26 +0100 Subject: [PATCH 7/9] Moved back to llm dir as per https://github.com/HKUDS/LightRAG/pull/864#issuecomment-2669705946 - Created two new example scripts demonstrating LightRAG integration with LlamaIndex: - `lightrag_llamaindex_direct_demo.py`: Direct OpenAI integration - `lightrag_llamaindex_litellm_demo.py`: LiteLLM proxy integration - Both examples showcase different search modes (naive, local, global, hybrid) - Includes configuration for working directory, models, and API settings - Demonstrates text insertion and querying using LightRAG with LlamaIndex - removed wrapper directory and references to it --- ..._simplified.py => lightrag_llamaindex_direct_demo.py} | 8 ++++---- ...simplified.py => lightrag_llamaindex_litellm_demo.py} | 9 ++++----- lightrag/{wrapper => llm}/Readme.md | 0 lightrag/{wrapper => llm}/llama_index_impl.py | 0 lightrag/wrapper/__init__.py | 0 5 files changed, 8 insertions(+), 9 deletions(-) rename examples/{lightrag_api_llamaindex_direct_demo_simplified.py => lightrag_llamaindex_direct_demo.py} (95%) rename examples/{lightrag_api_llamaindex_litellm_demo_simplified.py => lightrag_llamaindex_litellm_demo.py} (92%) rename lightrag/{wrapper => llm}/Readme.md (100%) rename lightrag/{wrapper => llm}/llama_index_impl.py (100%) delete mode 100644 lightrag/wrapper/__init__.py diff --git a/examples/lightrag_api_llamaindex_direct_demo_simplified.py b/examples/lightrag_llamaindex_direct_demo.py similarity index 95% rename from examples/lightrag_api_llamaindex_direct_demo_simplified.py rename to examples/lightrag_llamaindex_direct_demo.py index a1781842..5db158ce 100644 --- a/examples/lightrag_api_llamaindex_direct_demo_simplified.py +++ b/examples/lightrag_llamaindex_direct_demo.py @@ -1,6 +1,6 @@ import os from lightrag import LightRAG, QueryParam -from lightrag.wrapper.llama_index_impl import ( +from lightrag.llm.llama_index_impl import ( llama_index_complete_if_cache, llama_index_embed, ) @@ -10,14 +10,13 @@ from llama_index.embeddings.openai import OpenAIEmbedding import asyncio # Configure working directory -DEFAULT_RAG_DIR = "index_default" -WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}") +WORKING_DIR = "./index_default" print(f"WORKING_DIR: {WORKING_DIR}") # Model configuration LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4") print(f"LLM_MODEL: {LLM_MODEL}") -EMBEDDING_MODEL = 
os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small") +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large") print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}") EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192)) print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}") @@ -26,6 +25,7 @@ print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}") OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-api-key-here") if not os.path.exists(WORKING_DIR): + print(f"Creating working directory: {WORKING_DIR}") os.mkdir(WORKING_DIR) diff --git a/examples/lightrag_api_llamaindex_litellm_demo_simplified.py b/examples/lightrag_llamaindex_litellm_demo.py similarity index 92% rename from examples/lightrag_api_llamaindex_litellm_demo_simplified.py rename to examples/lightrag_llamaindex_litellm_demo.py index a1ab90db..3511ecf3 100644 --- a/examples/lightrag_api_llamaindex_litellm_demo_simplified.py +++ b/examples/lightrag_llamaindex_litellm_demo.py @@ -1,6 +1,6 @@ import os from lightrag import LightRAG, QueryParam -from lightrag.wrapper.llama_index_impl import ( +from lightrag.llm.llama_index_impl import ( llama_index_complete_if_cache, llama_index_embed, ) @@ -10,14 +10,13 @@ from llama_index.embeddings.litellm import LiteLLMEmbedding import asyncio # Configure working directory -DEFAULT_RAG_DIR = "index_default" -WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}") +WORKING_DIR = "./index_default" print(f"WORKING_DIR: {WORKING_DIR}") # Model configuration -LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o") +LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4") print(f"LLM_MODEL: {LLM_MODEL}") -EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "embedding-model") +EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large") print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}") EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192)) print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}") diff --git a/lightrag/wrapper/Readme.md b/lightrag/llm/Readme.md similarity index 100% rename from lightrag/wrapper/Readme.md rename to lightrag/llm/Readme.md diff --git a/lightrag/wrapper/llama_index_impl.py b/lightrag/llm/llama_index_impl.py similarity index 100% rename from lightrag/wrapper/llama_index_impl.py rename to lightrag/llm/llama_index_impl.py diff --git a/lightrag/wrapper/__init__.py b/lightrag/wrapper/__init__.py deleted file mode 100644 index e69de29b..00000000 From 9934241a1e44a3e071139f793e9ee4e689d032a3 Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Thu, 20 Feb 2025 10:31:19 +0100 Subject: [PATCH 8/9] Update README.md: Refactor LlamaIndex section and example code - Simplified LlamaIndex documentation in README - Removed wrapper directory references - Updated example code to reflect new directory structure - Cleaned up custom knowledge graph example - Adjusted file paths and import statements --- README.md | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 432261f7..9a518d8d 100644 --- a/README.md +++ b/README.md @@ -313,30 +313,26 @@ In order to run this experiment on low RAM GPU you should select small model and
- Wrappers + LlamaIndex -LightRAG supports integration with various frameworks and model providers through wrappers. These wrappers provide a consistent interface while abstracting away the specifics of each framework. +LightRAG supports integration with LlamaIndex. -### Current Wrappers - -1. **LlamaIndex** (`wrapper/llama_index_impl.py`): +1. **LlamaIndex** (`llm/llama_index_impl.py`): - Integrates with OpenAI and other providers through LlamaIndex - - Supports both direct API access and proxy services like LiteLLM - - Provides consistent interfaces for embeddings and completions - - See [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md) for detailed setup and examples + - See [LlamaIndex Documentation](lightrag/llm/Readme.md) for detailed setup and examples ### Example Usage ```python # Using LlamaIndex with direct OpenAI access from lightrag import LightRAG -from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.llms.openai import OpenAI rag = LightRAG( working_dir="your/path", - llm_model_func=llm_model_func, # LlamaIndex-compatible completion function + llm_model_func=llama_index_complete_if_cache, # LlamaIndex-compatible completion function embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function embedding_dim=1536, max_token_size=8192, @@ -346,9 +342,9 @@ rag = LightRAG( ``` #### For detailed documentation and examples, see: -- [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md) -- [Direct OpenAI Example](examples/lightrag_api_llamaindex_direct_demo_simplified.py) -- [LiteLLM Proxy Example](examples/lightrag_api_llamaindex_litellm_demo_simplified.py) +- [LlamaIndex Documentation](lightrag/llm/Readme.md) +- [Direct OpenAI Example](examples/lightrag_llamaindex_direct_demo.py) +- [LiteLLM Proxy Example](examples/lightrag_llamaindex_litellm_demo.py)
@@ -499,22 +495,14 @@ custom_kg = { { "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.", "source_id": "Source1", - "chunk_order_index": 0, - }, - { - "content": "One outstanding feature of ProductX is its advanced AI capabilities.", - "source_id": "Source1", - "chunk_order_index": 1, }, { "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.", "source_id": "Source2", - "chunk_order_index": 0, }, { "content": "None", "source_id": "UNKNOWN", - "chunk_order_index": 0, }, ], } From 6f09bfc970c784ae88b9d8b8dba275213150cfb7 Mon Sep 17 00:00:00 2001 From: Pankaj Kaushal Date: Thu, 20 Feb 2025 10:33:15 +0100 Subject: [PATCH 9/9] Update LlamaIndex README: improve documentation and example paths - Updated file paths for LlamaIndex examples - Simplified README structure - Corrected import statements to reflect new directory layout - Removed outdated wrapper directory references --- lightrag/llm/Readme.md | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/lightrag/llm/Readme.md b/lightrag/llm/Readme.md index ece56458..969d70e3 100644 --- a/lightrag/llm/Readme.md +++ b/lightrag/llm/Readme.md @@ -1,27 +1,11 @@ -## Wrapper Directory -The `wrapper` directory contains integrations with different frameworks. These wrappers provide a consistent interface to LightRAG while abstracting away the specifics of each framework. - -## Wrapper Directory Structure - -``` -lightrag/ -├── wrapper/ # Wrappers for different model providers and frameworks -│ ├── llama_index_impl.py # LlamaIndex integration for embeddings and completions -│ └── ... # Other framework wrappers -├── kg/ # Knowledge graph implementations -├── utils/ # Utility functions and helpers -└── ... -``` -Current wrappers: - -1. **LlamaIndex** (`wrapper/llama_index.py`): +1. **LlamaIndex** (`llm/llama_index.py`): - Provides integration with OpenAI and other providers through LlamaIndex - Supports both direct API access and proxy services like LiteLLM - Handles embeddings and completions with consistent interfaces - See example implementations: - - [Direct OpenAI Usage](../examples/lightrag_api_llamaindex_direct_demo_simplified.py) - - [LiteLLM Proxy Usage](../examples/lightrag_api_llamaindex_litellm_demo_simplified.py) + - [Direct OpenAI Usage](../../examples/lightrag_llamaindex_direct_demo.py) + - [LiteLLM Proxy Usage](../../examples/lightrag_llamaindex_litellm_demo.py)
Using LlamaIndex @@ -39,7 +23,7 @@ pip install llama-index-llms-litellm llama-index-embeddings-litellm ```python from lightrag import LightRAG -from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.llms.openai import OpenAI from lightrag.utils import EmbeddingFunc @@ -94,7 +78,7 @@ rag = LightRAG( ```python from lightrag import LightRAG -from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed +from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed from llama_index.llms.litellm import LiteLLM from llama_index.embeddings.litellm import LiteLLMEmbedding from lightrag.utils import EmbeddingFunc