Add LlamaIndex Wrapper and Example Implementations

- Updated README.md with new Wrappers section detailing LlamaIndex integration
- Added LlamaIndex wrapper implementation in `lightrag/wrapper/llama_index_impl.py`
- Created two example scripts demonstrating LlamaIndex usage:
  - Direct OpenAI integration
  - LiteLLM proxy integration
- Added wrapper documentation in `lightrag/wrapper/Readme.md`
- Included comprehensive usage examples and configuration details
Pankaj Kaushal
2025-02-19 14:54:12 +01:00
parent 3b25e32e8d
commit 8a06be9395
6 changed files with 622 additions and 0 deletions

README.md
View File

@@ -312,7 +312,45 @@ rag = LightRAG(
In order to run this experiment on a low-RAM GPU you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
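As a sketch (assuming the `ollama_model_complete` and `ollama_embedding` helpers from `lightrag.llm` used elsewhere in this README), the context size can be passed through `llm_model_kwargs`:
```python
from lightrag import LightRAG
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc

rag = LightRAG(
    working_dir="your/path",
    llm_model_func=ollama_model_complete,
    llm_model_name="gemma2:2b",
    # 26k context fit the 6 GB GPU in the experiment described above
    llm_model_kwargs={"options": {"num_ctx": 26000}},
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
    ),
)
```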
</details>
<details>
<summary> <b>Wrappers</b> </summary>
LightRAG supports integration with various frameworks and model providers through wrappers. These wrappers provide a consistent interface while abstracting away the specifics of each framework.
### Current Wrappers
1. **LlamaIndex** (`wrapper/llama_index_impl.py`):
- Integrates with OpenAI and other providers through LlamaIndex
- Supports both direct API access and proxy services like LiteLLM
- Provides consistent interfaces for embeddings and completions
- See [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md) for detailed setup and examples
### Example Usage
```python
# Using LlamaIndex with direct OpenAI access
from lightrag import LightRAG
from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
rag = LightRAG(
working_dir="your/path",
llm_model_func=llm_model_func, # LlamaIndex-compatible completion function
embedding_func=EmbeddingFunc( # LlamaIndex-compatible embedding function
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(texts, embed_model=embed_model)
),
)
```
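The `llm_model_func` and `embed_model` referenced above are not defined in the snippet; a minimal sketch of both, assuming direct OpenAI access and mirroring the bundled example scripts (the key shown is a placeholder):
```python
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Placeholder key; supply your real OpenAI key
embed_model = OpenAIEmbedding(model="text-embedding-3-small", api_key="your-openai-key")

async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
    # Create the OpenAI LLM once and reuse it across calls
    if "llm_instance" not in kwargs:
        kwargs["llm_instance"] = OpenAI(model="gpt-4", api_key="your-openai-key", temperature=0.7)
    return await llama_index_complete_if_cache(
        kwargs["llm_instance"],
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )
```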
#### For detailed documentation and examples, see:
- [LlamaIndex Wrapper Documentation](lightrag/wrapper/Readme.md)
- [Direct OpenAI Example](examples/lightrag_api_llamaindex_direct_demo_simplified.py)
- [LiteLLM Proxy Example](examples/lightrag_api_llamaindex_litellm_demo_simplified.py)
</details>
<details>
<summary> <b>Conversation History Support</b> </summary>

examples/lightrag_api_llamaindex_direct_demo_simplified.py Normal file
View File

@@ -0,0 +1,98 @@
import os
from lightrag import LightRAG, QueryParam
from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
from lightrag.utils import EmbeddingFunc
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
import asyncio
# Configure working directory
DEFAULT_RAG_DIR = "index_default"
WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}")
print(f"WORKING_DIR: {WORKING_DIR}")
# Model configuration
LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
print(f"LLM_MODEL: {LLM_MODEL}")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small")
print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
# OpenAI configuration
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-api-key-here")
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
# Initialize LLM function
async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
try:
# Initialize OpenAI if not in kwargs
if 'llm_instance' not in kwargs:
llm_instance = OpenAI(
model=LLM_MODEL,
api_key=OPENAI_API_KEY,
temperature=0.7,
)
kwargs['llm_instance'] = llm_instance
response = await llama_index_complete_if_cache(
kwargs['llm_instance'],
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
**kwargs,
)
return response
except Exception as e:
print(f"LLM request failed: {str(e)}")
raise
# Initialize embedding function
async def embedding_func(texts):
try:
embed_model = OpenAIEmbedding(
model=EMBEDDING_MODEL,
api_key=OPENAI_API_KEY,
)
return await llama_index_embed(texts, embed_model=embed_model)
except Exception as e:
print(f"Embedding failed: {str(e)}")
raise
# Get embedding dimension
async def get_embedding_dim():
test_text = ["This is a test sentence."]
embedding = await embedding_func(test_text)
embedding_dim = embedding.shape[1]
print(f"embedding_dim={embedding_dim}")
return embedding_dim
# Initialize RAG instance
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=asyncio.run(get_embedding_dim()),
max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
func=embedding_func,
),
)
# Insert example text
with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
# Test different query modes
print("\nNaive Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
print("\nLocal Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
print("\nGlobal Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
print("\nHybrid Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))

examples/lightrag_api_llamaindex_litellm_demo_simplified.py Normal file
View File

@@ -0,0 +1,102 @@
import os
from lightrag import LightRAG, QueryParam
from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
from lightrag.utils import EmbeddingFunc
from llama_index.llms.litellm import LiteLLM
from llama_index.embeddings.litellm import LiteLLMEmbedding
import asyncio
# Configure working directory
DEFAULT_RAG_DIR = "index_default"
WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}")
print(f"WORKING_DIR: {WORKING_DIR}")
# Model configuration
LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o")
print(f"LLM_MODEL: {LLM_MODEL}")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "embedding-model")
print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
# LiteLLM configuration
LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000")
print(f"LITELLM_URL: {LITELLM_URL}")
LITELLM_KEY = os.environ.get("LITELLM_KEY", "sk-1234")
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
# Initialize LLM function
async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
try:
# Initialize LiteLLM if not in kwargs
if 'llm_instance' not in kwargs:
llm_instance = LiteLLM(
model=f"openai/{LLM_MODEL}", # Format: "provider/model_name"
api_base=LITELLM_URL,
api_key=LITELLM_KEY,
temperature=0.7,
)
kwargs['llm_instance'] = llm_instance
response = await llama_index_complete_if_cache(
kwargs['llm_instance'],
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
**kwargs,
)
return response
except Exception as e:
print(f"LLM request failed: {str(e)}")
raise
# Initialize embedding function
async def embedding_func(texts):
try:
embed_model = LiteLLMEmbedding(
model_name=f"openai/{EMBEDDING_MODEL}",
api_base=LITELLM_URL,
api_key=LITELLM_KEY,
)
return await llama_index_embed(texts, embed_model=embed_model)
except Exception as e:
print(f"Embedding failed: {str(e)}")
raise
# Get embedding dimension
async def get_embedding_dim():
test_text = ["This is a test sentence."]
embedding = await embedding_func(test_text)
embedding_dim = embedding.shape[1]
print(f"embedding_dim={embedding_dim}")
return embedding_dim
# Initialize RAG instance
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=asyncio.run(get_embedding_dim()),
max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
func=embedding_func,
),
)
# Insert example text
with open("./book.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
# Test different query modes
print("\nNaive Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
print("\nLocal Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
print("\nGlobal Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
print("\nHybrid Search:")
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))

lightrag/wrapper/Readme.md Normal file
View File

@@ -0,0 +1,177 @@
## Wrapper Directory
The `wrapper` directory contains integrations with different frameworks. These wrappers provide a consistent interface to LightRAG while abstracting away the specifics of each framework.
## Wrapper Directory Structure
```
lightrag/
├── wrapper/ # Wrappers for different model providers and frameworks
│ ├── llama_index_impl.py # LlamaIndex integration for embeddings and completions
│ └── ... # Other framework wrappers
├── kg/ # Knowledge graph implementations
├── utils/ # Utility functions and helpers
└── ...
```
Current wrappers:
1. **LlamaIndex** (`wrapper/llama_index_impl.py`):
- Provides integration with OpenAI and other providers through LlamaIndex
- Supports both direct API access and proxy services like LiteLLM
- Handles embeddings and completions with consistent interfaces
- See example implementations:
- [Direct OpenAI Usage](../examples/lightrag_api_llamaindex_direct_demo_simplified.py)
- [LiteLLM Proxy Usage](../examples/lightrag_api_llamaindex_litellm_demo_simplified.py)
<details>
<summary> <b>Using LlamaIndex</b> </summary>
LightRAG supports LlamaIndex for embeddings and completions in two ways: direct OpenAI usage or through LiteLLM proxy.
### Setup
First, install the required dependencies. For direct OpenAI usage:
```bash
pip install llama-index llama-index-llms-openai llama-index-embeddings-openai
```
For LiteLLM proxy usage:
```bash
pip install llama-index llama-index-llms-litellm llama-index-embeddings-litellm
```
### Standard OpenAI Usage
```python
from lightrag import LightRAG
from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from lightrag.utils import EmbeddingFunc, logger
# Initialize with direct OpenAI access
async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
try:
# Initialize OpenAI if not in kwargs
if 'llm_instance' not in kwargs:
llm_instance = OpenAI(
model="gpt-4",
api_key="your-openai-key",
temperature=0.7,
)
kwargs['llm_instance'] = llm_instance
response = await llama_index_complete_if_cache(
kwargs['llm_instance'],
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
**kwargs,
)
return response
except Exception as e:
logger.error(f"LLM request failed: {str(e)}")
raise
# Initialize LightRAG with OpenAI
rag = LightRAG(
working_dir="your/path",
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(
texts,
embed_model=OpenAIEmbedding(
model="text-embedding-3-large",
api_key="your-openai-key"
)
),
),
)
```
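Once initialized, indexing and querying follow the standard LightRAG flow; a minimal sketch mirroring the bundled example scripts:
```python
from lightrag import QueryParam

# Index a document, then query it in hybrid mode
with open("./book.txt", "r", encoding="utf-8") as f:
    rag.insert(f.read())

print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
```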
### Using LiteLLM Proxy
LiteLLM acts as a proxy in front of many model providers, letting you:
1. Use any LLM provider through LiteLLM
2. Leverage LlamaIndex's embedding and completion capabilities
3. Maintain consistent configuration across services
```python
from lightrag import LightRAG
from lightrag.wrapper.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
from llama_index.llms.litellm import LiteLLM
from llama_index.embeddings.litellm import LiteLLMEmbedding
from lightrag.utils import EmbeddingFunc, logger
# Initialize with LiteLLM proxy
async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
try:
# Initialize LiteLLM if not in kwargs
if 'llm_instance' not in kwargs:
llm_instance = LiteLLM(
model=f"openai/{settings.LLM_MODEL}", # Format: "provider/model_name"
api_base=settings.LITELLM_URL,
api_key=settings.LITELLM_KEY,
temperature=0.7,
)
kwargs['llm_instance'] = llm_instance
response = await llama_index_complete_if_cache(
kwargs['llm_instance'],
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
**kwargs,
)
return response
except Exception as e:
logger.error(f"LLM request failed: {str(e)}")
raise
# Initialize LightRAG with LiteLLM
rag = LightRAG(
working_dir="your/path",
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=1536,
max_token_size=8192,
func=lambda texts: llama_index_embed(
texts,
embed_model=LiteLLMEmbedding(
model_name=f"openai/{settings.EMBEDDING_MODEL}",
api_base=settings.LITELLM_URL,
api_key=settings.LITELLM_KEY,
)
),
),
)
```
### Environment Variables
For OpenAI direct usage:
```bash
OPENAI_API_KEY=your-openai-key
```
For LiteLLM proxy:
```bash
# LiteLLM Configuration
LITELLM_URL=http://litellm:4000
LITELLM_KEY=your-litellm-key
# Model Configuration
LLM_MODEL=gpt-4
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_MAX_TOKEN_SIZE=8192
```
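The `settings` object referenced in the LiteLLM snippet above is not defined there; a minimal, hypothetical stand-in that reads the environment variables shown:
```python
import os

class Settings:
    """Hypothetical config holder; substitute your own settings mechanism."""
    LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
    EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
    LITELLM_URL = os.environ.get("LITELLM_URL", "http://litellm:4000")
    LITELLM_KEY = os.environ.get("LITELLM_KEY", "your-litellm-key")

settings = Settings()
```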
### Key Differences
1. **Direct OpenAI**:
- Simpler setup
- Direct API access
- Requires OpenAI API key
2. **LiteLLM Proxy**:
- Model provider agnostic
- Centralized API key management
- Support for multiple providers
- Better cost control and monitoring
</details>

View File

lightrag/wrapper/llama_index_impl.py Normal file
View File

@@ -0,0 +1,207 @@
import pipmaster as pm
from typing import List, Optional

# Install required dependencies before importing llama_index
if not pm.is_installed("llama-index"):
    pm.install("llama-index")

from llama_index.core.llms import (
    ChatMessage,
    MessageRole,
    ChatResponse,
)
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.settings import Settings as LlamaIndexSettings
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)
from lightrag.utils import (
    logger,
    wrap_embedding_func_with_attrs,
    locate_json_string_body_from_string,
)
from lightrag.exceptions import (
    APIConnectionError,
    RateLimitError,
    APITimeoutError,
)
import numpy as np
def configure_llama_index(settings: LlamaIndexSettings = None, **kwargs):
"""
Configure LlamaIndex settings.
Args:
settings: LlamaIndex Settings instance. If None, uses default settings.
**kwargs: Additional settings to override/configure
"""
if settings is None:
settings = LlamaIndexSettings()
# Update settings with any provided kwargs
for key, value in kwargs.items():
if hasattr(settings, key):
setattr(settings, key, value)
else:
logger.warning(f"Unknown LlamaIndex setting: {key}")
# Set as global settings
LlamaIndexSettings.set_global(settings)
return settings
def format_chat_messages(messages):
"""Format chat messages into LlamaIndex format."""
formatted_messages = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
if role == "system":
formatted_messages.append(
ChatMessage(role=MessageRole.SYSTEM, content=content)
)
elif role == "assistant":
formatted_messages.append(
ChatMessage(role=MessageRole.ASSISTANT, content=content)
)
elif role == "user":
formatted_messages.append(
ChatMessage(role=MessageRole.USER, content=content)
)
else:
logger.warning(f"Unknown role {role}, treating as user message")
formatted_messages.append(
ChatMessage(role=MessageRole.USER, content=content)
)
return formatted_messages
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),
retry=retry_if_exception_type(
(RateLimitError, APIConnectionError, APITimeoutError)
),
)
async def llama_index_complete_if_cache(
    model,  # LlamaIndex LLM instance (despite the name, callers pass the instance itself)
    prompt: str,
    system_prompt: Optional[str] = None,
    history_messages: List[dict] = [],
    **kwargs,
) -> str:
"""Complete the prompt using LlamaIndex."""
try:
# Format messages for chat
formatted_messages = []
# Add system message if provided
if system_prompt:
formatted_messages.append(
ChatMessage(role=MessageRole.SYSTEM, content=system_prompt)
)
# Add history messages
for msg in history_messages:
formatted_messages.append(
ChatMessage(
role=MessageRole.USER
if msg["role"] == "user"
else MessageRole.ASSISTANT,
content=msg["content"],
)
)
# Add current prompt
formatted_messages.append(ChatMessage(role=MessageRole.USER, content=prompt))
        # Get the LLM instance, passed either positionally or via kwargs
        llm = kwargs.get("llm_instance", model)
        if llm is None:
            raise ValueError("An LLM instance must be provided")
# Get response
response: ChatResponse = await llm.achat(messages=formatted_messages)
# In newer versions, the response is in message.content
content = response.message.content
return content
except Exception as e:
logger.error(f"Error in llama_index_complete_if_cache: {str(e)}")
raise
async def llama_index_complete(
prompt,
system_prompt=None,
history_messages=None,
keyword_extraction=False,
settings: LlamaIndexSettings = None,
**kwargs,
) -> str:
"""
Main completion function for LlamaIndex
Args:
prompt: Input prompt
system_prompt: Optional system prompt
history_messages: Optional chat history
keyword_extraction: Whether to extract keywords from response
settings: Optional LlamaIndex settings
**kwargs: Additional arguments
"""
    if history_messages is None:
        history_messages = []
    # Allow keyword_extraction to arrive either as a parameter or via kwargs
    keyword_extraction = kwargs.pop("keyword_extraction", keyword_extraction)
result = await llama_index_complete_if_cache(
kwargs.get("llm_instance"),
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
**kwargs,
)
if keyword_extraction:
return locate_json_string_body_from_string(result)
return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=60),
retry=retry_if_exception_type(
(RateLimitError, APIConnectionError, APITimeoutError)
),
)
async def llama_index_embed(
texts: list[str],
embed_model: BaseEmbedding = None,
settings: LlamaIndexSettings = None,
**kwargs,
) -> np.ndarray:
"""
Generate embeddings using LlamaIndex
Args:
texts: List of texts to embed
embed_model: LlamaIndex embedding model
settings: Optional LlamaIndex settings
**kwargs: Additional arguments
"""
if settings:
configure_llama_index(settings)
if embed_model is None:
raise ValueError("embed_model must be provided")
    # Use the async batch API so the surrounding event loop is not blocked
    embeddings = await embed_model.aget_text_embedding_batch(texts)
return np.array(embeddings)