Moved back to llm dir as per
https://github.com/HKUDS/LightRAG/pull/864#issuecomment-2669705946

- Created two new example scripts demonstrating LightRAG integration with LlamaIndex:
  - `lightrag_llamaindex_direct_demo.py`: direct OpenAI integration
  - `lightrag_llamaindex_litellm_demo.py`: LiteLLM proxy integration
- Both examples showcase the different search modes (naive, local, global, hybrid)
- Includes configuration for the working directory, models, and API settings
- Demonstrates text insertion and querying using LightRAG with LlamaIndex
- Removed the wrapper directory and all references to it
lightrag/llm/Readme.md (new file, 177 lines)
@@ -0,0 +1,177 @@
## LLM Directory

The `lightrag/llm` directory contains integrations with different frameworks and model providers. These integrations provide a consistent interface to LightRAG while abstracting away the specifics of each framework.

## LLM Directory Structure

```
lightrag/
├── llm/                      # Integrations for different model providers and frameworks
│   ├── llama_index_impl.py   # LlamaIndex integration for embeddings and completions
│   └── ...                   # Other framework integrations
├── kg/                       # Knowledge graph implementations
├── utils/                    # Utility functions and helpers
└── ...
```

Current integrations:

1. **LlamaIndex** (`llm/llama_index_impl.py`):
   - Provides integration with OpenAI and other providers through LlamaIndex
   - Supports both direct API access and proxy services such as LiteLLM
   - Handles embeddings and completions with consistent interfaces
   - See the example implementations:
     - [Direct OpenAI Usage](../../examples/lightrag_llamaindex_direct_demo.py)
     - [LiteLLM Proxy Usage](../../examples/lightrag_llamaindex_litellm_demo.py)

<details>
<summary> <b>Using LlamaIndex</b> </summary>

LightRAG supports LlamaIndex for embeddings and completions in two ways: direct OpenAI usage or through a LiteLLM proxy.

### Setup

First, install the required dependencies:

```bash
pip install llama-index-llms-litellm llama-index-embeddings-litellm
# The direct OpenAI example additionally requires:
pip install llama-index-llms-openai llama-index-embeddings-openai
```
### Standard OpenAI Usage

```python
from lightrag import LightRAG
from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from lightrag.utils import EmbeddingFunc, logger


# Initialize with direct OpenAI access
async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
    try:
        # Initialize OpenAI if not already provided in kwargs
        if "llm_instance" not in kwargs:
            llm_instance = OpenAI(
                model="gpt-4",
                api_key="your-openai-key",
                temperature=0.7,
            )
            kwargs["llm_instance"] = llm_instance

        response = await llama_index_complete_if_cache(
            kwargs["llm_instance"],
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            **kwargs,
        )
        return response
    except Exception as e:
        logger.error(f"LLM request failed: {str(e)}")
        raise


# Initialize LightRAG with OpenAI
rag = LightRAG(
    working_dir="your/path",
    llm_model_func=llm_model_func,
    embedding_func=EmbeddingFunc(
        embedding_dim=1536,
        max_token_size=8192,
        func=lambda texts: llama_index_embed(
            texts,
            embed_model=OpenAIEmbedding(
                model="text-embedding-3-large",
                api_key="your-openai-key",
            ),
        ),
    ),
)
```
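Once the `rag` instance above is constructed, inserting text and querying works the same as with any other LightRAG backend. The sketch below mirrors what the new demo scripts exercise (the sample text and question are placeholders); the same calls apply to the LiteLLM-backed instance in the next section:

```python
from lightrag import QueryParam

# Insert text into the knowledge store (placeholder content)
rag.insert("Your source document text goes here.")

# Query using the different search modes demonstrated in the examples
for mode in ["naive", "local", "global", "hybrid"]:
    answer = rag.query("What is this document about?", param=QueryParam(mode=mode))
    print(f"[{mode}] {answer}")
```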
### Using LiteLLM Proxy

Using a LiteLLM proxy lets you:

1. Use any LLM provider through LiteLLM
2. Leverage LlamaIndex's embedding and completion capabilities
3. Maintain consistent configuration across services

```python
from lightrag import LightRAG
from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
from llama_index.llms.litellm import LiteLLM
from llama_index.embeddings.litellm import LiteLLMEmbedding
from lightrag.utils import EmbeddingFunc, logger


# Initialize with LiteLLM proxy.
# `settings` is your own configuration object holding the values described
# under "Environment Variables" below (LLM_MODEL, LITELLM_URL, LITELLM_KEY, ...).
async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
    try:
        # Initialize LiteLLM if not already provided in kwargs
        if "llm_instance" not in kwargs:
            llm_instance = LiteLLM(
                model=f"openai/{settings.LLM_MODEL}",  # Format: "provider/model_name"
                api_base=settings.LITELLM_URL,
                api_key=settings.LITELLM_KEY,
                temperature=0.7,
            )
            kwargs["llm_instance"] = llm_instance

        response = await llama_index_complete_if_cache(
            kwargs["llm_instance"],
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            **kwargs,
        )
        return response
    except Exception as e:
        logger.error(f"LLM request failed: {str(e)}")
        raise


# Initialize LightRAG with LiteLLM
rag = LightRAG(
    working_dir="your/path",
    llm_model_func=llm_model_func,
    embedding_func=EmbeddingFunc(
        embedding_dim=1536,
        max_token_size=8192,
        func=lambda texts: llama_index_embed(
            texts,
            embed_model=LiteLLMEmbedding(
                model_name=f"openai/{settings.EMBEDDING_MODEL}",
                api_base=settings.LITELLM_URL,
                api_key=settings.LITELLM_KEY,
            ),
        ),
    ),
)
```
### Environment Variables

For OpenAI direct usage:

```bash
OPENAI_API_KEY=your-openai-key
```

For LiteLLM proxy:

```bash
# LiteLLM Configuration
LITELLM_URL=http://litellm:4000
LITELLM_KEY=your-litellm-key

# Model Configuration
LLM_MODEL=gpt-4
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_MAX_TOKEN_SIZE=8192
```
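The LiteLLM example above reads its configuration from a `settings` object of your own; it is not defined in this README. A minimal sketch of such an object, assuming the environment variables listed above are set, could look like this (names and defaults are illustrative only):

```python
import os
from dataclasses import dataclass


@dataclass
class Settings:
    """Minimal configuration holder for the LiteLLM example (illustrative only)."""

    LITELLM_URL: str = os.getenv("LITELLM_URL", "http://litellm:4000")
    LITELLM_KEY: str = os.getenv("LITELLM_KEY", "")
    LLM_MODEL: str = os.getenv("LLM_MODEL", "gpt-4")
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "text-embedding-3-large")
    EMBEDDING_MAX_TOKEN_SIZE: int = int(os.getenv("EMBEDDING_MAX_TOKEN_SIZE", "8192"))


settings = Settings()
```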
### Key Differences

1. **Direct OpenAI**:
   - Simpler setup
   - Direct API access
   - Requires OpenAI API key

2. **LiteLLM Proxy**:
   - Model provider agnostic
   - Centralized API key management
   - Support for multiple providers
   - Better cost control and monitoring

</details>
lightrag/llm/llama_index_impl.py (new file, 208 lines)
@@ -0,0 +1,208 @@
import pipmaster as pm

# Install required dependencies before importing llama_index
if not pm.is_installed("llama-index"):
    pm.install("llama-index")

from llama_index.core.llms import (
    ChatMessage,
    MessageRole,
    ChatResponse,
)
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.settings import Settings as LlamaIndexSettings
from typing import List, Optional
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)
from lightrag.utils import (
    logger,
    wrap_embedding_func_with_attrs,
    locate_json_string_body_from_string,
)
from lightrag.exceptions import (
    APIConnectionError,
    RateLimitError,
    APITimeoutError,
)
import numpy as np


def configure_llama_index(settings: LlamaIndexSettings = None, **kwargs):
    """
    Configure LlamaIndex settings.

    Args:
        settings: LlamaIndex Settings instance. If None, uses default settings.
        **kwargs: Additional settings to override/configure
    """
    if settings is None:
        settings = LlamaIndexSettings()

    # Update settings with any provided kwargs
    for key, value in kwargs.items():
        if hasattr(settings, key):
            setattr(settings, key, value)
        else:
            logger.warning(f"Unknown LlamaIndex setting: {key}")

    # Set as global settings
    LlamaIndexSettings.set_global(settings)
    return settings


def format_chat_messages(messages):
    """Format chat messages into LlamaIndex format."""
    formatted_messages = []

    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")

        if role == "system":
            formatted_messages.append(
                ChatMessage(role=MessageRole.SYSTEM, content=content)
            )
        elif role == "assistant":
            formatted_messages.append(
                ChatMessage(role=MessageRole.ASSISTANT, content=content)
            )
        elif role == "user":
            formatted_messages.append(
                ChatMessage(role=MessageRole.USER, content=content)
            )
        else:
            logger.warning(f"Unknown role {role}, treating as user message")
            formatted_messages.append(
                ChatMessage(role=MessageRole.USER, content=content)
            )

    return formatted_messages


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
)
async def llama_index_complete_if_cache(
    model: str,
    prompt: str,
    system_prompt: Optional[str] = None,
    history_messages: List[dict] = [],
    **kwargs,
) -> str:
    """Complete the prompt using LlamaIndex.

    Note: the first positional argument is accepted for interface compatibility;
    the actual LLM client is read from kwargs["llm_instance"].
    """
    try:
        # Format messages for chat
        formatted_messages = []

        # Add system message if provided
        if system_prompt:
            formatted_messages.append(
                ChatMessage(role=MessageRole.SYSTEM, content=system_prompt)
            )

        # Add history messages
        for msg in history_messages:
            formatted_messages.append(
                ChatMessage(
                    role=MessageRole.USER
                    if msg["role"] == "user"
                    else MessageRole.ASSISTANT,
                    content=msg["content"],
                )
            )

        # Add current prompt
        formatted_messages.append(ChatMessage(role=MessageRole.USER, content=prompt))

        # Get LLM instance from kwargs
        if "llm_instance" not in kwargs:
            raise ValueError("llm_instance must be provided in kwargs")
        llm = kwargs["llm_instance"]

        # Get response
        response: ChatResponse = await llm.achat(messages=formatted_messages)

        # In newer versions, the response is in message.content
        content = response.message.content
        return content

    except Exception as e:
        logger.error(f"Error in llama_index_complete_if_cache: {str(e)}")
        raise

async def llama_index_complete(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    settings: LlamaIndexSettings = None,
    **kwargs,
) -> str:
    """
    Main completion function for LlamaIndex

    Args:
        prompt: Input prompt
        system_prompt: Optional system prompt
        history_messages: Optional chat history
        keyword_extraction: Whether to extract keywords from response
        settings: Optional LlamaIndex settings
        **kwargs: Additional arguments
    """
    if history_messages is None:
        history_messages = []

    # Accept keyword_extraction either as the named argument or via kwargs,
    # without discarding the explicitly passed value
    keyword_extraction = kwargs.pop("keyword_extraction", keyword_extraction)
    result = await llama_index_complete_if_cache(
        kwargs.get("llm_instance"),
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        **kwargs,
    )
    if keyword_extraction:
        return locate_json_string_body_from_string(result)
    return result


@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
)
async def llama_index_embed(
    texts: list[str],
    embed_model: BaseEmbedding = None,
    settings: LlamaIndexSettings = None,
    **kwargs,
) -> np.ndarray:
    """
    Generate embeddings using LlamaIndex

    Args:
        texts: List of texts to embed
        embed_model: LlamaIndex embedding model
        settings: Optional LlamaIndex settings
        **kwargs: Additional arguments
    """
    if settings:
        configure_llama_index(settings)

    if embed_model is None:
        raise ValueError("embed_model must be provided")

    # Use _get_text_embeddings for batch processing
    embeddings = embed_model._get_text_embeddings(texts)
    return np.array(embeddings)