Merge branch 'HKUDS:main' into main
@@ -12,6 +12,12 @@ LLM_BINDING=ollama
 LLM_BINDING_HOST=http://host.docker.internal:11434
 LLM_MODEL=mistral-nemo:latest

+# OpenAI alike example
+# LLM_BINDING=openai
+# LLM_BINDING_HOST=https://localhost:11434/api
+# LLM_MODEL=deepseek-chat
+# LLM_BINDING_API_KEY=your_api_key
+
 # Lollms example
 # LLM_BINDING=lollms
 # LLM_BINDING_HOST=http://host.docker.internal:9600
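For context, the new OpenAI-alike settings can be consumed the same way as the existing Ollama ones. The snippet below is only an illustrative sketch of reading these variables with `os.getenv`; the fallback values shown are assumptions for the sketch, not defaults defined by the project.

```python
import os

# Illustrative only: reading the variables added above. The fallback values
# are assumptions, not defaults taken from the repository.
llm_binding = os.getenv("LLM_BINDING", "ollama")
llm_binding_host = os.getenv("LLM_BINDING_HOST", "http://localhost:11434")
llm_model = os.getenv("LLM_MODEL", "mistral-nemo:latest")
llm_binding_api_key = os.getenv("LLM_BINDING_API_KEY")  # only needed for OpenAI-alike bindings
```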
@@ -35,13 +35,18 @@ For example, you have the possibility to use ollama for the embedding and openai

 #### For Ollama Server
 - Ollama must be running and accessible
-- Default connection: http://localhost:11434
-- Configure using --ollama-host if running on a different host/port
+- Requires environment variables or command line arguments to be provided
+- Environment variables: LLM_BINDING=ollama, LLM_BINDING_HOST, LLM_MODEL
+- Command line arguments: --llm-binding=ollama, --llm-binding-host, --llm-model
+- Default connection is http://localhost:11434 if not provided

-#### For OpenAI Server
-- Requires valid OpenAI API credentials set in environment variables
-- OPENAI_API_KEY must be set
-- LLM_BINDING or LLM_MODEL must be set by command line on in environment variables
+> The default MAX_TOKENS (num_ctx) for Ollama is 32768. If your Ollama server is low on GPU memory, set it to a lower value.
+
+#### For OpenAI Alike Server
+- Requires environment variables or command line arguments to be provided
+- Environment variables: LLM_BINDING=openai, LLM_BINDING_HOST, LLM_MODEL, LLM_BINDING_API_KEY
+- Command line arguments: --llm-binding=openai, --llm-binding-host, --llm-model, --llm-binding-api-key
+- Default connection is https://api.openai.com/v1 if not provided

 #### For Azure OpenAI Server
 Azure OpenAI API can be created using the following commands in Azure CLI (you need to install Azure CLI first from [https://docs.microsoft.com/en-us/cli/azure/install-azure-cli](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)):
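The bullets above describe two equivalent ways to configure the server. A minimal sketch of how that precedence (command line argument, then environment variable, then a built-in default) could be wired with `argparse` is shown below; the parser itself and the exact defaults are assumptions, not the project's actual code.

```python
import argparse
import os

# Sketch, assuming argparse: each flag falls back to the matching environment
# variable and finally to an illustrative default.
parser = argparse.ArgumentParser()
parser.add_argument("--llm-binding", default=os.getenv("LLM_BINDING", "ollama"))
parser.add_argument("--llm-binding-host", default=os.getenv("LLM_BINDING_HOST"))  # resolved per binding when None
parser.add_argument("--llm-model", default=os.getenv("LLM_MODEL", "mistral-nemo:latest"))
parser.add_argument("--llm-binding-api-key", default=os.getenv("LLM_BINDING_API_KEY"))
args = parser.parse_args()
```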
@@ -62,6 +67,13 @@ az cognitiveservices account keys list --name $RESOURCE_NAME -g $RESOURCE_GROUP_
 ```
 The output of the last command will give you the endpoint and the key for the OpenAI API. You can use these values to set the environment variables in the `.env` file.

+```
+LLM_BINDING=azure_openai
+LLM_BINDING_HOST=endpoint_of_azure_ai
+LLM_MODEL=model_name_of_azure_ai
+LLM_BINDING_API_KEY=api_key_of_azure_ai
+```
+
 ### About Ollama API

 We provide an Ollama-compatible interface for LightRAG, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat frontends supporting Ollama, such as Open WebUI, to access LightRAG easily.
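To see where the four values in the `.env` block above would end up, here is a hedged sketch mapping them onto the `openai` SDK's Azure client. The `api_version` argument is required by that client but is not one of the variables shown, so its value here is purely an assumption.

```python
import os

from openai import AsyncAzureOpenAI

# Sketch only: not the project's actual wiring for the azure_openai binding.
client = AsyncAzureOpenAI(
    azure_endpoint=os.environ["LLM_BINDING_HOST"],    # endpoint_of_azure_ai
    api_key=os.environ["LLM_BINDING_API_KEY"],        # api_key_of_azure_ai
    api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01"),  # assumed, not in the .env above
)
# The deployment/model name (LLM_MODEL) would then be passed per request, e.g.
# client.chat.completions.create(model=os.environ["LLM_MODEL"], ...).
```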
@@ -159,6 +171,7 @@ PORT=7000 python lightrag.py
 | --llm-binding | ollama | LLM binding to be used. Supported: lollms, ollama, openai |
 | --llm-binding-host | (dynamic) | LLM server host URL. Defaults based on binding: http://localhost:11434 (ollama), http://localhost:9600 (lollms), https://api.openai.com/v1 (openai) |
 | --llm-model | mistral-nemo:latest | LLM model name |
+| --llm-binding-api-key | None | API Key for OpenAI Alike LLM |
 | --embedding-binding | ollama | Embedding binding to be used. Supported: lollms, ollama, openai |
 | --embedding-binding-host | (dynamic) | Embedding server host URL. Defaults based on binding: http://localhost:11434 (ollama), http://localhost:9600 (lollms), https://api.openai.com/v1 (openai) |
 | --embedding-model | bge-m3:latest | Embedding model name |
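The "(dynamic)" defaults in the table depend on the selected binding. One way such a lookup could be implemented is sketched below; only the documented defaults come from the table, while the helper itself is hypothetical.

```python
# Hypothetical helper illustrating the "(dynamic)" column above.
DEFAULT_HOSTS = {
    "ollama": "http://localhost:11434",
    "lollms": "http://localhost:9600",
    "openai": "https://api.openai.com/v1",
}

def resolve_host(binding: str, explicit_host: str | None) -> str:
    """Return the host given on the command line, or the binding's default."""
    return explicit_host or DEFAULT_HOSTS[binding]
```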
@@ -616,7 +616,7 @@ def create_app(args):
             system_prompt=system_prompt,
             history_messages=history_messages,
             base_url=args.llm_binding_host,
-            api_key=os.getenv("OPENAI_API_KEY"),
+            api_key=args.llm_binding_api_key,
             **kwargs,
         )

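The one-line change above swaps the hard-coded OPENAI_API_KEY lookup for the new --llm-binding-api-key / LLM_BINDING_API_KEY setting. A hedged sketch of the kind of call site it sits in follows; only the keyword arguments visible in the hunk come from the diff, while the wrapper's name and the exact signature of openai_complete_if_cache are assumptions.

```python
from lightrag.llm import openai_complete_if_cache  # import path assumed

# Sketch of a plausible call site; not copied from the repository.
async def openai_alike_model_complete(args, prompt, system_prompt=None,
                                       history_messages=None, **kwargs):
    return await openai_complete_if_cache(
        args.llm_model,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages or [],
        base_url=args.llm_binding_host,       # from --llm-binding-host / LLM_BINDING_HOST
        api_key=args.llm_binding_api_key,     # from --llm-binding-api-key / LLM_BINDING_API_KEY
        **kwargs,
    )
```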
@@ -350,10 +350,11 @@ async def ollama_model_if_cache(
     timeout = kwargs.pop("timeout", None)
     kwargs.pop("hashing_kv", None)
     api_key = kwargs.pop("api_key", None)
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {api_key}"
-    } if api_key else {"Content-Type": "application/json"}
+    headers = (
+        {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
+        if api_key
+        else {"Content-Type": "application/json"}
+    )
     ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers)
     messages = []
     if system_prompt:
@@ -386,10 +387,11 @@ async def lollms_model_if_cache(

     stream = True if kwargs.get("stream") else False
     api_key = kwargs.pop("api_key", None)
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {api_key}"
-    } if api_key else {"Content-Type": "application/json"}
+    headers = (
+        {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
+        if api_key
+        else {"Content-Type": "application/json"}
+    )

     # Extract lollms specific parameters
     request_data = {
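The same header-building expression now appears in both ollama_model_if_cache and lollms_model_if_cache. As a usage note (not part of this commit), the two branches behave as shown below, and the duplication could be factored into a small helper; the helper is hypothetical and not in the repository.

```python
# Hypothetical helper: the shared pattern from the two hunks above,
# extracted only for illustration.
def build_headers(api_key: str | None = None) -> dict:
    return (
        {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
        if api_key
        else {"Content-Type": "application/json"}
    )

print(build_headers())           # {'Content-Type': 'application/json'}
print(build_headers("sk-demo"))  # adds 'Authorization': 'Bearer sk-demo'
```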