Merge commit '59617da83e224e3af1c97fb21dd661b088effc2d' into Improve-prompt-prevent-make-up-answer
This commit is contained in:
@@ -98,6 +98,8 @@ After starting the lightrag-server, you can add an Ollama-type connection in the
|
||||
|
||||
LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.
|
||||
|
||||
For better performance, the API server's default values for TOP_K and COSINE_THRESHOLD are set to 50 and 0.4 respectively. If COSINE_THRESHOLD remains at its default value of 0.2 in LightRAG, many irrelevant entities and relations would be retrieved and sent to the LLM.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables:
|
||||
@@ -111,6 +113,17 @@ PORT=9621
|
||||
WORKING_DIR=/app/data/rag_storage
|
||||
INPUT_DIR=/app/data/inputs
|
||||
|
||||
# RAG Configuration
|
||||
MAX_ASYNC=4
|
||||
MAX_TOKENS=32768
|
||||
EMBEDDING_DIM=1024
|
||||
MAX_EMBED_TOKENS=8192
|
||||
#HISTORY_TURNS=3
|
||||
#CHUNK_SIZE=1200
|
||||
#CHUNK_OVERLAP_SIZE=100
|
||||
#COSINE_THRESHOLD=0.4
|
||||
#TOP_K=50
|
||||
|
||||
# LLM Configuration
|
||||
LLM_BINDING=ollama
|
||||
LLM_BINDING_HOST=http://localhost:11434
|
||||
@@ -124,14 +137,8 @@ EMBEDDING_BINDING=ollama
|
||||
EMBEDDING_BINDING_HOST=http://localhost:11434
|
||||
EMBEDDING_MODEL=bge-m3:latest
|
||||
|
||||
# RAG Configuration
|
||||
MAX_ASYNC=4
|
||||
MAX_TOKENS=32768
|
||||
EMBEDDING_DIM=1024
|
||||
MAX_EMBED_TOKENS=8192
|
||||
|
||||
# Security
|
||||
LIGHTRAG_API_KEY=
|
||||
#LIGHTRAG_API_KEY=your-api-key-for-accessing-LightRAG
|
||||
|
||||
# Logging
|
||||
LOG_LEVEL=INFO
|
||||
@@ -186,10 +193,9 @@ PORT=7000 python lightrag.py
|
||||
| --ssl | False | Enable HTTPS |
|
||||
| --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
|
||||
| --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
|
||||
| --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. |
|
||||
| --cosine-threshold | 0.4 | The cosine similarity threshold for node and relation retrieval; works together with top-k to control how many nodes and relations are retrieved. |
|
||||
|
||||
|
||||
|
||||
To protect the server with an authentication key, you can also set an environment variable named `LIGHTRAG_API_KEY`.
|
||||
### Example Usage
|
||||
|
||||
#### Running a LightRAG server with the default local Ollama server as the LLM and embedding backend
|
||||
|
@@ -212,8 +212,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
||||
ASCIIColors.yellow(f"{args.chunk_size}")
|
||||
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
|
||||
ASCIIColors.yellow(f"{args.chunk_overlap_size}")
|
||||
ASCIIColors.white(" └─ History Turns: ", end="")
|
||||
ASCIIColors.white(" ├─ History Turns: ", end="")
|
||||
ASCIIColors.yellow(f"{args.history_turns}")
|
||||
ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
|
||||
ASCIIColors.yellow(f"{args.cosine_threshold}")
|
||||
ASCIIColors.white(" └─ Top-K: ", end="")
|
||||
ASCIIColors.yellow(f"{args.top_k}")
|
||||
|
||||
# System Configuration
|
||||
ASCIIColors.magenta("\n🛠️ System Configuration:")
|
||||
@@ -489,6 +493,20 @@ def parse_args() -> argparse.Namespace:
|
||||
help="Number of conversation history turns to include (default: from env or 3)",
|
||||
)
|
||||
|
||||
# Search parameters
|
||||
parser.add_argument(
|
||||
"--top-k",
|
||||
type=int,
|
||||
default=get_env_value("TOP_K", 50, int),
|
||||
help="Number of most similar results to return (default: from env or 50)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cosine-threshold",
|
||||
type=float,
|
||||
default=get_env_value("COSINE_THRESHOLD", 0.4, float),
|
||||
help="Cosine similarity threshold (default: from env or 0.4)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--simulated-model-name",
|
||||
type=str,
|
||||
@@ -862,6 +880,9 @@ def create_app(args):
|
||||
graph_storage=ollama_server_infos.GRAPH_STORAGE,
|
||||
vector_storage=ollama_server_infos.VECTOR_STORAGE,
|
||||
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
|
||||
vector_db_storage_cls_kwargs={
|
||||
"cosine_better_than_threshold": args.cosine_threshold
|
||||
},
|
||||
)
|
||||
else:
|
||||
rag = LightRAG(
|
||||
@@ -871,6 +892,9 @@ def create_app(args):
|
||||
else openai_alike_model_complete,
|
||||
chunk_token_size=int(args.chunk_size),
|
||||
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
||||
llm_model_kwargs={
|
||||
"timeout": args.timeout,
|
||||
},
|
||||
llm_model_name=args.llm_model,
|
||||
llm_model_max_async=args.max_async,
|
||||
llm_model_max_token_size=args.max_tokens,
|
||||
@@ -879,6 +903,9 @@ def create_app(args):
|
||||
graph_storage=ollama_server_infos.GRAPH_STORAGE,
|
||||
vector_storage=ollama_server_infos.VECTOR_STORAGE,
|
||||
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
|
||||
vector_db_storage_cls_kwargs={
|
||||
"cosine_better_than_threshold": args.cosine_threshold
|
||||
},
|
||||
)
|
||||
|
||||
async def index_file(file_path: Union[str, Path]) -> None:
|
||||
@@ -1068,6 +1095,7 @@ def create_app(args):
|
||||
mode=request.mode,
|
||||
stream=request.stream,
|
||||
only_need_context=request.only_need_context,
|
||||
top_k=args.top_k,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -1109,6 +1137,7 @@ def create_app(args):
|
||||
mode=request.mode,
|
||||
stream=True,
|
||||
only_need_context=request.only_need_context,
|
||||
top_k=args.top_k,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -1648,6 +1677,7 @@ def create_app(args):
|
||||
"stream": request.stream,
|
||||
"only_need_context": False,
|
||||
"conversation_history": conversation_history,
|
||||
"top_k": args.top_k,
|
||||
}
|
||||
|
||||
if args.history_turns is not None:
|
||||
|
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import (
|
||||
TypedDict,
|
||||
@@ -32,7 +33,7 @@ class QueryParam:
|
||||
response_type: str = "Multiple Paragraphs"
|
||||
stream: bool = False
|
||||
# Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
|
||||
top_k: int = 60
|
||||
top_k: int = int(os.getenv("TOP_K", "60"))
|
||||
# Number of document chunks to retrieve.
|
||||
# top_n: int = 10
|
||||
# Number of tokens for the original chunks.
|
||||
|
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from typing import Union
|
||||
@@ -12,16 +13,16 @@ from lightrag.utils import logger
|
||||
class ChromaVectorDBStorage(BaseVectorStorage):
|
||||
"""ChromaDB vector storage implementation."""
|
||||
|
||||
cosine_better_than_threshold: float = 0.2
|
||||
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||
|
||||
def __post_init__(self):
|
||||
try:
|
||||
# Use global config value if specified, otherwise use default
|
||||
self.cosine_better_than_threshold = self.global_config.get(
|
||||
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
||||
self.cosine_better_than_threshold = config.get(
|
||||
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
||||
)
|
||||
|
||||
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
||||
user_collection_settings = config.get("collection_settings", {})
|
||||
# Default HNSW index settings for ChromaDB
|
||||
default_collection_settings = {
|
||||
|
@@ -73,9 +73,15 @@ from lightrag.base import (
|
||||
|
||||
@dataclass
|
||||
class NanoVectorDBStorage(BaseVectorStorage):
|
||||
cosine_better_than_threshold: float = 0.2
|
||||
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||
|
||||
def __post_init__(self):
|
||||
# Use global config value if specified, otherwise use default
|
||||
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
||||
self.cosine_better_than_threshold = config.get(
|
||||
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
||||
)
|
||||
|
||||
self._client_file_name = os.path.join(
|
||||
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
|
||||
)
|
||||
@@ -83,9 +89,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
||||
self._client = NanoVectorDB(
|
||||
self.embedding_func.embedding_dim, storage_file=self._client_file_name
|
||||
)
|
||||
self.cosine_better_than_threshold = self.global_config.get(
|
||||
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
||||
)
|
||||
|
||||
async def upsert(self, data: dict[str, dict]):
|
||||
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
|
||||
@@ -134,6 +137,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
||||
async def query(self, query: str, top_k=5):
|
||||
embedding = await self.embedding_func([query])
|
||||
embedding = embedding[0]
|
||||
logger.info(
|
||||
f"Query: {query}, top_k: {top_k}, cosine_better_than_threshold: {self.cosine_better_than_threshold}"
|
||||
)
|
||||
results = self._client.query(
|
||||
query=embedding,
|
||||
top_k=top_k,
|
||||
|
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
import asyncio
|
||||
|
||||
# import html
|
||||
@@ -341,10 +342,14 @@ class OracleKVStorage(BaseKVStorage):
|
||||
class OracleVectorDBStorage(BaseVectorStorage):
|
||||
# should pass db object to self.db
|
||||
db: OracleDB = None
|
||||
cosine_better_than_threshold: float = 0.2
|
||||
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||
|
||||
def __post_init__(self):
|
||||
pass
|
||||
# Use global config value if specified, otherwise use default
|
||||
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
||||
self.cosine_better_than_threshold = config.get(
|
||||
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
||||
)
|
||||
|
||||
async def upsert(self, data: dict[str, dict]):
|
||||
"""向向量数据库中插入数据"""
|
||||
|
@@ -301,12 +301,14 @@ class PGKVStorage(BaseKVStorage):
|
||||
|
||||
@dataclass
|
||||
class PGVectorStorage(BaseVectorStorage):
|
||||
cosine_better_than_threshold: float = 0.2
|
||||
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||
db: PostgreSQLDB = None
|
||||
|
||||
def __post_init__(self):
|
||||
self._max_batch_size = self.global_config["embedding_batch_num"]
|
||||
self.cosine_better_than_threshold = self.global_config.get(
|
||||
# Use global config value if specified, otherwise use default
|
||||
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
||||
self.cosine_better_than_threshold = config.get(
|
||||
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
||||
)
|
||||
|
||||
|
@@ -217,14 +217,16 @@ class TiDBKVStorage(BaseKVStorage):
|
||||
|
||||
@dataclass
|
||||
class TiDBVectorDBStorage(BaseVectorStorage):
|
||||
cosine_better_than_threshold: float = 0.2
|
||||
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||
|
||||
def __post_init__(self):
|
||||
self._client_file_name = os.path.join(
|
||||
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
|
||||
)
|
||||
self._max_batch_size = self.global_config["embedding_batch_num"]
|
||||
self.cosine_better_than_threshold = self.global_config.get(
|
||||
# Use global config value if specified, otherwise use default
|
||||
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
||||
self.cosine_better_than_threshold = config.get(
|
||||
"cosine_better_than_threshold", self.cosine_better_than_threshold
|
||||
)
|
||||
|
||||
|
@@ -158,8 +158,8 @@ class LightRAG:
|
||||
# LLM
|
||||
llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization)
|
||||
llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
|
||||
llm_model_max_token_size: int = 32768
|
||||
llm_model_max_async: int = 16
|
||||
llm_model_max_token_size: int = int(os.getenv("MAX_TOKENS", "32768"))
|
||||
llm_model_max_async: int = int(os.getenv("MAX_ASYNC", "16"))
|
||||
llm_model_kwargs: dict = field(default_factory=dict)
|
||||
|
||||
# storage
|
||||
|
@@ -590,8 +590,8 @@ async def kg_query(
|
||||
query, query_param, global_config, hashing_kv
|
||||
)
|
||||
|
||||
logger.info(f"High-level keywords: {hl_keywords}")
|
||||
logger.info(f"Low-level keywords: {ll_keywords}")
|
||||
logger.debug(f"High-level keywords: {hl_keywords}")
|
||||
logger.debug(f"Low-level keywords: {ll_keywords}")
|
||||
|
||||
# Handle empty keywords
|
||||
if hl_keywords == [] and ll_keywords == []:
|
||||
@@ -1026,6 +1026,10 @@ async def _build_query_context(
|
||||
[hl_relations_context, ll_relations_context],
|
||||
[hl_text_units_context, ll_text_units_context],
|
||||
)
|
||||
# not necessary to use LLM to generate a response
|
||||
if not entities_context.strip() and not relations_context.strip():
|
||||
return None
|
||||
|
||||
return f"""
|
||||
-----Entities-----
|
||||
```csv
|
||||
|
Reference in New Issue
Block a user