add LightRAG init parameters in readme
also fix some errors
README.md | 29 +++++++++++++++++++++++++++++
@@ -511,6 +511,35 @@ if __name__ == "__main__":
 
 </details>
 
+### LightRAG init parameters
+
+| **Parameter** | **Type** | **Explanation** | **Default** |
+| --- | --- | --- | --- |
+| **working\_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
+| **kv\_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`, `OracleKVStorage` | `JsonKVStorage` |
+| **vector\_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`, `OracleVectorDBStorage` | `NanoVectorDBStorage` |
+| **graph\_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`, `Neo4JStorage`, `OracleGraphStorage` | `NetworkXStorage` |
+| **log\_level** | `int` | Log level for application runtime | `logging.DEBUG` |
+| **chunk\_token\_size** | `int` | Maximum token size per chunk when splitting documents | `1200` |
+| **chunk\_overlap\_token\_size** | `int` | Overlap token size between two chunks when splitting documents | `100` |
+| **tiktoken\_model\_name** | `str` | Model name for the Tiktoken encoder used to count tokens | `gpt-4o-mini` |
+| **entity\_extract\_max\_gleaning** | `int` | Number of loops in the entity extraction process, appending history messages | `1` |
+| **entity\_summary\_to\_max\_tokens** | `int` | Maximum token size for each entity summary | `500` |
+| **node\_embedding\_algorithm** | `str` | Algorithm for node embedding (currently not used) | `node2vec` |
+| **node2vec\_params** | `dict` | Parameters for node embedding | `{"dimensions": 1536, "num_walks": 10, "walk_length": 40, "window_size": 2, "iterations": 3, "random_seed": 3}` |
+| **embedding\_func** | `EmbeddingFunc` | Function to generate embedding vectors from text | `openai_embedding` |
+| **embedding\_batch\_num** | `int` | Maximum batch size for embedding processes (multiple texts sent per batch) | `32` |
+| **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
+| **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
+| **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
+| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` |
+| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` |
+| **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | `{}` |
+| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for the vector database (currently not used) | `{}` |
+| **enable\_llm\_cache** | `bool` | If `True`, stores LLM results in cache; repeated prompts return cached responses | `True` |
+| **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese"}`: sets the example limit and output language | `example_number: all examples, language: English` |
+| **convert\_response\_to\_json\_func** | `callable` | Not used | `convert_response_to_json` |
+
 ## API Server Implementation
 
 LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests.
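
Putting the new table to work: a minimal initialization sketch assembled from the parameters above, spelling out the documented defaults explicitly. The import paths follow LightRAG's usual layout; check them against your installed version.

```python
# Minimal sketch built from the parameter table above; values are the
# documented defaults written out explicitly. Import paths assume the
# package layout used elsewhere in this README.
from lightrag import LightRAG
from lightrag.llm import gpt_4o_mini_complete, openai_embedding

rag = LightRAG(
    working_dir="./lightrag_cache",        # where cache files are stored
    kv_storage="JsonKVStorage",            # documents and text chunks
    vector_storage="NanoVectorDBStorage",  # embedding vectors
    graph_storage="NetworkXStorage",       # graph nodes and edges
    chunk_token_size=1200,                 # max tokens per chunk
    chunk_overlap_token_size=100,          # overlap between adjacent chunks
    embedding_func=openai_embedding,       # text -> vectors
    llm_model_func=gpt_4o_mini_complete,   # LLM used for extraction and answers
    enable_llm_cache=True,                 # serve repeated prompts from cache
)
```
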
@@ -81,7 +81,7 @@ async def get_embedding_dim():
 
 async def init():
     # Detect embedding dimension
-    embedding_dimension = 1024 # await get_embedding_dim()
+    embedding_dimension = await get_embedding_dim()
     print(f"Detected embedding dimension: {embedding_dimension}")
     # Create Oracle DB connection
     # The `config` parameter is the connection configuration of Oracle DB
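
The fix above replaces a hardcoded dimension with a runtime probe. A typical probe, sketched here under the assumption of an async embedding callable that returns a `(batch, dim)` array, simply embeds a test string and reads off the vector length:

```python
# Sketch of a runtime dimension probe: embed a tiny test input and
# measure the vector length instead of hardcoding 1024.
# `embedding_func` is passed in here for self-containment; in the demo
# it is the module-level embedding callable.
async def get_embedding_dim(embedding_func) -> int:
    test_text = ["This is a test sentence."]
    embedding = await embedding_func(test_text)  # assumed async embedder
    return embedding.shape[1]                    # (batch, dim) -> dim
```
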
@@ -105,6 +105,7 @@ async def init():
     await oracle_db.check_tables()
     # Initialize LightRAG
     # We use Oracle DB as the KV/vector/graph storage
+    # You can add `addon_params={"example_number": 1, "language": "Simplified Chinese"}` to control the prompt
     rag = LightRAG(
         enable_llm_cache=False,
         working_dir=WORKING_DIR,
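
The comment added above refers to `addon_params`. Per the README table, it caps the number of few-shot examples in the extraction prompt and sets the output language; a hedged sketch of its use:

```python
# Sketch: addon_params tunes prompt construction. "example_number" caps
# how many few-shot examples go into the entity-extraction prompt, and
# "language" sets the output language of summaries and answers.
rag = LightRAG(
    working_dir=WORKING_DIR,  # assumes WORKING_DIR is defined as in the demo
    addon_params={
        "example_number": 1,               # keep only one few-shot example
        "language": "Simplified Chinese",  # answers/summaries in Chinese
    },
)
```
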
@@ -84,6 +84,7 @@ async def main():
 
     # Initialize LightRAG
     # We use Oracle DB as the KV/vector/graph storage
+    # You can add `addon_params={"example_number": 1, "language": "Simplified Chinese"}` to control the prompt
     rag = LightRAG(
         enable_llm_cache=False,
         working_dir=WORKING_DIR,
@@ -96,8 +97,7 @@ async def main():
         ),
         graph_storage="OracleGraphStorage",
         kv_storage="OracleKVStorage",
-        vector_storage="OracleVectorDBStorage",
-        addon_params={"example_number": 1, "language": "Simplfied Chinese"},
+        vector_storage="OracleVectorDBStorage"
     )
 
     # Set the KV/vector/graph storage's `db` property, so all operations will use the same connection pool
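
The trailing comment describes sharing one Oracle connection pool across all three storage backends. A sketch of that wiring, using the attribute names from LightRAG's Oracle demo (verify them against your version):

```python
# Sketch of the shared-connection-pool wiring the comment describes:
# point every storage class at the same OracleDB instance so all
# KV/vector/graph operations reuse one pool. Attribute names follow the
# Oracle demo and may differ across LightRAG versions.
rag.graph_storage_cls.db = oracle_db
rag.key_string_value_json_storage_cls.db = oracle_db
rag.vector_db_storage_cls.db = oracle_db
```
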
@@ -72,7 +72,7 @@ async def openai_complete_if_cache(
     content = response.choices[0].message.content
     if r"\u" in content:
         content = content.encode("utf-8").decode("unicode_escape")
-    print(content)
+    # print(content)
     if hashing_kv is not None:
         await hashing_kv.upsert(
             {args_hash: {"return": response.choices[0].message.content, "model": model}}
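
For context, the surrounding function caches LLM completions keyed by a hash of the request. A hedged sketch of that pattern; the helper names mirror `lightrag.llm` and `lightrag.utils`, but treat the signatures as approximate:

```python
# Sketch of the completion cache around the change above: hash the
# request, return a cached answer on a hit, otherwise call the model and
# store the result. Names mirror lightrag.llm but are approximate.
args_hash = compute_args_hash(model, messages)
if hashing_kv is not None:
    cached = await hashing_kv.get_by_id(args_hash)
    if cached is not None:
        return cached["return"]  # cache hit: no API call
response = await openai_async_client.chat.completions.create(
    model=model, messages=messages, **kwargs
)
content = response.choices[0].message.content
if hashing_kv is not None:
    await hashing_kv.upsert({args_hash: {"return": content, "model": model}})
return content
```
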
@@ -571,19 +571,19 @@ async def _build_query_context(
         hl_text_units_context,
     )
     return f"""
-# -----Entities-----
-# ```csv
-# {entities_context}
-# ```
-# -----Relationships-----
-# ```csv
-# {relations_context}
-# ```
-# -----Sources-----
-# ```csv
-# {text_units_context}
-# ```
-# """
+-----Entities-----
+```csv
+{entities_context}
+```
+-----Relationships-----
+```csv
+{relations_context}
+```
+-----Sources-----
+```csv
+{text_units_context}
+```
+"""
 
 
 async def _get_node_data(
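
The fix above strips stray `#` prefixes so the query context renders as plain CSV sections. Each `*_context` value is a list-of-lists table serialized to CSV; LightRAG has a `list_of_list_to_csv` helper in `lightrag.utils`, approximated here:

```python
import csv
import io

# Approximation of the helper that produces entities_context and friends:
# serialize a list-of-lists table (header row first) into a CSV string.
def list_of_list_to_csv(rows: list[list]) -> str:
    buffer = io.StringIO()
    csv.writer(buffer).writerows(rows)
    return buffer.getvalue().strip()

entities_context = list_of_list_to_csv([
    ["id", "entity", "type", "description", "rank"],
    [0, "Alan Turing", "person", "Mathematician and computer scientist", 42],
])
```
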
@@ -593,18 +593,18 @@ async def _get_node_data(
     text_chunks_db: BaseKVStorage[TextChunkSchema],
     query_param: QueryParam,
 ):
-    # 获取相似的实体
+    # get similar entities
     results = await entities_vdb.query(query, top_k=query_param.top_k)
     if not len(results):
         return None
-    # 获取实体信息
+    # get entity information
     node_datas = await asyncio.gather(
         *[knowledge_graph_inst.get_node(r["entity_name"]) for r in results]
     )
     if not all([n is not None for n in node_datas]):
         logger.warning("Some nodes are missing, maybe the storage is damaged")
 
-    # 获取实体的度
+    # get entity degree
     node_degrees = await asyncio.gather(
         *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
     )
@@ -613,11 +613,11 @@ async def _get_node_data(
         for k, n, d in zip(results, node_datas, node_degrees)
         if n is not None
     ] # what is this text_chunks_db doing. dont remember it in airvx. check the diagram.
-    # 根据实体获取文本片段
+    # get text chunks related to the entities
     use_text_units = await _find_most_related_text_unit_from_entities(
         node_datas, query_param, text_chunks_db, knowledge_graph_inst
     )
-    # 获取关联的边
+    # get related edges
     use_relations = await _find_most_related_edges_from_entities(
         node_datas, query_param, knowledge_graph_inst
     )
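
Taken together, the two hunks above annotate `_get_node_data`'s local retrieval flow. A compact outline, with the function and storage names taken from the hunks and signatures approximate:

```python
import asyncio

# Outline of the local retrieval flow annotated above (approximate):
# 1. vector-search the query against stored entity embeddings
# 2. fetch full node records and graph degrees for each hit
# 3. expand to the most related text chunks and edges
async def local_retrieval_outline(query, entities_vdb, knowledge_graph_inst,
                                  text_chunks_db, query_param):
    results = await entities_vdb.query(query, top_k=query_param.top_k)
    node_datas = await asyncio.gather(
        *[knowledge_graph_inst.get_node(r["entity_name"]) for r in results]
    )
    node_degrees = await asyncio.gather(
        *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
    )
    use_text_units = await _find_most_related_text_unit_from_entities(
        node_datas, query_param, text_chunks_db, knowledge_graph_inst
    )
    use_relations = await _find_most_related_edges_from_entities(
        node_datas, query_param, knowledge_graph_inst
    )
    return node_datas, node_degrees, use_text_units, use_relations
```
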
@@ -625,7 +625,7 @@ async def _get_node_data(
         f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
     )
 
-    # 构建提示词
+    # build prompt
     entites_section_list = [["id", "entity", "type", "description", "rank"]]
     for i, n in enumerate(node_datas):
         entites_section_list.append(
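
The hunk ends mid-loop; a hedged completion showing where those rows go, with `list_of_list_to_csv` as sketched earlier. The field names come from the hunk's header row; the fallback values are illustrative, not confirmed from the source:

```python
# Hedged completion of the truncated loop: one row per retrieved entity,
# with the node's graph degree surfaced as "rank". Fallback values are
# illustrative assumptions.
entites_section_list = [["id", "entity", "type", "description", "rank"]]
for i, n in enumerate(node_datas):
    entites_section_list.append([
        i,
        n["entity_name"],
        n.get("entity_type", "UNKNOWN"),
        n.get("description", "UNKNOWN"),
        n["rank"],
    ])
entities_context = list_of_list_to_csv(entites_section_list)
```
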