From 108fc4a1ee3e65ee9a28018ec60eea94956f5df4 Mon Sep 17 00:00:00 2001 From: Andrii Lazarchuk Date: Mon, 21 Oct 2024 11:53:06 +0000 Subject: [PATCH 01/39] Add ability to passadditional parameters to ollama library like host and timeout --- .gitignore | 121 +++++++++++++++++++++++++++++++ examples/lightrag_ollama_demo.py | 31 +++++--- lightrag/lightrag.py | 3 +- lightrag/llm.py | 9 ++- 4 files changed, 151 insertions(+), 13 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..422c67ce --- /dev/null +++ b/.gitignore @@ -0,0 +1,121 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +*.egg +*.egg-info/ +dist/ +build/ +*.whl + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.env.* +.venv +.venv.* +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyderworkspace + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# Example files +book.txt +dickens/ diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py index a2d04aa6..dfda26e6 100644 --- a/examples/lightrag_ollama_demo.py +++ b/examples/lightrag_ollama_demo.py @@ -1,4 +1,7 @@ import os +import logging + +logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG) from lightrag import LightRAG, QueryParam from lightrag.llm import ollama_model_complete, ollama_embedding @@ -11,15 +14,17 @@ if not os.path.exists(WORKING_DIR): rag = LightRAG( working_dir=WORKING_DIR, - llm_model_func=ollama_model_complete, - llm_model_name='your_model_name', + tiktoken_model_name="mistral:7b", + llm_model_func=ollama_model_complete, + llm_model_name="mistral:7b", + llm_model_max_async=2, + llm_model_kwargs={"host": "http://localhost:11434"}, embedding_func=EmbeddingFunc( embedding_dim=768, max_token_size=8192, func=lambda texts: ollama_embedding( - texts, - embed_model="nomic-embed-text" - ) + texts, embed_model="nomic-embed-text", host="http://localhost:11434" + ), ), ) @@ -28,13 +33,21 @@ with open("./book.txt") as f: rag.insert(f.read()) # Perform naive search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) +) # Perform local search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) +print( + rag.query("What 
are the top themes in this story?", param=QueryParam(mode="local")) +) # Perform global search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) +) # Perform hybrid search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) +) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 83312ef6..c3e5cdab 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -86,6 +86,7 @@ class LightRAG: llm_model_name: str = 'meta-llama/Llama-3.2-1B-Instruct'#'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' llm_model_max_token_size: int = 32768 llm_model_max_async: int = 16 + llm_model_kwargs: dict = field(default_factory=dict) # storage key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage @@ -158,7 +159,7 @@ class LightRAG: ) self.llm_model_func = limit_async_func_call(self.llm_model_max_async)( - partial(self.llm_model_func, hashing_kv=self.llm_response_cache) + partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs) ) def insert(self, string_or_strings): diff --git a/lightrag/llm.py b/lightrag/llm.py index 7328a583..aac384d9 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -98,8 +98,10 @@ async def ollama_model_if_cache( ) -> str: kwargs.pop("max_tokens", None) kwargs.pop("response_format", None) + host = kwargs.pop("host", None) + timeout = kwargs.pop("timeout", None) - ollama_client = ollama.AsyncClient() + ollama_client = ollama.AsyncClient(host=host, timeout=timeout) messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) @@ -193,10 +195,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray: embeddings = outputs.last_hidden_state.mean(dim=1) return embeddings.detach().numpy() -async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray: +async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray: embed_text = [] + ollama_client = ollama.Client(**kwargs) for text in texts: - data = ollama.embeddings(model=embed_model, prompt=text) + data = ollama_client.embeddings(model=embed_model, prompt=text) embed_text.append(data["embedding"]) return embed_text From 25a2dd41c1e39801f029fcd9fb128b4d8b45356d Mon Sep 17 00:00:00 2001 From: Andrii Lazarchuk Date: Mon, 21 Oct 2024 11:53:06 +0000 Subject: [PATCH 02/39] Add ability to passadditional parameters to ollama library like host and timeout --- .gitignore | 3 ++- examples/lightrag_ollama_demo.py | 3 +++ lightrag/lightrag.py | 3 ++- lightrag/llm.py | 9 ++++++--- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 5a41ae32..9ce353de 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ dickens/ book.txt lightrag-dev/ .idea/ -dist/ \ No newline at end of file +dist/ +.venv/ \ No newline at end of file diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py index c61b71c0..f968d26e 100644 --- a/examples/lightrag_ollama_demo.py +++ b/examples/lightrag_ollama_demo.py @@ -1,4 +1,7 @@ import os +import logging + +logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG) from lightrag import LightRAG, QueryParam from lightrag.llm import ollama_model_complete, ollama_embedding diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 
5137af42..d4b1eaa1 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -88,6 +88,7 @@ class LightRAG: llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' llm_model_max_token_size: int = 32768 llm_model_max_async: int = 16 + llm_model_kwargs: dict = field(default_factory=dict) # storage key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage @@ -154,7 +155,7 @@ class LightRAG: ) self.llm_model_func = limit_async_func_call(self.llm_model_max_async)( - partial(self.llm_model_func, hashing_kv=self.llm_response_cache) + partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs) ) def insert(self, string_or_strings): diff --git a/lightrag/llm.py b/lightrag/llm.py index be801e0c..aa818995 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -222,8 +222,10 @@ async def ollama_model_if_cache( ) -> str: kwargs.pop("max_tokens", None) kwargs.pop("response_format", None) + host = kwargs.pop("host", None) + timeout = kwargs.pop("timeout", None) - ollama_client = ollama.AsyncClient() + ollama_client = ollama.AsyncClient(host=host, timeout=timeout) messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) @@ -415,10 +417,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray: return embeddings.detach().numpy() -async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray: +async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray: embed_text = [] + ollama_client = ollama.Client(**kwargs) for text in texts: - data = ollama.embeddings(model=embed_model, prompt=text) + data = ollama_client.embeddings(model=embed_model, prompt=text) embed_text.append(data["embedding"]) return embed_text From e54d0536c46d4ecf49f86746109ea6e08505017c Mon Sep 17 00:00:00 2001 From: Andrii Lazarchuk Date: Mon, 21 Oct 2024 13:53:28 +0000 Subject: [PATCH 03/39] Small fix on demo --- examples/lightrag_ollama_demo.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py index dfda26e6..93196066 100644 --- a/examples/lightrag_ollama_demo.py +++ b/examples/lightrag_ollama_demo.py @@ -1,7 +1,7 @@ import os import logging -logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG) +logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) from lightrag import LightRAG, QueryParam from lightrag.llm import ollama_model_complete, ollama_embedding @@ -14,7 +14,6 @@ if not os.path.exists(WORKING_DIR): rag = LightRAG( working_dir=WORKING_DIR, - tiktoken_model_name="mistral:7b", llm_model_func=ollama_model_complete, llm_model_name="mistral:7b", llm_model_max_async=2, From 1d24eaf656990fe040ac5c78b93e615a2a5e81fa Mon Sep 17 00:00:00 2001 From: Andrii Lazarchuk Date: Tue, 22 Oct 2024 14:35:42 +0000 Subject: [PATCH 04/39] Finetune example to be able to run ollama example without need to tweak context size in Modelfile --- examples/lightrag_ollama_demo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py index 93196066..6070131f 100644 --- a/examples/lightrag_ollama_demo.py +++ b/examples/lightrag_ollama_demo.py @@ -15,9 +15,10 @@ if not os.path.exists(WORKING_DIR): rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=ollama_model_complete, - llm_model_name="mistral:7b", - llm_model_max_async=2, - llm_model_kwargs={"host": 
"http://localhost:11434"}, + llm_model_name="gemma2:2b", + llm_model_max_async=4, + llm_model_max_token_size=32768, + llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}}, embedding_func=EmbeddingFunc( embedding_dim=768, max_token_size=8192, @@ -27,7 +28,6 @@ rag = LightRAG( ), ) - with open("./book.txt") as f: rag.insert(f.read()) From bd160013fc3053066f503f27fd26ab60a6ef2d95 Mon Sep 17 00:00:00 2001 From: Zhenyu Pan <120090196@link.cuhk.edu.cn> Date: Thu, 24 Oct 2024 00:58:52 +0800 Subject: [PATCH 05/39] [hotfix-#75][embedding] Fix the potential embedding problem --- examples/lightrag_openai_compatible_demo.py | 70 +++++++++++++-------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py index aae56821..25d3722c 100644 --- a/examples/lightrag_openai_compatible_demo.py +++ b/examples/lightrag_openai_compatible_demo.py @@ -34,6 +34,13 @@ async def embedding_func(texts: list[str]) -> np.ndarray: ) +async def get_embedding_dim(): + test_text = ["This is a test sentence."] + embedding = await embedding_func(test_text) + embedding_dim = embedding.shape[1] + return embedding_dim + + # function test async def test_funcs(): result = await llm_model_func("How are you?") @@ -43,37 +50,46 @@ async def test_funcs(): print("embedding_func: ", result) -asyncio.run(test_funcs()) +# asyncio.run(test_funcs()) + +async def main(): + try: + embedding_dimension = await get_embedding_dim() + print(f"Detected embedding dimension: {embedding_dimension}") + + rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=embedding_dimension, max_token_size=8192, func=embedding_func + ), + ) -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=4096, max_token_size=8192, func=embedding_func - ), -) + with open("./book.txt", "r", encoding="utf-8") as f: + rag.insert(f.read()) + # Perform naive search + print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) + ) -with open("./book.txt", "r", encoding="utf-8") as f: - rag.insert(f.read()) + # Perform local search + print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) + ) -# Perform naive search -print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) -) + # Perform global search + print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) + ) -# Perform local search -print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) -) + # Perform hybrid search + print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) + ) + except Exception as e: + print(f"An error occurred: {e}") -# Perform global search -print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) -) - -# Perform hybrid search -print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) -) +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From 060bb1cc5998e23fd93da42ab28847e00d17b73e Mon Sep 17 00:00:00 2001 From: tpoisonooo Date: Fri, 25 Oct 2024 14:14:36 +0800 Subject: [PATCH 06/39] Update lightrag.py --- lightrag/lightrag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py 
index 5137af42..b84e22ef 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -208,7 +208,7 @@ class LightRAG: logger.info("[Entity Extraction]...") maybe_new_kg = await extract_entities( inserting_chunks, - knwoledge_graph_inst=self.chunk_entity_relation_graph, + knowledge_graph_inst=self.chunk_entity_relation_graph, entity_vdb=self.entities_vdb, relationships_vdb=self.relationships_vdb, global_config=asdict(self), From 88f2c49b8627262155416ded908ac388ce121852 Mon Sep 17 00:00:00 2001 From: tpoisonooo Date: Fri, 25 Oct 2024 14:15:31 +0800 Subject: [PATCH 07/39] Update operate.py --- lightrag/operate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index a0729cd8..b90a1ca1 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -124,14 +124,14 @@ async def _handle_single_relationship_extraction( async def _merge_nodes_then_upsert( entity_name: str, nodes_data: list[dict], - knwoledge_graph_inst: BaseGraphStorage, + knowledge_graph_inst: BaseGraphStorage, global_config: dict, ): already_entitiy_types = [] already_source_ids = [] already_description = [] - already_node = await knwoledge_graph_inst.get_node(entity_name) + already_node = await knowledge_graph_inst.get_node(entity_name) if already_node is not None: already_entitiy_types.append(already_node["entity_type"]) already_source_ids.extend( From 2401e21ef20f4290c555ff3ae358e01045dbffa6 Mon Sep 17 00:00:00 2001 From: Sanketh Kumar Date: Fri, 25 Oct 2024 13:23:08 +0530 Subject: [PATCH 08/39] Added linting actions for pull request --- .github/workflows/linting.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/linting.yaml diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml new file mode 100644 index 00000000..32886cb0 --- /dev/null +++ b/.github/workflows/linting.yaml @@ -0,0 +1,30 @@ +name: Linting and Formatting + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + lint-and-format: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files \ No newline at end of file From a157e8e0a24599a296291072beb90fe96c445e6f Mon Sep 17 00:00:00 2001 From: Sanketh Kumar Date: Fri, 25 Oct 2024 13:32:25 +0530 Subject: [PATCH 09/39] Manually reformatted files --- .github/workflows/linting.yaml | 4 +- .gitignore | 2 +- README.md | 12 +-- examples/graph_visual_with_html.py | 6 +- examples/graph_visual_with_neo4j.py | 30 +++--- examples/lightrag_openai_compatible_demo.py | 27 ++++-- examples/lightrag_siliconcloud_demo.py | 2 +- examples/vram_management_demo.py | 36 +++++-- lightrag/llm.py | 101 ++++++++++++-------- lightrag/utils.py | 46 +++++---- requirements.txt | 4 +- 11 files changed, 175 insertions(+), 95 deletions(-) diff --git a/.github/workflows/linting.yaml b/.github/workflows/linting.yaml index 32886cb0..7c12e0a2 100644 --- a/.github/workflows/linting.yaml +++ b/.github/workflows/linting.yaml @@ -15,7 +15,7 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v2 - + - name: Set up Python uses: actions/setup-python@v2 with: @@ -27,4 +27,4 @@ jobs: pip install pre-commit - name: Run pre-commit - run: pre-commit run --all-files \ No newline at end of file + run: 
pre-commit run --all-files diff --git a/.gitignore b/.gitignore index 5a41ae32..fd4bd830 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ dickens/ book.txt lightrag-dev/ .idea/ -dist/ \ No newline at end of file +dist/ diff --git a/README.md b/README.md index dbabcb56..abd7ceb9 100644 --- a/README.md +++ b/README.md @@ -58,8 +58,8 @@ from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete ######### # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() -# import nest_asyncio -# nest_asyncio.apply() +# import nest_asyncio +# nest_asyncio.apply() ######### WORKING_DIR = "./dickens" @@ -157,7 +157,7 @@ rag = LightRAG(
Using Ollama Models - + * If you want to use Ollama models, you only need to set LightRAG as follows: ```python @@ -328,8 +328,8 @@ def main(): SET e.entity_type = node.entity_type, e.description = node.description, e.source_id = node.source_id, - e.displayName = node.id - REMOVE e:Entity + e.displayName = node.id + REMOVE e:Entity WITH e, node CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode RETURN count(*) @@ -382,7 +382,7 @@ def main(): except Exception as e: print(f"Error occurred: {e}") - + finally: driver.close() diff --git a/examples/graph_visual_with_html.py b/examples/graph_visual_with_html.py index b455e6de..e4337a54 100644 --- a/examples/graph_visual_with_html.py +++ b/examples/graph_visual_with_html.py @@ -3,7 +3,7 @@ from pyvis.network import Network import random # Load the GraphML file -G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml') +G = nx.read_graphml("./dickens/graph_chunk_entity_relation.graphml") # Create a Pyvis network net = Network(notebook=True) @@ -13,7 +13,7 @@ net.from_nx(G) # Add colors to nodes for node in net.nodes: - node['color'] = "#{:06x}".format(random.randint(0, 0xFFFFFF)) + node["color"] = "#{:06x}".format(random.randint(0, 0xFFFFFF)) # Save and display the network -net.show('knowledge_graph.html') \ No newline at end of file +net.show("knowledge_graph.html") diff --git a/examples/graph_visual_with_neo4j.py b/examples/graph_visual_with_neo4j.py index 22dde368..7377f21c 100644 --- a/examples/graph_visual_with_neo4j.py +++ b/examples/graph_visual_with_neo4j.py @@ -13,6 +13,7 @@ NEO4J_URI = "bolt://localhost:7687" NEO4J_USERNAME = "neo4j" NEO4J_PASSWORD = "your_password" + def convert_xml_to_json(xml_path, output_path): """Converts XML file to JSON and saves the output.""" if not os.path.exists(xml_path): @@ -21,7 +22,7 @@ def convert_xml_to_json(xml_path, output_path): json_data = xml_to_json(xml_path) if json_data: - with open(output_path, 'w', encoding='utf-8') as f: + with open(output_path, "w", encoding="utf-8") as f: json.dump(json_data, f, ensure_ascii=False, indent=2) print(f"JSON file created: {output_path}") return json_data @@ -29,16 +30,18 @@ def convert_xml_to_json(xml_path, output_path): print("Failed to create JSON data") return None + def process_in_batches(tx, query, data, batch_size): """Process data in batches and execute the given query.""" for i in range(0, len(data), batch_size): - batch = data[i:i + batch_size] + batch = data[i : i + batch_size] tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch}) + def main(): # Paths - xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml') - json_file = os.path.join(WORKING_DIR, 'graph_data.json') + xml_file = os.path.join(WORKING_DIR, "graph_chunk_entity_relation.graphml") + json_file = os.path.join(WORKING_DIR, "graph_data.json") # Convert XML to JSON json_data = convert_xml_to_json(xml_file, json_file) @@ -46,8 +49,8 @@ def main(): return # Load nodes and edges - nodes = json_data.get('nodes', []) - edges = json_data.get('edges', []) + nodes = json_data.get("nodes", []) + edges = json_data.get("edges", []) # Neo4j queries create_nodes_query = """ @@ -56,8 +59,8 @@ def main(): SET e.entity_type = node.entity_type, e.description = node.description, e.source_id = node.source_id, - e.displayName = node.id - REMOVE e:Entity + e.displayName = node.id + REMOVE e:Entity WITH e, node CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode RETURN count(*) @@ -100,19 +103,24 @@ def main(): # 
Execute queries in batches with driver.session() as session: # Insert nodes in batches - session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES) + session.execute_write( + process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES + ) # Insert edges in batches - session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES) + session.execute_write( + process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES + ) # Set displayName and labels session.run(set_displayname_and_labels_query) except Exception as e: print(f"Error occurred: {e}") - + finally: driver.close() + if __name__ == "__main__": main() diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py index 25d3722c..2470fc00 100644 --- a/examples/lightrag_openai_compatible_demo.py +++ b/examples/lightrag_openai_compatible_demo.py @@ -52,6 +52,7 @@ async def test_funcs(): # asyncio.run(test_funcs()) + async def main(): try: embedding_dimension = await get_embedding_dim() @@ -61,35 +62,47 @@ async def main(): working_dir=WORKING_DIR, llm_model_func=llm_model_func, embedding_func=EmbeddingFunc( - embedding_dim=embedding_dimension, max_token_size=8192, func=embedding_func + embedding_dim=embedding_dimension, + max_token_size=8192, + func=embedding_func, ), ) - with open("./book.txt", "r", encoding="utf-8") as f: rag.insert(f.read()) # Perform naive search print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="naive") + ) ) # Perform local search print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="local") + ) ) # Perform global search print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) + rag.query( + "What are the top themes in this story?", + param=QueryParam(mode="global"), + ) ) # Perform hybrid search print( - rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) + rag.query( + "What are the top themes in this story?", + param=QueryParam(mode="hybrid"), + ) ) except Exception as e: print(f"An error occurred: {e}") + if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/examples/lightrag_siliconcloud_demo.py b/examples/lightrag_siliconcloud_demo.py index 82cab228..a73f16c5 100644 --- a/examples/lightrag_siliconcloud_demo.py +++ b/examples/lightrag_siliconcloud_demo.py @@ -30,7 +30,7 @@ async def embedding_func(texts: list[str]) -> np.ndarray: texts, model="netease-youdao/bce-embedding-base_v1", api_key=os.getenv("SILICONFLOW_API_KEY"), - max_token_size=512 + max_token_size=512, ) diff --git a/examples/vram_management_demo.py b/examples/vram_management_demo.py index ec750254..c173b913 100644 --- a/examples/vram_management_demo.py +++ b/examples/vram_management_demo.py @@ -27,11 +27,12 @@ rag = LightRAG( # Read all .txt files from the TEXT_FILES_DIR directory texts = [] for filename in os.listdir(TEXT_FILES_DIR): - if filename.endswith('.txt'): + if filename.endswith(".txt"): file_path = os.path.join(TEXT_FILES_DIR, filename) - with open(file_path, 'r', encoding='utf-8') as file: + with open(file_path, "r", encoding="utf-8") as file: texts.append(file.read()) + # Batch insert texts into LightRAG with a retry mechanism def insert_texts_with_retry(rag, texts, 
retries=3, delay=5): for _ in range(retries): @@ -39,37 +40,58 @@ def insert_texts_with_retry(rag, texts, retries=3, delay=5): rag.insert(texts) return except Exception as e: - print(f"Error occurred during insertion: {e}. Retrying in {delay} seconds...") + print( + f"Error occurred during insertion: {e}. Retrying in {delay} seconds..." + ) time.sleep(delay) raise RuntimeError("Failed to insert texts after multiple retries.") + insert_texts_with_retry(rag, texts) # Perform different types of queries and handle potential errors try: - print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="naive") + ) + ) except Exception as e: print(f"Error performing naive search: {e}") try: - print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="local") + ) + ) except Exception as e: print(f"Error performing local search: {e}") try: - print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="global") + ) + ) except Exception as e: print(f"Error performing global search: {e}") try: - print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) + print( + rag.query( + "What are the top themes in this story?", param=QueryParam(mode="hybrid") + ) + ) except Exception as e: print(f"Error performing hybrid search: {e}") + # Function to clear VRAM resources def clear_vram(): os.system("sudo nvidia-smi --gpu-reset") + # Regularly clear VRAM to prevent overflow clear_vram_interval = 3600 # Clear once every hour start_time = time.time() diff --git a/lightrag/llm.py b/lightrag/llm.py index 4dcf535c..eaaa2b75 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -7,7 +7,13 @@ import aiohttp import numpy as np import ollama -from openai import AsyncOpenAI, APIConnectionError, RateLimitError, Timeout, AsyncAzureOpenAI +from openai import ( + AsyncOpenAI, + APIConnectionError, + RateLimitError, + Timeout, + AsyncAzureOpenAI, +) import base64 import struct @@ -70,26 +76,31 @@ async def openai_complete_if_cache( ) return response.choices[0].message.content + @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), ) -async def azure_openai_complete_if_cache(model, +async def azure_openai_complete_if_cache( + model, prompt, system_prompt=None, history_messages=[], base_url=None, api_key=None, - **kwargs): + **kwargs, +): if api_key: os.environ["AZURE_OPENAI_API_KEY"] = api_key if base_url: os.environ["AZURE_OPENAI_ENDPOINT"] = base_url - openai_async_client = AsyncAzureOpenAI(azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), - api_key=os.getenv("AZURE_OPENAI_API_KEY"), - api_version=os.getenv("AZURE_OPENAI_API_VERSION")) + openai_async_client = AsyncAzureOpenAI( + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), + api_key=os.getenv("AZURE_OPENAI_API_KEY"), + api_version=os.getenv("AZURE_OPENAI_API_VERSION"), + ) hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None) messages = [] @@ -114,6 +125,7 @@ async def azure_openai_complete_if_cache(model, ) return response.choices[0].message.content + class BedrockError(Exception): """Generic error for issues related to Amazon Bedrock""" @@ -205,8 +217,12 @@ async def 
bedrock_complete_if_cache( @lru_cache(maxsize=1) def initialize_hf_model(model_name): - hf_tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto", trust_remote_code=True) - hf_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True) + hf_tokenizer = AutoTokenizer.from_pretrained( + model_name, device_map="auto", trust_remote_code=True + ) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, device_map="auto", trust_remote_code=True + ) if hf_tokenizer.pad_token is None: hf_tokenizer.pad_token = hf_tokenizer.eos_token @@ -328,8 +344,9 @@ async def gpt_4o_mini_complete( **kwargs, ) + async def azure_openai_complete( - prompt, system_prompt=None, history_messages=[], **kwargs + prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: return await azure_openai_complete_if_cache( "conversation-4o-mini", @@ -339,6 +356,7 @@ async def azure_openai_complete( **kwargs, ) + async def bedrock_complete( prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: @@ -418,9 +436,11 @@ async def azure_openai_embedding( if base_url: os.environ["AZURE_OPENAI_ENDPOINT"] = base_url - openai_async_client = AsyncAzureOpenAI(azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), - api_key=os.getenv("AZURE_OPENAI_API_KEY"), - api_version=os.getenv("AZURE_OPENAI_API_VERSION")) + openai_async_client = AsyncAzureOpenAI( + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), + api_key=os.getenv("AZURE_OPENAI_API_KEY"), + api_version=os.getenv("AZURE_OPENAI_API_VERSION"), + ) response = await openai_async_client.embeddings.create( model=model, input=texts, encoding_format="float" @@ -440,35 +460,28 @@ async def siliconcloud_embedding( max_token_size: int = 512, api_key: str = None, ) -> np.ndarray: - if api_key and not api_key.startswith('Bearer '): - api_key = 'Bearer ' + api_key + if api_key and not api_key.startswith("Bearer "): + api_key = "Bearer " + api_key - headers = { - "Authorization": api_key, - "Content-Type": "application/json" - } + headers = {"Authorization": api_key, "Content-Type": "application/json"} truncate_texts = [text[0:max_token_size] for text in texts] - payload = { - "model": model, - "input": truncate_texts, - "encoding_format": "base64" - } + payload = {"model": model, "input": truncate_texts, "encoding_format": "base64"} base64_strings = [] async with aiohttp.ClientSession() as session: async with session.post(base_url, headers=headers, json=payload) as response: content = await response.json() - if 'code' in content: + if "code" in content: raise ValueError(content) - base64_strings = [item['embedding'] for item in content['data']] - + base64_strings = [item["embedding"] for item in content["data"]] + embeddings = [] for string in base64_strings: decode_bytes = base64.b64decode(string) n = len(decode_bytes) // 4 - float_array = struct.unpack('<' + 'f' * n, decode_bytes) + float_array = struct.unpack("<" + "f" * n, decode_bytes) embeddings.append(float_array) return np.array(embeddings) @@ -563,6 +576,7 @@ async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray: return embed_text + class Model(BaseModel): """ This is a Pydantic model class named 'Model' that is used to define a custom language model. @@ -580,14 +594,20 @@ class Model(BaseModel): The 'kwargs' dictionary contains the model name and API key to be passed to the function. """ - gen_func: Callable[[Any], str] = Field(..., description="A function that generates the response from the llm. 
The response must be a string") - kwargs: Dict[str, Any] = Field(..., description="The arguments to pass to the callable function. Eg. the api key, model name, etc") + gen_func: Callable[[Any], str] = Field( + ..., + description="A function that generates the response from the llm. The response must be a string", + ) + kwargs: Dict[str, Any] = Field( + ..., + description="The arguments to pass to the callable function. Eg. the api key, model name, etc", + ) class Config: arbitrary_types_allowed = True -class MultiModel(): +class MultiModel: """ Distributes the load across multiple language models. Useful for circumventing low rate limits with certain api providers especially if you are on the free tier. Could also be used for spliting across diffrent models or providers. @@ -611,26 +631,31 @@ class MultiModel(): ) ``` """ + def __init__(self, models: List[Model]): self._models = models self._current_model = 0 - + def _next_model(self): self._current_model = (self._current_model + 1) % len(self._models) return self._models[self._current_model] async def llm_model_func( - self, - prompt, system_prompt=None, history_messages=[], **kwargs + self, prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: - kwargs.pop("model", None) # stop from overwriting the custom model name + kwargs.pop("model", None) # stop from overwriting the custom model name next_model = self._next_model() - args = dict(prompt=prompt, system_prompt=system_prompt, history_messages=history_messages, **kwargs, **next_model.kwargs) - - return await next_model.gen_func( - **args + args = dict( + prompt=prompt, + system_prompt=system_prompt, + history_messages=history_messages, + **kwargs, + **next_model.kwargs, ) + return await next_model.gen_func(**args) + + if __name__ == "__main__": import asyncio diff --git a/lightrag/utils.py b/lightrag/utils.py index 9a68c16b..0da4a51a 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -185,6 +185,7 @@ def save_data_to_file(data, file_name): with open(file_name, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=4) + def xml_to_json(xml_file): try: tree = ET.parse(xml_file) @@ -194,31 +195,42 @@ def xml_to_json(xml_file): print(f"Root element: {root.tag}") print(f"Root attributes: {root.attrib}") - data = { - "nodes": [], - "edges": [] - } + data = {"nodes": [], "edges": []} # Use namespace - namespace = {'': 'http://graphml.graphdrawing.org/xmlns'} + namespace = {"": "http://graphml.graphdrawing.org/xmlns"} - for node in root.findall('.//node', namespace): + for node in root.findall(".//node", namespace): node_data = { - "id": node.get('id').strip('"'), - "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"') if node.find("./data[@key='d0']", namespace) is not None else "", - "description": node.find("./data[@key='d1']", namespace).text if node.find("./data[@key='d1']", namespace) is not None else "", - "source_id": node.find("./data[@key='d2']", namespace).text if node.find("./data[@key='d2']", namespace) is not None else "" + "id": node.get("id").strip('"'), + "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"') + if node.find("./data[@key='d0']", namespace) is not None + else "", + "description": node.find("./data[@key='d1']", namespace).text + if node.find("./data[@key='d1']", namespace) is not None + else "", + "source_id": node.find("./data[@key='d2']", namespace).text + if node.find("./data[@key='d2']", namespace) is not None + else "", } data["nodes"].append(node_data) - for edge in 
root.findall('.//edge', namespace): + for edge in root.findall(".//edge", namespace): edge_data = { - "source": edge.get('source').strip('"'), - "target": edge.get('target').strip('"'), - "weight": float(edge.find("./data[@key='d3']", namespace).text) if edge.find("./data[@key='d3']", namespace) is not None else 0.0, - "description": edge.find("./data[@key='d4']", namespace).text if edge.find("./data[@key='d4']", namespace) is not None else "", - "keywords": edge.find("./data[@key='d5']", namespace).text if edge.find("./data[@key='d5']", namespace) is not None else "", - "source_id": edge.find("./data[@key='d6']", namespace).text if edge.find("./data[@key='d6']", namespace) is not None else "" + "source": edge.get("source").strip('"'), + "target": edge.get("target").strip('"'), + "weight": float(edge.find("./data[@key='d3']", namespace).text) + if edge.find("./data[@key='d3']", namespace) is not None + else 0.0, + "description": edge.find("./data[@key='d4']", namespace).text + if edge.find("./data[@key='d4']", namespace) is not None + else "", + "keywords": edge.find("./data[@key='d5']", namespace).text + if edge.find("./data[@key='d5']", namespace) is not None + else "", + "source_id": edge.find("./data[@key='d6']", namespace).text + if edge.find("./data[@key='d6']", namespace) is not None + else "", } data["edges"].append(edge_data) diff --git a/requirements.txt b/requirements.txt index 5b3396fb..98f32b0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,15 @@ accelerate aioboto3 +aiohttp graspologic hnswlib nano-vectordb networkx ollama openai +pyvis tenacity tiktoken torch transformers xxhash -pyvis -aiohttp \ No newline at end of file From 6df870712eb0cc661f6e8d9fe55bdf35de9c670b Mon Sep 17 00:00:00 2001 From: zrguo <49157727+LarFii@users.noreply.github.com> Date: Fri, 25 Oct 2024 19:25:26 +0800 Subject: [PATCH 10/39] fix Step_3_openai_compatible.py --- reproduce/Step_3_openai_compatible.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reproduce/Step_3_openai_compatible.py b/reproduce/Step_3_openai_compatible.py index 2be5ea5c..5e2ef778 100644 --- a/reproduce/Step_3_openai_compatible.py +++ b/reproduce/Step_3_openai_compatible.py @@ -50,8 +50,8 @@ def extract_queries(file_path): async def process_query(query_text, rag_instance, query_param): try: - result, context = await rag_instance.aquery(query_text, param=query_param) - return {"query": query_text, "result": result, "context": context}, None + result = await rag_instance.aquery(query_text, param=query_param) + return {"query": query_text, "result": result}, None except Exception as e: return None, {"query": query_text, "error": str(e)} From 3325d97fb7cf46faf6cf499e4c92936e0f1ab0e3 Mon Sep 17 00:00:00 2001 From: jatin009v Date: Fri, 25 Oct 2024 18:39:55 +0530 Subject: [PATCH 11/39] Key Enhancements: Error Handling: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handled potential FileNotFoundError for README.md and requirements.txt. Checked for missing required metadata and raised an informative error if any are missing. Automated Package Discovery: Replaced packages=["lightrag"] with setuptools.find_packages() to automatically find sub-packages and exclude test or documentation directories. Additional Metadata: Added Development Status in classifiers to indicate a "Beta" release (modify based on the project's maturity). Used project_urls to link documentation, source code, and an issue tracker, which are standard for open-source projects. 
Compatibility: Included include_package_data=True to include additional files specified in MANIFEST.in. These changes enhance the readability, reliability, and openness of the code, making it more contributor-friendly and ensuring itโ€™s ready for open-source distribution. --- setup.py | 74 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/setup.py b/setup.py index 47222420..bdf49f02 100644 --- a/setup.py +++ b/setup.py @@ -1,39 +1,71 @@ import setuptools +from pathlib import Path -with open("README.md", "r", encoding="utf-8") as fh: - long_description = fh.read() +# Reading the long description from README.md +def read_long_description(): + try: + return Path("README.md").read_text(encoding="utf-8") + except FileNotFoundError: + return "A description of LightRAG is currently unavailable." +# Retrieving metadata from __init__.py +def retrieve_metadata(): + vars2find = ["__author__", "__version__", "__url__"] + vars2readme = {} + try: + with open("./lightrag/__init__.py") as f: + for line in f.readlines(): + for v in vars2find: + if line.startswith(v): + line = line.replace(" ", "").replace('"', "").replace("'", "").strip() + vars2readme[v] = line.split("=")[1] + except FileNotFoundError: + raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.") + + # Checking if all required variables are found + missing_vars = [v for v in vars2find if v not in vars2readme] + if missing_vars: + raise ValueError(f"Missing required metadata variables in __init__.py: {missing_vars}") + + return vars2readme -vars2find = ["__author__", "__version__", "__url__"] -vars2readme = {} -with open("./lightrag/__init__.py") as f: - for line in f.readlines(): - for v in vars2find: - if line.startswith(v): - line = line.replace(" ", "").replace('"', "").replace("'", "").strip() - vars2readme[v] = line.split("=")[1] +# Reading dependencies from requirements.txt +def read_requirements(): + deps = [] + try: + with open("./requirements.txt") as f: + deps = [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print("Warning: 'requirements.txt' not found. 
No dependencies will be installed.") + return deps -deps = [] -with open("./requirements.txt") as f: - for line in f.readlines(): - if not line.strip(): - continue - deps.append(line.strip()) +metadata = retrieve_metadata() +long_description = read_long_description() +requirements = read_requirements() setuptools.setup( name="lightrag-hku", - url=vars2readme["__url__"], - version=vars2readme["__version__"], - author=vars2readme["__author__"], + url=metadata["__url__"], + version=metadata["__version__"], + author=metadata["__author__"], description="LightRAG: Simple and Fast Retrieval-Augmented Generation", long_description=long_description, long_description_content_type="text/markdown", - packages=["lightrag"], + packages=setuptools.find_packages(exclude=("tests*", "docs*")), # Automatically find packages classifiers=[ + "Development Status :: 4 - Beta", "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", ], python_requires=">=3.9", - install_requires=deps, + install_requires=requirements, + include_package_data=True, # Includes non-code files from MANIFEST.in + project_urls={ # Additional project metadata + "Documentation": metadata.get("__url__", ""), + "Source": metadata.get("__url__", ""), + "Tracker": f"{metadata.get('__url__', '')}/issues" if metadata.get("__url__") else "" + }, ) From af1a7f66fa703ba42904fcbe70d3ef7fff317bbf Mon Sep 17 00:00:00 2001 From: "zhenjie.ye" Date: Sat, 26 Oct 2024 00:37:03 +0800 Subject: [PATCH 12/39] add Algorithm Flowchart --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index dbabcb56..f2f5c20e 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,10 @@ This repository hosts the code of LightRAG. The structure of this code is based - [x] [2024.10.16]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! - [x] [2024.10.15]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! +## Algorithm Flowchart + + + ## Install * Install from source (Recommend) From aad2e1f2d609e7626e29da32e699e647930e8493 Mon Sep 17 00:00:00 2001 From: "zhenjie.ye" Date: Sat, 26 Oct 2024 00:37:46 +0800 Subject: [PATCH 13/39] add Algorithm FLowchart --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f2f5c20e..0f8659b1 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ This repository hosts the code of LightRAG. 
The structure of this code is based ## Algorithm Flowchart +![LightRAG_Self excalidraw](https://github.com/user-attachments/assets/aa5c4892-2e44-49e6-a116-2403ed80a1a3) ## Install From 226f6f3d87febd3041017d3d0299a00138ce8832 Mon Sep 17 00:00:00 2001 From: tackhwa Date: Sat, 26 Oct 2024 02:20:23 +0800 Subject: [PATCH 14/39] fix hf output bug --- lightrag/llm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lightrag/llm.py b/lightrag/llm.py index 4dcf535c..692937fb 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -266,10 +266,11 @@ async def hf_model_if_cache( input_ids = hf_tokenizer( input_prompt, return_tensors="pt", padding=True, truncation=True ).to("cuda") + inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()} output = hf_model.generate( **input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True ) - response_text = hf_tokenizer.decode(output[0], skip_special_tokens=True) + response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True) if hashing_kv is not None: await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}}) return response_text From 87f8b7dba1a334459b7401a6b88e68fd7e0ecc33 Mon Sep 17 00:00:00 2001 From: tackhwa <55059307+tackhwa@users.noreply.github.com> Date: Sat, 26 Oct 2024 02:42:40 +0800 Subject: [PATCH 15/39] Update token length --- lightrag/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/llm.py b/lightrag/llm.py index 692937fb..ab459fc7 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -268,7 +268,7 @@ async def hf_model_if_cache( ).to("cuda") inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()} output = hf_model.generate( - **input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True + **input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True ) response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True) if hashing_kv is not None: From eec29d041aaf314d69ff4eae7661dcdc9a21b107 Mon Sep 17 00:00:00 2001 From: Yazington Date: Sat, 26 Oct 2024 00:11:21 -0400 Subject: [PATCH 16/39] fixing bug --- lightrag/lightrag.py | 6 ++++-- lightrag/operate.py | 26 +++++++++++++------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 5137af42..3004f5ed 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -85,7 +85,9 @@ class LightRAG: # LLM llm_model_func: callable = gpt_4o_mini_complete # hf_model_complete# - llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' + llm_model_name: str = ( + "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' + ) llm_model_max_token_size: int = 32768 llm_model_max_async: int = 16 @@ -208,7 +210,7 @@ class LightRAG: logger.info("[Entity Extraction]...") maybe_new_kg = await extract_entities( inserting_chunks, - knwoledge_graph_inst=self.chunk_entity_relation_graph, + knowledge_graph_inst=self.chunk_entity_relation_graph, entity_vdb=self.entities_vdb, relationships_vdb=self.relationships_vdb, global_config=asdict(self), diff --git a/lightrag/operate.py b/lightrag/operate.py index a0729cd8..8a6820f5 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -124,14 +124,14 @@ async def _handle_single_relationship_extraction( async def _merge_nodes_then_upsert( entity_name: str, nodes_data: list[dict], - knwoledge_graph_inst: BaseGraphStorage, + 
knowledge_graph_inst: BaseGraphStorage, global_config: dict, ): already_entitiy_types = [] already_source_ids = [] already_description = [] - already_node = await knwoledge_graph_inst.get_node(entity_name) + already_node = await knowledge_graph_inst.get_node(entity_name) if already_node is not None: already_entitiy_types.append(already_node["entity_type"]) already_source_ids.extend( @@ -160,7 +160,7 @@ async def _merge_nodes_then_upsert( description=description, source_id=source_id, ) - await knwoledge_graph_inst.upsert_node( + await knowledge_graph_inst.upsert_node( entity_name, node_data=node_data, ) @@ -172,7 +172,7 @@ async def _merge_edges_then_upsert( src_id: str, tgt_id: str, edges_data: list[dict], - knwoledge_graph_inst: BaseGraphStorage, + knowledge_graph_inst: BaseGraphStorage, global_config: dict, ): already_weights = [] @@ -180,8 +180,8 @@ async def _merge_edges_then_upsert( already_description = [] already_keywords = [] - if await knwoledge_graph_inst.has_edge(src_id, tgt_id): - already_edge = await knwoledge_graph_inst.get_edge(src_id, tgt_id) + if await knowledge_graph_inst.has_edge(src_id, tgt_id): + already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id) already_weights.append(already_edge["weight"]) already_source_ids.extend( split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP]) @@ -202,8 +202,8 @@ async def _merge_edges_then_upsert( set([dp["source_id"] for dp in edges_data] + already_source_ids) ) for need_insert_id in [src_id, tgt_id]: - if not (await knwoledge_graph_inst.has_node(need_insert_id)): - await knwoledge_graph_inst.upsert_node( + if not (await knowledge_graph_inst.has_node(need_insert_id)): + await knowledge_graph_inst.upsert_node( need_insert_id, node_data={ "source_id": source_id, @@ -214,7 +214,7 @@ async def _merge_edges_then_upsert( description = await _handle_entity_relation_summary( (src_id, tgt_id), description, global_config ) - await knwoledge_graph_inst.upsert_edge( + await knowledge_graph_inst.upsert_edge( src_id, tgt_id, edge_data=dict( @@ -237,7 +237,7 @@ async def _merge_edges_then_upsert( async def extract_entities( chunks: dict[str, TextChunkSchema], - knwoledge_graph_inst: BaseGraphStorage, + knowledge_graph_inst: BaseGraphStorage, entity_vdb: BaseVectorStorage, relationships_vdb: BaseVectorStorage, global_config: dict, @@ -341,13 +341,13 @@ async def extract_entities( maybe_edges[tuple(sorted(k))].extend(v) all_entities_data = await asyncio.gather( *[ - _merge_nodes_then_upsert(k, v, knwoledge_graph_inst, global_config) + _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config) for k, v in maybe_nodes.items() ] ) all_relationships_data = await asyncio.gather( *[ - _merge_edges_then_upsert(k[0], k[1], v, knwoledge_graph_inst, global_config) + _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config) for k, v in maybe_edges.items() ] ) @@ -384,7 +384,7 @@ async def extract_entities( } await relationships_vdb.upsert(data_for_vdb) - return knwoledge_graph_inst + return knowledge_graph_inst async def local_query( From e07b9f05306e2b21b1acfe47b3c47118bf658de9 Mon Sep 17 00:00:00 2001 From: zrguo <49157727+LarFii@users.noreply.github.com> Date: Sat, 26 Oct 2024 14:04:11 +0800 Subject: [PATCH 17/39] Update graph_visual_with_html.py --- examples/graph_visual_with_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/graph_visual_with_html.py b/examples/graph_visual_with_html.py index e4337a54..11279b3a 100644 --- a/examples/graph_visual_with_html.py +++ 
b/examples/graph_visual_with_html.py @@ -6,7 +6,7 @@ import random G = nx.read_graphml("./dickens/graph_chunk_entity_relation.graphml") # Create a Pyvis network -net = Network(notebook=True) +net = Network(height="100vh", notebook=True) # Convert NetworkX graph to Pyvis network net.from_nx(G) From ea3e13b522afdb7533d1b1098b5d2461f98c1ad6 Mon Sep 17 00:00:00 2001 From: LarFii <834462287@qq.com> Date: Sat, 26 Oct 2024 14:40:17 +0800 Subject: [PATCH 18/39] update version --- lightrag/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/__init__.py b/lightrag/__init__.py index db81e005..8e76a260 100644 --- a/lightrag/__init__.py +++ b/lightrag/__init__.py @@ -1,5 +1,5 @@ from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam -__version__ = "0.0.7" +__version__ = "0.0.8" __author__ = "Zirui Guo" __url__ = "https://github.com/HKUDS/LightRAG" From dba7dad6dff4c588b6fd6c0d59830f910df18650 Mon Sep 17 00:00:00 2001 From: "zhenjie.ye" Date: Sat, 26 Oct 2024 15:56:48 +0800 Subject: [PATCH 19/39] [feat] Add API server implementation and endpoints --- README.md | 119 ++++++++++++++ .../lightrag_api_openai_compatible_demo.py | 153 ++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 examples/lightrag_api_openai_compatible_demo.py diff --git a/README.md b/README.md index 7fab9a01..d11b1691 100644 --- a/README.md +++ b/README.md @@ -397,6 +397,125 @@ if __name__ == "__main__":
+## API Server Implementation + +LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests. + +### Setting up the API Server +
+Click to expand setup instructions + +1. First, ensure you have the required dependencies: +```bash +pip install fastapi uvicorn pydantic +``` + +2. Set up your environment variables: +```bash +export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default" +``` + +3. Run the API server: +```bash +python examples/lightrag_api_openai_compatible_demo.py +``` + +The server will start on `http://0.0.0.0:8020`. +
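If you prefer to manage the server from Python instead of a shell, a sketch like the one below should work. It is only an illustration: it assumes you run it from the repository root, that the demo script path `examples/lightrag_api_openai_compatible_demo.py` is unchanged, and that the default port 8020 is kept.

```python
import os
import subprocess

# Point the server at a custom index directory before it starts.
env = os.environ.copy()
env["RAG_DIR"] = "./my_rag_index"

# Launch the API server as a child process; it listens on 0.0.0.0:8020 by default.
server = subprocess.Popen(
    ["python", "examples/lightrag_api_openai_compatible_demo.py"],
    env=env,
)

try:
    server.wait()        # block until the server exits
except KeyboardInterrupt:
    server.terminate()   # forward Ctrl+C so uvicorn shuts down cleanly
```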
+ +### API Endpoints + +The API server provides the following endpoints: + +#### 1. Query Endpoint +
+Click to view Query endpoint details + +- **URL:** `/query` +- **Method:** POST +- **Body:** +```json +{ + "query": "Your question here", + "mode": "hybrid" // Can be "naive", "local", "global", or "hybrid" +} +``` +- **Example:** +```bash +curl -X POST "http://127.0.0.1:8020/query" \ + -H "Content-Type: application/json" \ + -d '{"query": "What are the main themes?", "mode": "hybrid"}' +``` +
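For Python clients, the same endpoint can be exercised with a few lines of `requests` code. This is a sketch only: it assumes the `requests` package is installed (it is not part of the project's requirements) and that the server is reachable at the default `127.0.0.1:8020` address.

```python
import requests

BASE_URL = "http://127.0.0.1:8020"
question = "What are the top themes in this story?"

# Ask the same question in every retrieval mode and compare the answers.
for mode in ("naive", "local", "global", "hybrid"):
    resp = requests.post(
        f"{BASE_URL}/query",
        json={"query": question, "mode": mode},
        timeout=300,  # generation over a large index can take a while
    )
    resp.raise_for_status()
    print(f"--- {mode} ---")
    print(resp.json()["data"])  # the answer text is returned in the "data" field
```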
+ +#### 2. Insert Text Endpoint +
+Click to view Insert Text endpoint details + +- **URL:** `/insert` +- **Method:** POST +- **Body:** +```json +{ + "text": "Your text content here" +} +``` +- **Example:** +```bash +curl -X POST "http://127.0.0.1:8020/insert" \ + -H "Content-Type: application/json" \ + -d '{"text": "Content to be inserted into RAG"}' +``` +
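A Python equivalent of the curl call might look like the sketch below, again assuming `requests` and the default local address. Each call is processed synchronously by the server, so large corpora are better sent in fewer, larger requests.

```python
import requests

BASE_URL = "http://127.0.0.1:8020"

documents = [
    "LightRAG builds a knowledge graph from the text it ingests.",
    "Queries can be answered in naive, local, global, or hybrid mode.",
]

for doc in documents:
    resp = requests.post(f"{BASE_URL}/insert", json={"text": doc}, timeout=600)
    resp.raise_for_status()
    print(resp.json()["message"])  # e.g. "Text inserted successfully"
```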
+ +#### 3. Insert File Endpoint +
+Click to view Insert File endpoint details + +- **URL:** `/insert_file` +- **Method:** POST +- **Body:** +```json +{ + "file_path": "path/to/your/file.txt" +} +``` +- **Example:** +```bash +curl -X POST "http://127.0.0.1:8020/insert_file" \ + -H "Content-Type: application/json" \ + -d '{"file_path": "./book.txt"}' +``` +
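Because the path in the request body is opened by the server process, this endpoint only works when the client and server can see the same filesystem. The sketch below, which assumes `requests`, the default address, and a hypothetical `./data` directory on the server machine, posts every `.txt` file in that directory and reports failures instead of aborting.

```python
import os
import requests

BASE_URL = "http://127.0.0.1:8020"
DATA_DIR = "./data"  # must be readable by the *server* process

for name in sorted(os.listdir(DATA_DIR)):
    if not name.endswith(".txt"):
        continue
    file_path = os.path.join(DATA_DIR, name)
    resp = requests.post(
        f"{BASE_URL}/insert_file", json={"file_path": file_path}, timeout=600
    )
    if resp.status_code != 200:
        # FastAPI error responses carry the reason in a "detail" field.
        print(f"Failed to insert {file_path}: {resp.json().get('detail')}")
        continue
    print(resp.json()["message"])
```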
+ +#### 4. Health Check Endpoint +
+Click to view Health Check endpoint details + +- **URL:** `/health` +- **Method:** GET +- **Example:** +```bash +curl -X GET "http://127.0.0.1:8020/health" +``` +
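In scripts that start the server and then immediately begin inserting or querying, it can help to wait for `/health` before sending real work. A minimal readiness loop, assuming `requests` and the default address, might look like this:

```python
import time
import requests

BASE_URL = "http://127.0.0.1:8020"

def wait_until_healthy(timeout_s: float = 60.0, interval_s: float = 2.0) -> bool:
    """Poll /health until the server reports healthy or the timeout expires."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            resp = requests.get(f"{BASE_URL}/health", timeout=5)
            if resp.ok and resp.json().get("status") == "healthy":
                return True
        except requests.RequestException:
            pass  # server not accepting connections yet
        time.sleep(interval_s)
    return False

if not wait_until_healthy():
    raise SystemExit("API server did not become healthy in time")
```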
+ +### Configuration + +The API server can be configured using environment variables: +- `RAG_DIR`: Directory for storing the RAG index (default: "index_default") +- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers + +### Error Handling +
+Click to view error handling details + +The API includes comprehensive error handling: +- File not found errors (404) +- Processing errors (500) +- Supports multiple file encodings (UTF-8 and GBK) +
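On the client side, those errors surface as non-200 status codes whose body contains a `detail` field (FastAPI's default for `HTTPException`). A defensive wrapper, sketched here under the same `requests` and default-address assumptions as above, keeps callers from having to parse raw tracebacks:

```python
import requests

BASE_URL = "http://127.0.0.1:8020"

def safe_query(question: str, mode: str = "hybrid") -> dict:
    """Query the API and return a uniform dict instead of raising on server errors."""
    try:
        resp = requests.post(
            f"{BASE_URL}/query",
            json={"query": question, "mode": mode},
            timeout=300,
        )
    except requests.RequestException as exc:
        return {"status": "error", "message": f"request failed: {exc}"}

    if resp.status_code != 200:
        return {"status": "error", "message": resp.json().get("detail", resp.text)}
    return resp.json()  # {"status": "success", "data": <answer>, "message": None}

print(safe_query("What are the top themes in this story?"))
```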
+ ## Evaluation ### Dataset The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain). diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py new file mode 100644 index 00000000..f8d105ea --- /dev/null +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -0,0 +1,153 @@ +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +import os +from lightrag import LightRAG, QueryParam +from lightrag.llm import openai_complete_if_cache, openai_embedding +from lightrag.utils import EmbeddingFunc +import numpy as np +from typing import Optional +import asyncio +import nest_asyncio + +# Apply nest_asyncio to solve event loop issues +nest_asyncio.apply() + +DEFAULT_RAG_DIR="index_default" +app = FastAPI(title="LightRAG API", description="API for RAG operations") + +# Configure working directory +WORKING_DIR = os.environ.get('RAG_DIR', f'{DEFAULT_RAG_DIR}') +print(f"WORKING_DIR: {WORKING_DIR}") +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + +# LLM model function +async def llm_model_func( + prompt, system_prompt=None, history_messages=[], **kwargs +) -> str: + return await openai_complete_if_cache( + "gpt-4o-mini", + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + api_key='YOUR_API_KEY', + base_url="YourURL/v1", + **kwargs, + ) + +# Embedding function +async def embedding_func(texts: list[str]) -> np.ndarray: + return await openai_embedding( + texts, + model="text-embedding-3-large", + api_key='YOUR_API_KEY', + base_url="YourURL/v1", + ) + +# Initialize RAG instance +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=3072, max_token_size=8192, func=embedding_func + ), +) + +# Data models +class QueryRequest(BaseModel): + query: str + mode: str = "hybrid" + +class InsertRequest(BaseModel): + text: str + +class InsertFileRequest(BaseModel): + file_path: str + +class Response(BaseModel): + status: str + data: Optional[str] = None + message: Optional[str] = None + +# API routes +@app.post("/query", response_model=Response) +async def query_endpoint(request: QueryRequest): + try: + loop = asyncio.get_event_loop() + result = await loop.run_in_executor( + None, + lambda: rag.query(request.query, param=QueryParam(mode=request.mode)) + ) + return Response( + status="success", + data=result + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/insert", response_model=Response) +async def insert_endpoint(request: InsertRequest): + try: + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, lambda: rag.insert(request.text)) + return Response( + status="success", + message="Text inserted successfully" + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/insert_file", response_model=Response) +async def insert_file(request: InsertFileRequest): + try: + # Check if file exists + if not os.path.exists(request.file_path): + raise HTTPException( + status_code=404, + detail=f"File not found: {request.file_path}" + ) + + # Read file content + try: + with open(request.file_path, 'r', encoding='utf-8') as f: + content = f.read() + except UnicodeDecodeError: + # If UTF-8 decoding fails, try other encodings + with open(request.file_path, 'r', encoding='gbk') as f: + content = f.read() + + # Insert file content + loop = asyncio.get_event_loop() + await 
loop.run_in_executor(None, lambda: rag.insert(content)) + + return Response( + status="success", + message=f"File content from {request.file_path} inserted successfully" + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/health") +async def health_check(): + return {"status": "healthy"} + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8020) + +# Usage example +# To run the server, use the following command in your terminal: +# python lightrag_api_openai_compatible_demo.py + +# Example requests: +# 1. Query: +# curl -X POST "http://127.0.0.1:8020/query" -H "Content-Type: application/json" -d '{"query": "your query here", "mode": "hybrid"}' + +# 2. Insert text: +# curl -X POST "http://127.0.0.1:8020/insert" -H "Content-Type: application/json" -d '{"text": "your text here"}' + +# 3. Insert file: +# curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}' + +# 4. Health check: +# curl -X GET "http://127.0.0.1:8020/health" \ No newline at end of file From 3d7b05b3e7d78435929ef5f1d878b9968ce5519d Mon Sep 17 00:00:00 2001 From: "zhenjie.ye" Date: Sat, 26 Oct 2024 16:00:30 +0800 Subject: [PATCH 20/39] Refactor code formatting in lightrag_api_openai_compatible_demo.py --- .../lightrag_api_openai_compatible_demo.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py index f8d105ea..ad9560dc 100644 --- a/examples/lightrag_api_openai_compatible_demo.py +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -12,7 +12,7 @@ import nest_asyncio # Apply nest_asyncio to solve event loop issues nest_asyncio.apply() -DEFAULT_RAG_DIR="index_default" +DEFAULT_RAG_DIR = "index_default" app = FastAPI(title="LightRAG API", description="API for RAG operations") # Configure working directory @@ -22,6 +22,8 @@ if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) # LLM model function + + async def llm_model_func( prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: @@ -36,6 +38,8 @@ async def llm_model_func( ) # Embedding function + + async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embedding( texts, @@ -54,29 +58,37 @@ rag = LightRAG( ) # Data models + + class QueryRequest(BaseModel): query: str mode: str = "hybrid" + class InsertRequest(BaseModel): text: str + class InsertFileRequest(BaseModel): file_path: str + class Response(BaseModel): status: str data: Optional[str] = None message: Optional[str] = None # API routes + + @app.post("/query", response_model=Response) async def query_endpoint(request: QueryRequest): try: loop = asyncio.get_event_loop() result = await loop.run_in_executor( - None, - lambda: rag.query(request.query, param=QueryParam(mode=request.mode)) + None, + lambda: rag.query( + request.query, param=QueryParam(mode=request.mode)) ) return Response( status="success", @@ -85,6 +97,7 @@ async def query_endpoint(request: QueryRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + @app.post("/insert", response_model=Response) async def insert_endpoint(request: InsertRequest): try: @@ -97,6 +110,7 @@ async def insert_endpoint(request: InsertRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + @app.post("/insert_file", response_model=Response) async def insert_file(request: InsertFileRequest): try: @@ 
-106,7 +120,7 @@ async def insert_file(request: InsertFileRequest): status_code=404, detail=f"File not found: {request.file_path}" ) - + # Read file content try: with open(request.file_path, 'r', encoding='utf-8') as f: @@ -115,11 +129,11 @@ async def insert_file(request: InsertFileRequest): # If UTF-8 decoding fails, try other encodings with open(request.file_path, 'r', encoding='gbk') as f: content = f.read() - + # Insert file content loop = asyncio.get_event_loop() await loop.run_in_executor(None, lambda: rag.insert(content)) - + return Response( status="success", message=f"File content from {request.file_path} inserted successfully" @@ -127,6 +141,7 @@ async def insert_file(request: InsertFileRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + @app.get("/health") async def health_check(): return {"status": "healthy"} @@ -150,4 +165,4 @@ if __name__ == "__main__": # curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}' # 4. Health check: -# curl -X GET "http://127.0.0.1:8020/health" \ No newline at end of file +# curl -X GET "http://127.0.0.1:8020/health" From 981be9e569af127e755612eaf94c07c19b193f2f Mon Sep 17 00:00:00 2001 From: "zhenjie.ye" Date: Sat, 26 Oct 2024 16:09:36 +0800 Subject: [PATCH 21/39] Refactor code formatting in lightrag_api_openai_compatible_demo.py --- .../lightrag_api_openai_compatible_demo.py | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py index ad9560dc..2cd262bb 100644 --- a/examples/lightrag_api_openai_compatible_demo.py +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -16,7 +16,7 @@ DEFAULT_RAG_DIR = "index_default" app = FastAPI(title="LightRAG API", description="API for RAG operations") # Configure working directory -WORKING_DIR = os.environ.get('RAG_DIR', f'{DEFAULT_RAG_DIR}') +WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}") print(f"WORKING_DIR: {WORKING_DIR}") if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) @@ -32,11 +32,12 @@ async def llm_model_func( prompt, system_prompt=system_prompt, history_messages=history_messages, - api_key='YOUR_API_KEY', + api_key="YOUR_API_KEY", base_url="YourURL/v1", **kwargs, ) + # Embedding function @@ -44,10 +45,11 @@ async def embedding_func(texts: list[str]) -> np.ndarray: return await openai_embedding( texts, model="text-embedding-3-large", - api_key='YOUR_API_KEY', + api_key="YOUR_API_KEY", base_url="YourURL/v1", ) + # Initialize RAG instance rag = LightRAG( working_dir=WORKING_DIR, @@ -78,6 +80,7 @@ class Response(BaseModel): data: Optional[str] = None message: Optional[str] = None + # API routes @@ -86,14 +89,9 @@ async def query_endpoint(request: QueryRequest): try: loop = asyncio.get_event_loop() result = await loop.run_in_executor( - None, - lambda: rag.query( - request.query, param=QueryParam(mode=request.mode)) - ) - return Response( - status="success", - data=result + None, lambda: rag.query(request.query, param=QueryParam(mode=request.mode)) ) + return Response(status="success", data=result) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -103,10 +101,7 @@ async def insert_endpoint(request: InsertRequest): try: loop = asyncio.get_event_loop() await loop.run_in_executor(None, lambda: rag.insert(request.text)) - return Response( - status="success", - message="Text inserted successfully" - ) + return 
Response(status="success", message="Text inserted successfully") except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -117,17 +112,16 @@ async def insert_file(request: InsertFileRequest): # Check if file exists if not os.path.exists(request.file_path): raise HTTPException( - status_code=404, - detail=f"File not found: {request.file_path}" + status_code=404, detail=f"File not found: {request.file_path}" ) # Read file content try: - with open(request.file_path, 'r', encoding='utf-8') as f: + with open(request.file_path, "r", encoding="utf-8") as f: content = f.read() except UnicodeDecodeError: # If UTF-8 decoding fails, try other encodings - with open(request.file_path, 'r', encoding='gbk') as f: + with open(request.file_path, "r", encoding="gbk") as f: content = f.read() # Insert file content @@ -136,7 +130,7 @@ async def insert_file(request: InsertFileRequest): return Response( status="success", - message=f"File content from {request.file_path} inserted successfully" + message=f"File content from {request.file_path} inserted successfully", ) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -146,8 +140,10 @@ async def insert_file(request: InsertFileRequest): async def health_check(): return {"status": "healthy"} + if __name__ == "__main__": import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8020) # Usage example From 8deb30aa205de458b727a24345f8b4a511dabafc Mon Sep 17 00:00:00 2001 From: tackhwa Date: Sat, 26 Oct 2024 16:11:15 +0800 Subject: [PATCH 22/39] support lmdeploy backend --- examples/lightrag_lmdeploy_demo.py | 74 +++++++++++++++++++++ lightrag/llm.py | 100 +++++++++++++++++++++++++++++ requirements.txt | 1 + 3 files changed, 175 insertions(+) create mode 100644 examples/lightrag_lmdeploy_demo.py diff --git a/examples/lightrag_lmdeploy_demo.py b/examples/lightrag_lmdeploy_demo.py new file mode 100644 index 00000000..ea7ace0e --- /dev/null +++ b/examples/lightrag_lmdeploy_demo.py @@ -0,0 +1,74 @@ +import os + +from lightrag import LightRAG, QueryParam +from lightrag.llm import lmdeploy_model_if_cache, hf_embedding +from lightrag.utils import EmbeddingFunc +from transformers import AutoModel, AutoTokenizer + +WORKING_DIR = "./dickens" + +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + +async def lmdeploy_model_complete( + prompt=None, system_prompt=None, history_messages=[], **kwargs +) -> str: + model_name = kwargs["hashing_kv"].global_config["llm_model_name"] + return await lmdeploy_model_if_cache( + model_name, + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + ## please specify chat_template if your local path does not follow original HF file name, + ## or model_name is a pytorch model on huggingface.co, + ## you can refer to https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/model.py + ## for a list of chat_template available in lmdeploy. + chat_template = "llama3", + # model_format ='awq', # if you are using awq quantization model. + # quant_policy=8, # if you want to use online kv cache, 4=kv int4, 8=kv int8. 
+ **kwargs, + ) + + +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=lmdeploy_model_complete, + llm_model_name="meta-llama/Llama-3.1-8B-Instruct", # please use definite path for local model + embedding_func=EmbeddingFunc( + embedding_dim=384, + max_token_size=5000, + func=lambda texts: hf_embedding( + texts, + tokenizer=AutoTokenizer.from_pretrained( + "sentence-transformers/all-MiniLM-L6-v2" + ), + embed_model=AutoModel.from_pretrained( + "sentence-transformers/all-MiniLM-L6-v2" + ), + ), + ), +) + + +with open("./book.txt", "r", encoding="utf-8") as f: + rag.insert(f.read()) + +# Perform naive search +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) +) + +# Perform local search +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) +) + +# Perform global search +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) +) + +# Perform hybrid search +print( + rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) +) diff --git a/lightrag/llm.py b/lightrag/llm.py index bb0d6063..028084bd 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -322,6 +322,106 @@ async def ollama_model_if_cache( return result +@lru_cache(maxsize=1) +def initialize_lmdeploy_pipeline(model, tp=1, chat_template=None, log_level='WARNING', model_format='hf', quant_policy=0): + from lmdeploy import pipeline, ChatTemplateConfig, TurbomindEngineConfig + lmdeploy_pipe = pipeline( + model_path=model, + backend_config=TurbomindEngineConfig(tp=tp, model_format=model_format, quant_policy=quant_policy), + chat_template_config=ChatTemplateConfig(model_name=chat_template) if chat_template else None, + log_level='WARNING') + return lmdeploy_pipe + + +async def lmdeploy_model_if_cache( + model, prompt, system_prompt=None, history_messages=[], + chat_template=None, model_format='hf',quant_policy=0, **kwargs +) -> str: + """ + Args: + model (str): The path to the model. + It could be one of the following options: + - i) A local directory path of a turbomind model which is + converted by `lmdeploy convert` command or download + from ii) and iii). + - ii) The model_id of a lmdeploy-quantized model hosted + inside a model repo on huggingface.co, such as + "InternLM/internlm-chat-20b-4bit", + "lmdeploy/llama2-chat-70b-4bit", etc. + - iii) The model_id of a model hosted inside a model repo + on huggingface.co, such as "internlm/internlm-chat-7b", + "Qwen/Qwen-7B-Chat ", "baichuan-inc/Baichuan2-7B-Chat" + and so on. + chat_template (str): needed when model is a pytorch model on + huggingface.co, such as "internlm-chat-7b", + "Qwen-7B-Chat ", "Baichuan2-7B-Chat" and so on, + and when the model name of local path did not match the original model name in HF. + tp (int): tensor parallel + prompt (Union[str, List[str]]): input texts to be completed. + do_preprocess (bool): whether pre-process the messages. Default to + True, which means chat_template will be applied. + skip_special_tokens (bool): Whether or not to remove special tokens + in the decoding. Default to be False. + do_sample (bool): Whether or not to use sampling, use greedy decoding otherwise. + Default to be False, which means greedy decoding will be applied. 
+ """ + try: + import lmdeploy + from lmdeploy import version_info, GenerationConfig + except: + raise ImportError("Please install lmdeploy before intialize lmdeploy backend.") + + kwargs.pop("response_format", None) + max_new_tokens = kwargs.pop("max_tokens", 512) + tp = kwargs.pop('tp', 1) + skip_special_tokens = kwargs.pop('skip_special_tokens', False) + do_preprocess = kwargs.pop('do_preprocess', True) + do_sample = kwargs.pop('do_sample', False) + gen_params = kwargs + + version = version_info + if do_sample is not None and version < (0, 6, 0): + raise RuntimeError( + '`do_sample` parameter is not supported by lmdeploy until ' + f'v0.6.0, but currently using lmdeloy {lmdeploy.__version__}') + else: + do_sample = True + gen_params.update(do_sample=do_sample) + + lmdeploy_pipe = initialize_lmdeploy_pipeline( + model=model, + tp=tp, + chat_template=chat_template, + model_format=model_format, + quant_policy=quant_policy, + log_level='WARNING') + + messages = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + + hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None) + messages.extend(history_messages) + messages.append({"role": "user", "content": prompt}) + if hashing_kv is not None: + args_hash = compute_args_hash(model, messages) + if_cache_return = await hashing_kv.get_by_id(args_hash) + if if_cache_return is not None: + return if_cache_return["return"] + + gen_config = GenerationConfig( + skip_special_tokens=skip_special_tokens, max_new_tokens=max_new_tokens, **gen_params) + + response = "" + async for res in lmdeploy_pipe.generate(messages, gen_config=gen_config, + do_preprocess=do_preprocess, stream_response=False, session_id=1): + response += res.response + + if hashing_kv is not None: + await hashing_kv.upsert({args_hash: {"return": response, "model": model}}) + return response + + async def gpt_4o_complete( prompt, system_prompt=None, history_messages=[], **kwargs ) -> str: diff --git a/requirements.txt b/requirements.txt index 98f32b0a..6b0e025a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ tiktoken torch transformers xxhash +# lmdeploy[all] From 883d6b7cc7fd1df29717ab5858f194cfbf0e3246 Mon Sep 17 00:00:00 2001 From: "zhenjie.ye" Date: Sat, 26 Oct 2024 16:12:10 +0800 Subject: [PATCH 23/39] Refactor code formatting in lightrag_api_openai_compatible_demo.py --- lightrag/lightrag.py | 4 +--- lightrag/llm.py | 4 +++- setup.py | 31 ++++++++++++++++++++++++------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 3004f5ed..b84e22ef 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -85,9 +85,7 @@ class LightRAG: # LLM llm_model_func: callable = gpt_4o_mini_complete # hf_model_complete# - llm_model_name: str = ( - "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' - ) + llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' llm_model_max_token_size: int = 32768 llm_model_max_async: int = 16 diff --git a/lightrag/llm.py b/lightrag/llm.py index bb0d6063..fd6b72d6 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -286,7 +286,9 @@ async def hf_model_if_cache( output = hf_model.generate( **input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True ) - response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True) + response_text = hf_tokenizer.decode( + output[0][len(inputs["input_ids"][0]) :], 
skip_special_tokens=True + ) if hashing_kv is not None: await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}}) return response_text diff --git a/setup.py b/setup.py index bdf49f02..1b1f65f0 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import setuptools from pathlib import Path + # Reading the long description from README.md def read_long_description(): try: @@ -8,6 +9,7 @@ def read_long_description(): except FileNotFoundError: return "A description of LightRAG is currently unavailable." + # Retrieving metadata from __init__.py def retrieve_metadata(): vars2find = ["__author__", "__version__", "__url__"] @@ -17,18 +19,26 @@ def retrieve_metadata(): for line in f.readlines(): for v in vars2find: if line.startswith(v): - line = line.replace(" ", "").replace('"', "").replace("'", "").strip() + line = ( + line.replace(" ", "") + .replace('"', "") + .replace("'", "") + .strip() + ) vars2readme[v] = line.split("=")[1] except FileNotFoundError: raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.") - + # Checking if all required variables are found missing_vars = [v for v in vars2find if v not in vars2readme] if missing_vars: - raise ValueError(f"Missing required metadata variables in __init__.py: {missing_vars}") - + raise ValueError( + f"Missing required metadata variables in __init__.py: {missing_vars}" + ) + return vars2readme + # Reading dependencies from requirements.txt def read_requirements(): deps = [] @@ -36,9 +46,12 @@ def read_requirements(): with open("./requirements.txt") as f: deps = [line.strip() for line in f if line.strip()] except FileNotFoundError: - print("Warning: 'requirements.txt' not found. No dependencies will be installed.") + print( + "Warning: 'requirements.txt' not found. No dependencies will be installed." 
+ ) return deps + metadata = retrieve_metadata() long_description = read_long_description() requirements = read_requirements() @@ -51,7 +64,9 @@ setuptools.setup( description="LightRAG: Simple and Fast Retrieval-Augmented Generation", long_description=long_description, long_description_content_type="text/markdown", - packages=setuptools.find_packages(exclude=("tests*", "docs*")), # Automatically find packages + packages=setuptools.find_packages( + exclude=("tests*", "docs*") + ), # Automatically find packages classifiers=[ "Development Status :: 4 - Beta", "Programming Language :: Python :: 3", @@ -66,6 +81,8 @@ setuptools.setup( project_urls={ # Additional project metadata "Documentation": metadata.get("__url__", ""), "Source": metadata.get("__url__", ""), - "Tracker": f"{metadata.get('__url__', '')}/issues" if metadata.get("__url__") else "" + "Tracker": f"{metadata.get('__url__', '')}/issues" + if metadata.get("__url__") + else "", }, ) From 2e703296d5e9f4a15547c1d1be3ecb53eab1925c Mon Sep 17 00:00:00 2001 From: tackhwa Date: Sat, 26 Oct 2024 16:13:18 +0800 Subject: [PATCH 24/39] pre-commit --- examples/lightrag_lmdeploy_demo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/lightrag_lmdeploy_demo.py b/examples/lightrag_lmdeploy_demo.py index ea7ace0e..aeb96f71 100644 --- a/examples/lightrag_lmdeploy_demo.py +++ b/examples/lightrag_lmdeploy_demo.py @@ -10,10 +10,11 @@ WORKING_DIR = "./dickens" if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) + async def lmdeploy_model_complete( prompt=None, system_prompt=None, history_messages=[], **kwargs ) -> str: - model_name = kwargs["hashing_kv"].global_config["llm_model_name"] + model_name = kwargs["hashing_kv"].global_config["llm_model_name"] return await lmdeploy_model_if_cache( model_name, prompt, @@ -23,7 +24,7 @@ async def lmdeploy_model_complete( ## or model_name is a pytorch model on huggingface.co, ## you can refer to https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/model.py ## for a list of chat_template available in lmdeploy. - chat_template = "llama3", + chat_template="llama3", # model_format ='awq', # if you are using awq quantization model. # quant_policy=8, # if you want to use online kv cache, 4=kv int4, 8=kv int8. 
**kwargs, @@ -33,7 +34,7 @@ async def lmdeploy_model_complete( rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=lmdeploy_model_complete, - llm_model_name="meta-llama/Llama-3.1-8B-Instruct", # please use definite path for local model + llm_model_name="meta-llama/Llama-3.1-8B-Instruct", # please use definite path for local model embedding_func=EmbeddingFunc( embedding_dim=384, max_token_size=5000, From 2cf3a85a0f09094372ae632cfce95cf1f649de76 Mon Sep 17 00:00:00 2001 From: tackhwa Date: Sat, 26 Oct 2024 16:24:35 +0800 Subject: [PATCH 25/39] update do_preprocess --- lightrag/llm.py | 77 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/lightrag/llm.py b/lightrag/llm.py index 028084bd..d86886ea 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -286,7 +286,9 @@ async def hf_model_if_cache( output = hf_model.generate( **input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True ) - response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True) + response_text = hf_tokenizer.decode( + output[0][len(inputs["input_ids"][0]) :], skip_special_tokens=True + ) if hashing_kv is not None: await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}}) return response_text @@ -323,19 +325,38 @@ async def ollama_model_if_cache( @lru_cache(maxsize=1) -def initialize_lmdeploy_pipeline(model, tp=1, chat_template=None, log_level='WARNING', model_format='hf', quant_policy=0): +def initialize_lmdeploy_pipeline( + model, + tp=1, + chat_template=None, + log_level="WARNING", + model_format="hf", + quant_policy=0, +): from lmdeploy import pipeline, ChatTemplateConfig, TurbomindEngineConfig + lmdeploy_pipe = pipeline( model_path=model, - backend_config=TurbomindEngineConfig(tp=tp, model_format=model_format, quant_policy=quant_policy), - chat_template_config=ChatTemplateConfig(model_name=chat_template) if chat_template else None, - log_level='WARNING') + backend_config=TurbomindEngineConfig( + tp=tp, model_format=model_format, quant_policy=quant_policy + ), + chat_template_config=ChatTemplateConfig(model_name=chat_template) + if chat_template + else None, + log_level="WARNING", + ) return lmdeploy_pipe async def lmdeploy_model_if_cache( - model, prompt, system_prompt=None, history_messages=[], - chat_template=None, model_format='hf',quant_policy=0, **kwargs + model, + prompt, + system_prompt=None, + history_messages=[], + chat_template=None, + model_format="hf", + quant_policy=0, + **kwargs, ) -> str: """ Args: @@ -354,36 +375,37 @@ async def lmdeploy_model_if_cache( and so on. chat_template (str): needed when model is a pytorch model on huggingface.co, such as "internlm-chat-7b", - "Qwen-7B-Chat ", "Baichuan2-7B-Chat" and so on, + "Qwen-7B-Chat ", "Baichuan2-7B-Chat" and so on, and when the model name of local path did not match the original model name in HF. tp (int): tensor parallel prompt (Union[str, List[str]]): input texts to be completed. do_preprocess (bool): whether pre-process the messages. Default to True, which means chat_template will be applied. skip_special_tokens (bool): Whether or not to remove special tokens - in the decoding. Default to be False. - do_sample (bool): Whether or not to use sampling, use greedy decoding otherwise. + in the decoding. Default to be True. + do_sample (bool): Whether or not to use sampling, use greedy decoding otherwise. Default to be False, which means greedy decoding will be applied. 
""" try: import lmdeploy from lmdeploy import version_info, GenerationConfig - except: + except Exception: raise ImportError("Please install lmdeploy before intialize lmdeploy backend.") - + kwargs.pop("response_format", None) max_new_tokens = kwargs.pop("max_tokens", 512) - tp = kwargs.pop('tp', 1) - skip_special_tokens = kwargs.pop('skip_special_tokens', False) - do_preprocess = kwargs.pop('do_preprocess', True) - do_sample = kwargs.pop('do_sample', False) + tp = kwargs.pop("tp", 1) + skip_special_tokens = kwargs.pop("skip_special_tokens", True) + do_preprocess = kwargs.pop("do_preprocess", True) + do_sample = kwargs.pop("do_sample", False) gen_params = kwargs - + version = version_info if do_sample is not None and version < (0, 6, 0): raise RuntimeError( - '`do_sample` parameter is not supported by lmdeploy until ' - f'v0.6.0, but currently using lmdeloy {lmdeploy.__version__}') + "`do_sample` parameter is not supported by lmdeploy until " + f"v0.6.0, but currently using lmdeloy {lmdeploy.__version__}" + ) else: do_sample = True gen_params.update(do_sample=do_sample) @@ -394,7 +416,8 @@ async def lmdeploy_model_if_cache( chat_template=chat_template, model_format=model_format, quant_policy=quant_policy, - log_level='WARNING') + log_level="WARNING", + ) messages = [] if system_prompt: @@ -410,11 +433,19 @@ async def lmdeploy_model_if_cache( return if_cache_return["return"] gen_config = GenerationConfig( - skip_special_tokens=skip_special_tokens, max_new_tokens=max_new_tokens, **gen_params) + skip_special_tokens=skip_special_tokens, + max_new_tokens=max_new_tokens, + **gen_params, + ) response = "" - async for res in lmdeploy_pipe.generate(messages, gen_config=gen_config, - do_preprocess=do_preprocess, stream_response=False, session_id=1): + async for res in lmdeploy_pipe.generate( + messages, + gen_config=gen_config, + do_preprocess=do_preprocess, + stream_response=False, + session_id=1, + ): response += res.response if hashing_kv is not None: From 6d84569703213a006570dd68346d4399967e18f0 Mon Sep 17 00:00:00 2001 From: zrguo <49157727+LarFii@users.noreply.github.com> Date: Mon, 28 Oct 2024 09:59:40 +0800 Subject: [PATCH 26/39] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d11b1691..bfdf920f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ - +

From fee8575750cfdc7b03765048949536218a863531 Mon Sep 17 00:00:00 2001 From: zrguo <49157727+LarFii@users.noreply.github.com> Date: Mon, 28 Oct 2024 15:08:41 +0800 Subject: [PATCH 27/39] Update README.md --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bfdf920f..15696b57 100644 --- a/README.md +++ b/README.md @@ -237,7 +237,15 @@ rag.insert(["TEXT1", "TEXT2",...]) ```python # Incremental Insert: Insert new documents into an existing LightRAG instance -rag = LightRAG(working_dir="./dickens") +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=embedding_dimension, + max_token_size=8192, + func=embedding_func, + ), +) with open("./newText.txt") as f: rag.insert(f.read()) From 84b60e4aa687bc313115015c0376e94535ddf592 Mon Sep 17 00:00:00 2001 From: Andrii Lazarchuk Date: Mon, 28 Oct 2024 17:05:38 +0200 Subject: [PATCH 28/39] Fix lint issue --- examples/lightrag_ollama_demo.py | 5 ++--- lightrag/lightrag.py | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py index 0a704024..1a320d13 100644 --- a/examples/lightrag_ollama_demo.py +++ b/examples/lightrag_ollama_demo.py @@ -1,14 +1,13 @@ import os import logging - -logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) - from lightrag import LightRAG, QueryParam from lightrag.llm import ollama_model_complete, ollama_embedding from lightrag.utils import EmbeddingFunc WORKING_DIR = "./dickens" +logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) + if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 955651fb..89ee1df5 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -155,7 +155,11 @@ class LightRAG: ) self.llm_model_func = limit_async_func_call(self.llm_model_max_async)( - partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs) + partial( + self.llm_model_func, + hashing_kv=self.llm_response_cache, + **self.llm_model_kwargs, + ) ) def insert(self, string_or_strings): From 6b80237805e6a5986387c2e674bf609214577079 Mon Sep 17 00:00:00 2001 From: Andrii Lazarchuk Date: Mon, 28 Oct 2024 19:05:59 +0200 Subject: [PATCH 29/39] Update README with more details --- README.md | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 15696b57..683dd0b2 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,10 @@ rag = LightRAG(

Using Ollama Models -* If you want to use Ollama models, you only need to set LightRAG as follows: +### Overview +If you want to use Ollama models, you need to pull the model you plan to use and the embedding model, for example `nomic-embed-text`. + +Then you only need to set LightRAG as follows: ```python from lightrag.llm import ollama_model_complete, ollama_embedding @@ -185,28 +188,59 @@ rag = LightRAG( ) ``` -* Increasing the `num_ctx` parameter: +### Increasing context size +In order for LightRAG to work, the context should be at least 32k tokens. By default, Ollama models have a context size of 8k. You can increase it in one of two ways: + +#### Increasing the `num_ctx` parameter in Modelfile. 1. Pull the model: -```python +```bash ollama pull qwen2 ``` 2. Display the model file: -```python +```bash ollama show --modelfile qwen2 > Modelfile ``` 3. Edit the Modelfile by adding the following line: -```python +```bash PARAMETER num_ctx 32768 ``` 4. Create the modified model: -```python +```bash ollama create -f Modelfile qwen2m ``` +#### Setup `num_ctx` via Ollama API. +You can use the `llm_model_kwargs` param to configure ollama: + +```python +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=ollama_model_complete, # Use Ollama model for text generation + llm_model_name='your_model_name', # Your model name + llm_model_kwargs={"options": {"num_ctx": 32768}}, + # Use Ollama embedding function + embedding_func=EmbeddingFunc( + embedding_dim=768, + max_token_size=8192, + func=lambda texts: ollama_embedding( + texts, + embed_model="nomic-embed-text" + ) + ), +) +``` +#### Fully functional example + +There is a fully functional example `examples/lightrag_ollama_demo.py` that utilizes the `gemma2:2b` model, runs only 4 requests in parallel, and sets the context size to 32k. + +#### Low RAM GPUs + +In order to run this experiment on a low-RAM GPU you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this ollama example on a repurposed mining GPU with 6GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
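As a concrete illustration of the low-RAM settings described above, the sketch below combines options already shown in this section; the specific numbers (`num_ctx` of 26000 and 4 parallel requests) are assumptions to be tuned for your own GPU, not values required by LightRAG.

```python
# A minimal low-VRAM sketch, assuming gemma2:2b and a reduced context window.
import os
from lightrag import LightRAG
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc

WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=ollama_model_complete,
    llm_model_name="gemma2:2b",
    llm_model_max_async=4,  # limit the number of parallel LLM requests
    llm_model_kwargs={"options": {"num_ctx": 26000}},  # reduced context for ~6GB of VRAM
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
    ),
)
```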
### Query Param From cc83045f464cee691dd2adbf46df453360c7f857 Mon Sep 17 00:00:00 2001 From: MrGidea <98243922+MrGidea@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:44:41 +0800 Subject: [PATCH 30/39] Update README.md --- README.md | 866 +----------------------------------------------------- 1 file changed, 11 insertions(+), 855 deletions(-) diff --git a/README.md b/README.md index 683dd0b2..c7dbf6b6 100644 --- a/README.md +++ b/README.md @@ -1,861 +1,17 @@ -

🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation

- - -![Please add image description](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg) - -
-

- - - - -

-

- - - - -

- -This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag). -![Please add image description](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png) -
- -## ๐ŸŽ‰ News -- [x] [2024.10.20]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWeโ€™ve added a new feature to LightRAG: Graph Visualization. -- [x] [2024.10.18]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWeโ€™ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author! -- [x] [2024.10.17]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWe have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! ๐ŸŽ‰๐ŸŽ‰ -- [x] [2024.10.16]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! -- [x] [2024.10.15]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! - -## Algorithm Flowchart - -![LightRAG_Self excalidraw](https://github.com/user-attachments/assets/aa5c4892-2e44-49e6-a116-2403ed80a1a3) - - -## Install - -* Install from source (Recommend) - +## Quick start +* install textract ```bash -cd LightRAG -pip install -e . +pip install textract ``` -* Install from PyPI +*example ```bash -pip install lightrag-hku +import textract +# ๆŒ‡ๅฎš่ฆๆๅ–ๆ–‡ๆœฌ็š„ๆ–‡ไปถ่ทฏๅพ„ +file_path = 'path/to/your/file.pdf' +# ไปŽๆ–‡ไปถไธญๆๅ–ๆ–‡ๆœฌ +text_content = textract.process(file_path) +# ๆ‰“ๅฐๆๅ–็š„ๆ–‡ๆœฌ +print(text_content.decode('utf-8')) ``` -## Quick Start -* [Video demo](https://www.youtube.com/watch?v=g21royNJ4fw) of running LightRAG locally. -* All the code can be found in the `examples`. -* Set OpenAI API key in environment if using OpenAI models: `export OPENAI_API_KEY="sk-...".` -* Download the demo text "A Christmas Carol by Charles Dickens": -```bash -curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt -``` -Use the below Python snippet (in a script) to initialize LightRAG and perform queries: -```python -import os -from lightrag import LightRAG, QueryParam -from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete - -######### -# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() -# import nest_asyncio -# nest_asyncio.apply() -######### - -WORKING_DIR = "./dickens" - - -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model - # llm_model_func=gpt_4o_complete # Optionally, use a stronger model -) - -with open("./book.txt") as f: - rag.insert(f.read()) - -# Perform naive search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) - -# Perform local search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) - -# Perform global search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) - -# Perform hybrid search -print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) -``` - -
- Using Open AI-like APIs - -* LightRAG also supports Open AI-like chat/embeddings APIs: -```python -async def llm_model_func( - prompt, system_prompt=None, history_messages=[], **kwargs -) -> str: - return await openai_complete_if_cache( - "solar-mini", - prompt, - system_prompt=system_prompt, - history_messages=history_messages, - api_key=os.getenv("UPSTAGE_API_KEY"), - base_url="https://api.upstage.ai/v1/solar", - **kwargs - ) - -async def embedding_func(texts: list[str]) -> np.ndarray: - return await openai_embedding( - texts, - model="solar-embedding-1-large-query", - api_key=os.getenv("UPSTAGE_API_KEY"), - base_url="https://api.upstage.ai/v1/solar" - ) - -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=4096, - max_token_size=8192, - func=embedding_func - ) -) -``` -
- -
- Using Hugging Face Models - -* If you want to use Hugging Face models, you only need to set LightRAG as follows: -```python -from lightrag.llm import hf_model_complete, hf_embedding -from transformers import AutoModel, AutoTokenizer - -# Initialize LightRAG with Hugging Face model -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=hf_model_complete, # Use Hugging Face model for text generation - llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face - # Use Hugging Face embedding function - embedding_func=EmbeddingFunc( - embedding_dim=384, - max_token_size=5000, - func=lambda texts: hf_embedding( - texts, - tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), - embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") - ) - ), -) -``` -
- -
- Using Ollama Models - -### Overview -If you want to use Ollama models, you need to pull model you plan to use and embedding model, for example `nomic-embed-text`. - -Then you only need to set LightRAG as follows: - -```python -from lightrag.llm import ollama_model_complete, ollama_embedding - -# Initialize LightRAG with Ollama model -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=ollama_model_complete, # Use Ollama model for text generation - llm_model_name='your_model_name', # Your model name - # Use Ollama embedding function - embedding_func=EmbeddingFunc( - embedding_dim=768, - max_token_size=8192, - func=lambda texts: ollama_embedding( - texts, - embed_model="nomic-embed-text" - ) - ), -) -``` - -### Increasing context size -In order for LightRAG to work context should be at least 32k tokens. By default Ollama models have context size of 8k. You can achieve this using one of two ways: - -#### Increasing the `num_ctx` parameter in Modelfile. - -1. Pull the model: -```bash -ollama pull qwen2 -``` - -2. Display the model file: -```bash -ollama show --modelfile qwen2 > Modelfile -``` - -3. Edit the Modelfile by adding the following line: -```bash -PARAMETER num_ctx 32768 -``` - -4. Create the modified model: -```bash -ollama create -f Modelfile qwen2m -``` - -#### Setup `num_ctx` via Ollama API. -Tiy can use `llm_model_kwargs` param to configure ollama: - -```python -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=ollama_model_complete, # Use Ollama model for text generation - llm_model_name='your_model_name', # Your model name - llm_model_kwargs={"options": {"num_ctx": 32768}}, - # Use Ollama embedding function - embedding_func=EmbeddingFunc( - embedding_dim=768, - max_token_size=8192, - func=lambda texts: ollama_embedding( - texts, - embed_model="nomic-embed-text" - ) - ), -) -``` -#### Fully functional example - -There fully functional example `examples/lightrag_ollama_demo.py` that utilizes `gemma2:2b` model, runs only 4 requests in parallel and set context size to 32k. - -#### Low RAM GPUs - -In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`. - -
- -### Query Param - -```python -class QueryParam: - mode: Literal["local", "global", "hybrid", "naive"] = "global" - only_need_context: bool = False - response_type: str = "Multiple Paragraphs" - # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. - top_k: int = 60 - # Number of tokens for the original chunks. - max_token_for_text_unit: int = 4000 - # Number of tokens for the relationship descriptions - max_token_for_global_context: int = 4000 - # Number of tokens for the entity descriptions - max_token_for_local_context: int = 4000 -``` - -### Batch Insert - -```python -# Batch Insert: Insert multiple texts at once -rag.insert(["TEXT1", "TEXT2",...]) -``` - -### Incremental Insert - -```python -# Incremental Insert: Insert new documents into an existing LightRAG instance -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=embedding_dimension, - max_token_size=8192, - func=embedding_func, - ), -) - -with open("./newText.txt") as f: - rag.insert(f.read()) -``` - -### Graph Visualization - -
- Graph visualization with html - -* The following code can be found in `examples/graph_visual_with_html.py` - -```python -import networkx as nx -from pyvis.network import Network - -# Load the GraphML file -G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml') - -# Create a Pyvis network -net = Network(notebook=True) - -# Convert NetworkX graph to Pyvis network -net.from_nx(G) - -# Save and display the network -net.show('knowledge_graph.html') -``` - -
- -
- Graph visualization with Neo4j - -* The following code can be found in `examples/graph_visual_with_neo4j.py` - -```python -import os -import json -from lightrag.utils import xml_to_json -from neo4j import GraphDatabase - -# Constants -WORKING_DIR = "./dickens" -BATCH_SIZE_NODES = 500 -BATCH_SIZE_EDGES = 100 - -# Neo4j connection credentials -NEO4J_URI = "bolt://localhost:7687" -NEO4J_USERNAME = "neo4j" -NEO4J_PASSWORD = "your_password" - -def convert_xml_to_json(xml_path, output_path): - """Converts XML file to JSON and saves the output.""" - if not os.path.exists(xml_path): - print(f"Error: File not found - {xml_path}") - return None - - json_data = xml_to_json(xml_path) - if json_data: - with open(output_path, 'w', encoding='utf-8') as f: - json.dump(json_data, f, ensure_ascii=False, indent=2) - print(f"JSON file created: {output_path}") - return json_data - else: - print("Failed to create JSON data") - return None - -def process_in_batches(tx, query, data, batch_size): - """Process data in batches and execute the given query.""" - for i in range(0, len(data), batch_size): - batch = data[i:i + batch_size] - tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch}) - -def main(): - # Paths - xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml') - json_file = os.path.join(WORKING_DIR, 'graph_data.json') - - # Convert XML to JSON - json_data = convert_xml_to_json(xml_file, json_file) - if json_data is None: - return - - # Load nodes and edges - nodes = json_data.get('nodes', []) - edges = json_data.get('edges', []) - - # Neo4j queries - create_nodes_query = """ - UNWIND $nodes AS node - MERGE (e:Entity {id: node.id}) - SET e.entity_type = node.entity_type, - e.description = node.description, - e.source_id = node.source_id, - e.displayName = node.id - REMOVE e:Entity - WITH e, node - CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode - RETURN count(*) - """ - - create_edges_query = """ - UNWIND $edges AS edge - MATCH (source {id: edge.source}) - MATCH (target {id: edge.target}) - WITH source, target, edge, - CASE - WHEN edge.keywords CONTAINS 'lead' THEN 'lead' - WHEN edge.keywords CONTAINS 'participate' THEN 'participate' - WHEN edge.keywords CONTAINS 'uses' THEN 'uses' - WHEN edge.keywords CONTAINS 'located' THEN 'located' - WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs' - ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '') - END AS relType - CALL apoc.create.relationship(source, relType, { - weight: edge.weight, - description: edge.description, - keywords: edge.keywords, - source_id: edge.source_id - }, target) YIELD rel - RETURN count(*) - """ - - set_displayname_and_labels_query = """ - MATCH (n) - SET n.displayName = n.id - WITH n - CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node - RETURN count(*) - """ - - # Create a Neo4j driver - driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) - - try: - # Execute queries in batches - with driver.session() as session: - # Insert nodes in batches - session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES) - - # Insert edges in batches - session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES) - - # Set displayName and labels - session.run(set_displayname_and_labels_query) - - except Exception as e: - print(f"Error occurred: {e}") - - finally: - driver.close() - -if __name__ == "__main__": - main() -``` - -
- -## API Server Implementation - -LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests. - -### Setting up the API Server -
-Click to expand setup instructions - -1. First, ensure you have the required dependencies: -```bash -pip install fastapi uvicorn pydantic -``` - -2. Set up your environment variables: -```bash -export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default" -``` - -3. Run the API server: -```bash -python examples/lightrag_api_openai_compatible_demo.py -``` - -The server will start on `http://0.0.0.0:8020`. -
- -### API Endpoints - -The API server provides the following endpoints: - -#### 1. Query Endpoint -
-Click to view Query endpoint details - -- **URL:** `/query` -- **Method:** POST -- **Body:** -```json -{ - "query": "Your question here", - "mode": "hybrid" // Can be "naive", "local", "global", or "hybrid" -} -``` -- **Example:** -```bash -curl -X POST "http://127.0.0.1:8020/query" \ - -H "Content-Type: application/json" \ - -d '{"query": "What are the main themes?", "mode": "hybrid"}' -``` -
- -#### 2. Insert Text Endpoint -
-Click to view Insert Text endpoint details - -- **URL:** `/insert` -- **Method:** POST -- **Body:** -```json -{ - "text": "Your text content here" -} -``` -- **Example:** -```bash -curl -X POST "http://127.0.0.1:8020/insert" \ - -H "Content-Type: application/json" \ - -d '{"text": "Content to be inserted into RAG"}' -``` -
- -#### 3. Insert File Endpoint -
-Click to view Insert File endpoint details - -- **URL:** `/insert_file` -- **Method:** POST -- **Body:** -```json -{ - "file_path": "path/to/your/file.txt" -} -``` -- **Example:** -```bash -curl -X POST "http://127.0.0.1:8020/insert_file" \ - -H "Content-Type: application/json" \ - -d '{"file_path": "./book.txt"}' -``` -
- -#### 4. Health Check Endpoint -
-Click to view Health Check endpoint details - -- **URL:** `/health` -- **Method:** GET -- **Example:** -```bash -curl -X GET "http://127.0.0.1:8020/health" -``` -
- -### Configuration - -The API server can be configured using environment variables: -- `RAG_DIR`: Directory for storing the RAG index (default: "index_default") -- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers - -### Error Handling -
-Click to view error handling details - -The API includes comprehensive error handling: -- File not found errors (404) -- Processing errors (500) -- Supports multiple file encodings (UTF-8 and GBK) -
- -## Evaluation -### Dataset -The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain). - -### Generate Query -LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `example/generate_query.py`. - -
- Prompt - -```python -Given the following description of a dataset: - -{description} - -Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset. - -Output the results in the following structure: -- User 1: [user description] - - Task 1: [task description] - - Question 1: - - Question 2: - - Question 3: - - Question 4: - - Question 5: - - Task 2: [task description] - ... - - Task 5: [task description] -- User 2: [user description] - ... -- User 5: [user description] - ... -``` -
- - ### Batch Eval -To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `example/batch_eval.py`. - -
- Prompt - -```python ----Role--- -You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. ----Goal--- -You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. - -- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question? -- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question? -- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic? - -For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories. - -Here is the question: -{query} - -Here are the two answers: - -**Answer 1:** -{answer1} - -**Answer 2:** -{answer2} - -Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion. - -Output your evaluation in the following JSON format: - -{{ - "Comprehensiveness": {{ - "Winner": "[Answer 1 or Answer 2]", - "Explanation": "[Provide explanation here]" - }}, - "Empowerment": {{ - "Winner": "[Answer 1 or Answer 2]", - "Explanation": "[Provide explanation here]" - }}, - "Overall Winner": {{ - "Winner": "[Answer 1 or Answer 2]", - "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]" - }} -}} -``` -
- -### Overall Performance Table -| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | | -|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------| -| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | -| **Comprehensiveness** | 32.69% | **67.31%** | 35.44% | **64.56%** | 19.05% | **80.95%** | 36.36% | **63.64%** | -| **Diversity** | 24.09% | **75.91%** | 35.24% | **64.76%** | 10.98% | **89.02%** | 30.76% | **69.24%** | -| **Empowerment** | 31.35% | **68.65%** | 35.48% | **64.52%** | 17.59% | **82.41%** | 40.95% | **59.05%** | -| **Overall** | 33.30% | **66.70%** | 34.76% | **65.24%** | 17.46% | **82.54%** | 37.59% | **62.40%** | -| | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | -| **Comprehensiveness** | 32.05% | **67.95%** | 39.30% | **60.70%** | 18.57% | **81.43%** | 38.89% | **61.11%** | -| **Diversity** | 29.44% | **70.56%** | 38.71% | **61.29%** | 15.14% | **84.86%** | 28.50% | **71.50%** | -| **Empowerment** | 32.51% | **67.49%** | 37.52% | **62.48%** | 17.80% | **82.20%** | 43.96% | **56.04%** | -| **Overall** | 33.29% | **66.71%** | 39.03% | **60.97%** | 17.80% | **82.20%** | 39.61% | **60.39%** | -| | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | -| **Comprehensiveness** | 24.39% | **75.61%** | 36.49% | **63.51%** | 27.68% | **72.32%** | 42.17% | **57.83%** | -| **Diversity** | 24.96% | **75.34%** | 37.41% | **62.59%** | 18.79% | **81.21%** | 30.88% | **69.12%** | -| **Empowerment** | 24.89% | **75.11%** | 34.99% | **65.01%** | 26.99% | **73.01%** | **45.61%** | **54.39%** | -| **Overall** | 23.17% | **76.83%** | 35.67% | **64.33%** | 27.68% | **72.32%** | 42.72% | **57.28%** | -| | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | -| **Comprehensiveness** | 45.56% | **54.44%** | 45.98% | **54.02%** | 47.13% | **52.87%** | **51.86%** | 48.14% | -| **Diversity** | 19.65% | **80.35%** | 39.64% | **60.36%** | 25.55% | **74.45%** | 35.87% | **64.13%** | -| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% | -| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% | - -## Reproduce -All the code can be found in the `./reproduce` directory. - -### Step-0 Extract Unique Contexts -First, we need to extract unique contexts in the datasets. - -
- Code - -```python -def extract_unique_contexts(input_directory, output_directory): - - os.makedirs(output_directory, exist_ok=True) - - jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl')) - print(f"Found {len(jsonl_files)} JSONL files.") - - for file_path in jsonl_files: - filename = os.path.basename(file_path) - name, ext = os.path.splitext(filename) - output_filename = f"{name}_unique_contexts.json" - output_path = os.path.join(output_directory, output_filename) - - unique_contexts_dict = {} - - print(f"Processing file: {filename}") - - try: - with open(file_path, 'r', encoding='utf-8') as infile: - for line_number, line in enumerate(infile, start=1): - line = line.strip() - if not line: - continue - try: - json_obj = json.loads(line) - context = json_obj.get('context') - if context and context not in unique_contexts_dict: - unique_contexts_dict[context] = None - except json.JSONDecodeError as e: - print(f"JSON decoding error in file {filename} at line {line_number}: {e}") - except FileNotFoundError: - print(f"File not found: {filename}") - continue - except Exception as e: - print(f"An error occurred while processing file {filename}: {e}") - continue - - unique_contexts_list = list(unique_contexts_dict.keys()) - print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.") - - try: - with open(output_path, 'w', encoding='utf-8') as outfile: - json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4) - print(f"Unique `context` entries have been saved to: {output_filename}") - except Exception as e: - print(f"An error occurred while saving to the file {output_filename}: {e}") - - print("All files have been processed.") - -``` -
- -### Step-1 Insert Contexts -For the extracted contexts, we insert them into the LightRAG system. - -
- Code - -```python -def insert_text(rag, file_path): - with open(file_path, mode='r') as f: - unique_contexts = json.load(f) - - retries = 0 - max_retries = 3 - while retries < max_retries: - try: - rag.insert(unique_contexts) - break - except Exception as e: - retries += 1 - print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}") - time.sleep(10) - if retries == max_retries: - print("Insertion failed after exceeding the maximum number of retries") -``` -
- -### Step-2 Generate Queries - -We extract tokens from the first and the second half of each context in the dataset, then combine them as dataset descriptions to generate queries. - -
- Code - -```python -tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - -def get_summary(context, tot_tokens=2000): - tokens = tokenizer.tokenize(context) - half_tokens = tot_tokens // 2 - - start_tokens = tokens[1000:1000 + half_tokens] - end_tokens = tokens[-(1000 + half_tokens):1000] - - summary_tokens = start_tokens + end_tokens - summary = tokenizer.convert_tokens_to_string(summary_tokens) - - return summary -``` -
- -### Step-3 Query -For the queries generated in Step-2, we will extract them and query LightRAG. - -
- Code - -```python -def extract_queries(file_path): - with open(file_path, 'r') as f: - data = f.read() - - data = data.replace('**', '') - - queries = re.findall(r'- Question \d+: (.+)', data) - - return queries -``` -
- -## Code Structure - -```python -. -โ”œโ”€โ”€ examples -โ”‚ โ”œโ”€โ”€ batch_eval.py -โ”‚ โ”œโ”€โ”€ graph_visual_with_html.py -โ”‚ โ”œโ”€โ”€ graph_visual_with_neo4j.py -โ”‚ โ”œโ”€โ”€ generate_query.py -โ”‚ โ”œโ”€โ”€ lightrag_azure_openai_demo.py -โ”‚ โ”œโ”€โ”€ lightrag_bedrock_demo.py -โ”‚ โ”œโ”€โ”€ lightrag_hf_demo.py -โ”‚ โ”œโ”€โ”€ lightrag_ollama_demo.py -โ”‚ โ”œโ”€โ”€ lightrag_openai_compatible_demo.py -โ”‚ โ”œโ”€โ”€ lightrag_openai_demo.py -โ”‚ โ”œโ”€โ”€ lightrag_siliconcloud_demo.py -โ”‚ โ””โ”€โ”€ vram_management_demo.py -โ”œโ”€โ”€ lightrag -โ”‚ โ”œโ”€โ”€ __init__.py -โ”‚ โ”œโ”€โ”€ base.py -โ”‚ โ”œโ”€โ”€ lightrag.py -โ”‚ โ”œโ”€โ”€ llm.py -โ”‚ โ”œโ”€โ”€ operate.py -โ”‚ โ”œโ”€โ”€ prompt.py -โ”‚ โ”œโ”€โ”€ storage.py -โ”‚ โ””โ”€โ”€ utils.py -โ”œโ”€โ”€ reproduce -โ”‚ โ”œโ”€โ”€ Step_0.py -โ”‚ โ”œโ”€โ”€ Step_1.py -โ”‚ โ”œโ”€โ”€ Step_2.py -โ”‚ โ””โ”€โ”€ Step_3.py -โ”œโ”€โ”€ .gitignore -โ”œโ”€โ”€ .pre-commit-config.yaml -โ”œโ”€โ”€ LICENSE -โ”œโ”€โ”€ README.md -โ”œโ”€โ”€ requirements.txt -โ””โ”€โ”€ setup.py -``` - -## Star History - - - - - - Star History Chart - - - -## Citation - -```python -@article{guo2024lightrag, -title={LightRAG: Simple and Fast Retrieval-Augmented Generation}, -author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang}, -year={2024}, -eprint={2410.05779}, -archivePrefix={arXiv}, -primaryClass={cs.IR} -} -``` From 8ce390db33d71b69156290990e31f3015f35e5b2 Mon Sep 17 00:00:00 2001 From: MrGidea <98243922+MrGidea@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:45:00 +0800 Subject: [PATCH 31/39] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7dbf6b6..7f133490 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ```bash pip install textract ``` -*example +* example ```bash import textract # ๆŒ‡ๅฎš่ฆๆๅ–ๆ–‡ๆœฌ็š„ๆ–‡ไปถ่ทฏๅพ„ From 116dc1c31550110d6ab5a0236974c25cf7d46cc6 Mon Sep 17 00:00:00 2001 From: MrGidea <98243922+MrGidea@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:46:48 +0800 Subject: [PATCH 32/39] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7f133490..921b44d8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ ## Quick start +Currently, the test supports pptx, pdf, csv, word, txt file types * install textract ```bash pip install textract From fe5621e25aef139c6f8c340ee4eb1756a4900451 Mon Sep 17 00:00:00 2001 From: MrGidea <98243922+MrGidea@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:47:17 +0800 Subject: [PATCH 33/39] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 921b44d8..660c7b27 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ## Quick start -Currently, the test supports pptx, pdf, csv, word, txt file types +Currently, the test supports pptx, pdf, csv, docx, txt file types * install textract ```bash pip install textract From 818074d258f2e90175a4eb123b46ce0191f78ea1 Mon Sep 17 00:00:00 2001 From: LarFii <834462287@qq.com> Date: Tue, 29 Oct 2024 16:16:11 +0800 Subject: [PATCH 34/39] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 683dd0b2..870c2d6f 100644 --- a/README.md +++ b/README.md @@ -804,12 +804,14 @@ def extract_queries(file_path): . 
 ├── examples
 │   ├── batch_eval.py
+│   ├── generate_query.py
 │   ├── graph_visual_with_html.py
 │   ├── graph_visual_with_neo4j.py
-│   ├── generate_query.py
+│   ├── lightrag_api_openai_compatible_demo.py
 │   ├── lightrag_azure_openai_demo.py
 │   ├── lightrag_bedrock_demo.py
 │   ├── lightrag_hf_demo.py
+│   ├── lightrag_lmdeploy_demo.py
 │   ├── lightrag_ollama_demo.py
 │   ├── lightrag_openai_compatible_demo.py
 │   ├── lightrag_openai_demo.py
@@ -826,8 +828,10 @@ def extract_queries(file_path):
 │   └── utils.py
 ├── reproduce
 │   ├── Step_0.py
+│   ├── Step_1_openai_compatible.py
 │   ├── Step_1.py
 │   ├── Step_2.py
+│   ├── Step_3_openai_compatible.py
 │   └── Step_3.py
 ├── .gitignore
 ├── .pre-commit-config.yaml

From fb093690da78d65cd5783797c50ffe80be8e9d01 Mon Sep 17 00:00:00 2001
From: MrGidea <98243922+MrGidea@users.noreply.github.com>
Date: Tue, 29 Oct 2024 16:36:04 +0800
Subject: [PATCH 35/39] Update README.md

---
 README.md | 884 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 874 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 660c7b27..acfd7c96 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,882 @@
-## Quick start
-Currently, the test supports pptx, pdf, csv, docx, txt file types
-* install textract

🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation

![image description](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
+

+ + + + +

+

+ + + + +


This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
![image description](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
+ +## ๐ŸŽ‰ News +- [x] [2024.10.29]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขMulti-file types are now supported by `textract`. +- [x] [2024.10.20]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWeโ€™ve added a new feature to LightRAG: Graph Visualization. +- [x] [2024.10.18]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWeโ€™ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author! +- [x] [2024.10.17]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWe have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! ๐ŸŽ‰๐ŸŽ‰ +- [x] [2024.10.16]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! +- [x] [2024.10.15]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! + +## Algorithm Flowchart + +![LightRAG_Self excalidraw](https://github.com/user-attachments/assets/aa5c4892-2e44-49e6-a116-2403ed80a1a3) + + +## Install + +* Install from source (Recommend) + ```bash -pip install textract +cd LightRAG +pip install -e . ``` -* example +* Install from PyPI ```bash +pip install lightrag-hku +``` + +## Quick Start +* [Video demo](https://www.youtube.com/watch?v=g21royNJ4fw) of running LightRAG locally. +* All the code can be found in the `examples`. +* Set OpenAI API key in environment if using OpenAI models: `export OPENAI_API_KEY="sk-...".` +* Download the demo text "A Christmas Carol by Charles Dickens": +```bash +curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt +``` +Use the below Python snippet (in a script) to initialize LightRAG and perform queries: + +```python +import os +from lightrag import LightRAG, QueryParam +from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete + +######### +# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() +# import nest_asyncio +# nest_asyncio.apply() +######### + +WORKING_DIR = "./dickens" + + +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=gpt_4o_mini_complete # Use gpt_4o_mini_complete LLM model + # llm_model_func=gpt_4o_complete # Optionally, use a stronger model +) + +with open("./book.txt") as f: + rag.insert(f.read()) + +# Perform naive search +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))) + +# Perform local search +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))) + +# Perform global search +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))) + +# Perform hybrid search +print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))) +``` + +
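For finer control over retrieval, the `QueryParam` fields documented in the Query Param section below can be passed to the same `query` call. A small sketch, assuming the index built by the snippet above already exists under `./dickens`; the parameter values are illustrative rather than recommendations:

```python
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete

# Reopen the index created by the Quick Start snippet above.
rag = LightRAG(working_dir="./dickens", llm_model_func=gpt_4o_mini_complete)

# Return only the retrieved context, without generating an answer.
context = rag.query(
    "What are the top themes in this story?",
    param=QueryParam(mode="hybrid", only_need_context=True),
)
print(context)

# Ask for a different answer format and a smaller retrieval budget.
answer = rag.query(
    "What are the top themes in this story?",
    param=QueryParam(mode="local", top_k=20, response_type="Bullet Points"),
)
print(answer)
```

Note that `response_type` is a free-form description that is passed into the answer prompt, so any short phrase describing the desired format can be used.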
+ Using Open AI-like APIs + +* LightRAG also supports Open AI-like chat/embeddings APIs: +```python +async def llm_model_func( + prompt, system_prompt=None, history_messages=[], **kwargs +) -> str: + return await openai_complete_if_cache( + "solar-mini", + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + api_key=os.getenv("UPSTAGE_API_KEY"), + base_url="https://api.upstage.ai/v1/solar", + **kwargs + ) + +async def embedding_func(texts: list[str]) -> np.ndarray: + return await openai_embedding( + texts, + model="solar-embedding-1-large-query", + api_key=os.getenv("UPSTAGE_API_KEY"), + base_url="https://api.upstage.ai/v1/solar" + ) + +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=4096, + max_token_size=8192, + func=embedding_func + ) +) +``` +
+ +
+ Using Hugging Face Models + +* If you want to use Hugging Face models, you only need to set LightRAG as follows: +```python +from lightrag.llm import hf_model_complete, hf_embedding +from transformers import AutoModel, AutoTokenizer + +# Initialize LightRAG with Hugging Face model +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=hf_model_complete, # Use Hugging Face model for text generation + llm_model_name='meta-llama/Llama-3.1-8B-Instruct', # Model name from Hugging Face + # Use Hugging Face embedding function + embedding_func=EmbeddingFunc( + embedding_dim=384, + max_token_size=5000, + func=lambda texts: hf_embedding( + texts, + tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"), + embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") + ) + ), +) +``` +
+ +
 Using Ollama Models

### Overview
If you want to use Ollama models, you need to pull the model you plan to use as well as an embedding model, for example `nomic-embed-text`.

Then you only need to set LightRAG as follows:

```python
from lightrag.llm import ollama_model_complete, ollama_embedding

# Initialize LightRAG with Ollama model
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=ollama_model_complete,  # Use Ollama model for text generation
    llm_model_name='your_model_name', # Your model name
    # Use Ollama embedding function
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embedding(
            texts,
            embed_model="nomic-embed-text"
        )
    ),
)
```

### Increasing context size
For LightRAG to work properly, the context window should be at least 32k tokens. By default, Ollama models use an 8k context. You can increase it in one of two ways:

#### Increasing the `num_ctx` parameter in the Modelfile

1. Pull the model:
```bash
ollama pull qwen2
```

2. Display the model file:
```bash
ollama show --modelfile qwen2 > Modelfile
```

3. Edit the Modelfile by adding the following line:
```bash
PARAMETER num_ctx 32768
```

4. Create the modified model:
```bash
ollama create -f Modelfile qwen2m
```

#### Setting `num_ctx` via the Ollama API
You can use the `llm_model_kwargs` parameter to configure Ollama:

```python
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=ollama_model_complete,  # Use Ollama model for text generation
    llm_model_name='your_model_name', # Your model name
    llm_model_kwargs={"options": {"num_ctx": 32768}},
    # Use Ollama embedding function
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embedding(
            texts,
            embed_model="nomic-embed-text"
        )
    ),
)
```
#### Fully functional example

There is a fully functional example in `examples/lightrag_ollama_demo.py` that uses the `gemma2:2b` model, runs only 4 requests in parallel, and sets the context size to 32k.

#### Low RAM GPUs

To run this experiment on a low-RAM GPU, select a small model and tune the context window (a larger context increases memory consumption). For example, running this Ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`; it was still able to find 197 entities and 19 relations on `book.txt`.
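As a concrete illustration of the low-RAM advice above, the sketch below mirrors that setup: `gemma2:2b`, four parallel LLM requests, and a 26k context. The imports follow `examples/lightrag_ollama_demo.py`; treat the exact numbers as starting points to tune for your own GPU.

```python
from lightrag import LightRAG
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc

WORKING_DIR = "./dickens"

rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=ollama_model_complete,
    llm_model_name="gemma2:2b",          # small model that fits a ~6 GB GPU
    llm_model_max_async=4,               # limit the number of parallel LLM requests
    llm_model_kwargs={"options": {"num_ctx": 26000}},  # reduced from 32k to save VRAM
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
    ),
)
```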
+ +### Query Param + +```python +class QueryParam: + mode: Literal["local", "global", "hybrid", "naive"] = "global" + only_need_context: bool = False + response_type: str = "Multiple Paragraphs" + # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. + top_k: int = 60 + # Number of tokens for the original chunks. + max_token_for_text_unit: int = 4000 + # Number of tokens for the relationship descriptions + max_token_for_global_context: int = 4000 + # Number of tokens for the entity descriptions + max_token_for_local_context: int = 4000 +``` + +### Batch Insert + +```python +# Batch Insert: Insert multiple texts at once +rag.insert(["TEXT1", "TEXT2",...]) +``` + +### Incremental Insert + +```python +# Incremental Insert: Insert new documents into an existing LightRAG instance +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=embedding_dimension, + max_token_size=8192, + func=embedding_func, + ), +) + +with open("./newText.txt") as f: + rag.insert(f.read()) +``` + +### Multi-file Type Support + +The `testract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF. + +```python import textract -# ๆŒ‡ๅฎš่ฆๆๅ–ๆ–‡ๆœฌ็š„ๆ–‡ไปถ่ทฏๅพ„ -file_path = 'path/to/your/file.pdf' -# ไปŽๆ–‡ไปถไธญๆๅ–ๆ–‡ๆœฌ + +file_path = 'TEXT.pdf' text_content = textract.process(file_path) -# ๆ‰“ๅฐๆๅ–็š„ๆ–‡ๆœฌ -print(text_content.decode('utf-8')) + +rag.insert(text_content.decode('utf-8')) +``` + +### Graph Visualization + +
+ Graph visualization with html + +* The following code can be found in `examples/graph_visual_with_html.py` + +```python +import networkx as nx +from pyvis.network import Network + +# Load the GraphML file +G = nx.read_graphml('./dickens/graph_chunk_entity_relation.graphml') + +# Create a Pyvis network +net = Network(notebook=True) + +# Convert NetworkX graph to Pyvis network +net.from_nx(G) + +# Save and display the network +net.show('knowledge_graph.html') +``` + +
+ +
+ Graph visualization with Neo4j + +* The following code can be found in `examples/graph_visual_with_neo4j.py` + +```python +import os +import json +from lightrag.utils import xml_to_json +from neo4j import GraphDatabase + +# Constants +WORKING_DIR = "./dickens" +BATCH_SIZE_NODES = 500 +BATCH_SIZE_EDGES = 100 + +# Neo4j connection credentials +NEO4J_URI = "bolt://localhost:7687" +NEO4J_USERNAME = "neo4j" +NEO4J_PASSWORD = "your_password" + +def convert_xml_to_json(xml_path, output_path): + """Converts XML file to JSON and saves the output.""" + if not os.path.exists(xml_path): + print(f"Error: File not found - {xml_path}") + return None + + json_data = xml_to_json(xml_path) + if json_data: + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(json_data, f, ensure_ascii=False, indent=2) + print(f"JSON file created: {output_path}") + return json_data + else: + print("Failed to create JSON data") + return None + +def process_in_batches(tx, query, data, batch_size): + """Process data in batches and execute the given query.""" + for i in range(0, len(data), batch_size): + batch = data[i:i + batch_size] + tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch}) + +def main(): + # Paths + xml_file = os.path.join(WORKING_DIR, 'graph_chunk_entity_relation.graphml') + json_file = os.path.join(WORKING_DIR, 'graph_data.json') + + # Convert XML to JSON + json_data = convert_xml_to_json(xml_file, json_file) + if json_data is None: + return + + # Load nodes and edges + nodes = json_data.get('nodes', []) + edges = json_data.get('edges', []) + + # Neo4j queries + create_nodes_query = """ + UNWIND $nodes AS node + MERGE (e:Entity {id: node.id}) + SET e.entity_type = node.entity_type, + e.description = node.description, + e.source_id = node.source_id, + e.displayName = node.id + REMOVE e:Entity + WITH e, node + CALL apoc.create.addLabels(e, [node.entity_type]) YIELD node AS labeledNode + RETURN count(*) + """ + + create_edges_query = """ + UNWIND $edges AS edge + MATCH (source {id: edge.source}) + MATCH (target {id: edge.target}) + WITH source, target, edge, + CASE + WHEN edge.keywords CONTAINS 'lead' THEN 'lead' + WHEN edge.keywords CONTAINS 'participate' THEN 'participate' + WHEN edge.keywords CONTAINS 'uses' THEN 'uses' + WHEN edge.keywords CONTAINS 'located' THEN 'located' + WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs' + ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '') + END AS relType + CALL apoc.create.relationship(source, relType, { + weight: edge.weight, + description: edge.description, + keywords: edge.keywords, + source_id: edge.source_id + }, target) YIELD rel + RETURN count(*) + """ + + set_displayname_and_labels_query = """ + MATCH (n) + SET n.displayName = n.id + WITH n + CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node + RETURN count(*) + """ + + # Create a Neo4j driver + driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) + + try: + # Execute queries in batches + with driver.session() as session: + # Insert nodes in batches + session.execute_write(process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES) + + # Insert edges in batches + session.execute_write(process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES) + + # Set displayName and labels + session.run(set_displayname_and_labels_query) + + except Exception as e: + print(f"Error occurred: {e}") + + finally: + driver.close() + +if __name__ == "__main__": + main() +``` + +
+ +## API Server Implementation + +LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests. + +### Setting up the API Server +
+Click to expand setup instructions + +1. First, ensure you have the required dependencies: +```bash +pip install fastapi uvicorn pydantic +``` + +2. Set up your environment variables: +```bash +export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default" +``` + +3. Run the API server: +```bash +python examples/lightrag_api_openai_compatible_demo.py +``` + +The server will start on `http://0.0.0.0:8020`. +
+ +### API Endpoints + +The API server provides the following endpoints: + +#### 1. Query Endpoint +
+Click to view Query endpoint details + +- **URL:** `/query` +- **Method:** POST +- **Body:** +```json +{ + "query": "Your question here", + "mode": "hybrid" // Can be "naive", "local", "global", or "hybrid" +} +``` +- **Example:** +```bash +curl -X POST "http://127.0.0.1:8020/query" \ + -H "Content-Type: application/json" \ + -d '{"query": "What are the main themes?", "mode": "hybrid"}' +``` +
+ +#### 2. Insert Text Endpoint +
+Click to view Insert Text endpoint details + +- **URL:** `/insert` +- **Method:** POST +- **Body:** +```json +{ + "text": "Your text content here" +} +``` +- **Example:** +```bash +curl -X POST "http://127.0.0.1:8020/insert" \ + -H "Content-Type: application/json" \ + -d '{"text": "Content to be inserted into RAG"}' +``` +
+ +#### 3. Insert File Endpoint +
+Click to view Insert File endpoint details + +- **URL:** `/insert_file` +- **Method:** POST +- **Body:** +```json +{ + "file_path": "path/to/your/file.txt" +} +``` +- **Example:** +```bash +curl -X POST "http://127.0.0.1:8020/insert_file" \ + -H "Content-Type: application/json" \ + -d '{"file_path": "./book.txt"}' +``` +
+ +#### 4. Health Check Endpoint +
+Click to view Health Check endpoint details + +- **URL:** `/health` +- **Method:** GET +- **Example:** +```bash +curl -X GET "http://127.0.0.1:8020/health" +``` +
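The endpoints above can also be exercised from Python instead of curl. A sketch using `requests`, assuming the demo server from `examples/lightrag_api_openai_compatible_demo.py` is running locally on port 8020; adjust the base URL and payloads to your deployment:

```python
import requests

BASE_URL = "http://127.0.0.1:8020"

# 1. Make sure the server is up.
health = requests.get(f"{BASE_URL}/health")
print(health.status_code, health.text)

# 2. Insert a piece of text into the index.
insert_resp = requests.post(
    f"{BASE_URL}/insert",
    json={"text": "Content to be inserted into RAG"},
)
insert_resp.raise_for_status()

# 3. Query in hybrid mode and print the raw response body.
query_resp = requests.post(
    f"{BASE_URL}/query",
    json={"query": "What are the main themes?", "mode": "hybrid"},
)
query_resp.raise_for_status()
print(query_resp.text)
```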
+ +### Configuration + +The API server can be configured using environment variables: +- `RAG_DIR`: Directory for storing the RAG index (default: "index_default") +- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers + +### Error Handling +
+Click to view error handling details + +The API includes comprehensive error handling: +- File not found errors (404) +- Processing errors (500) +- Supports multiple file encodings (UTF-8 and GBK) +
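As an illustration of the behaviors listed above (not the exact code of the demo server), a file-insertion endpoint could be written roughly like this: a 404 for missing files, a UTF-8 read with a GBK fallback, and a generic 500 for anything else.

```python
import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()

class InsertFileRequest(BaseModel):
    file_path: str

def read_text(path: str) -> str:
    # Try UTF-8 first, then fall back to GBK for files saved with a Chinese codepage.
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, "r", encoding="gbk") as f:
            return f.read()

@app.post("/insert_file")
async def insert_file(request: InsertFileRequest):
    if not os.path.exists(request.file_path):
        raise HTTPException(status_code=404, detail=f"File not found: {request.file_path}")
    try:
        text = read_text(request.file_path)
        # rag.insert(text) would be called here in the real server.
        return {"status": "success", "inserted_chars": len(text)}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
```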
+ +## Evaluation +### Dataset +The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain). + +### Generate Query +LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `example/generate_query.py`. + +
+ Prompt + +```python +Given the following description of a dataset: + +{description} + +Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset. + +Output the results in the following structure: +- User 1: [user description] + - Task 1: [task description] + - Question 1: + - Question 2: + - Question 3: + - Question 4: + - Question 5: + - Task 2: [task description] + ... + - Task 5: [task description] +- User 2: [user description] + ... +- User 5: [user description] + ... +``` +
+ + ### Batch Eval +To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `example/batch_eval.py`. + +
+ Prompt + +```python +---Role--- +You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. +---Goal--- +You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. + +- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question? +- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question? +- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic? + +For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories. + +Here is the question: +{query} + +Here are the two answers: + +**Answer 1:** +{answer1} + +**Answer 2:** +{answer2} + +Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion. + +Output your evaluation in the following JSON format: + +{{ + "Comprehensiveness": {{ + "Winner": "[Answer 1 or Answer 2]", + "Explanation": "[Provide explanation here]" + }}, + "Empowerment": {{ + "Winner": "[Answer 1 or Answer 2]", + "Explanation": "[Provide explanation here]" + }}, + "Overall Winner": {{ + "Winner": "[Answer 1 or Answer 2]", + "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]" + }} +}} +``` +
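To run this evaluation programmatically, the template above can be filled per query and the model's JSON verdict parsed, along the lines of the sketch below. `llm_call` is a placeholder for whatever chat-completion function you evaluate with, and real responses may need minor cleanup (for example stripping code fences) before `json.loads`.

```python
import json

def build_eval_prompt(template: str, query: str, answer1: str, answer2: str) -> str:
    # `template` is the evaluation prompt shown above; the {{ }} braces around the
    # JSON schema survive .format() as literal braces.
    return template.format(query=query, answer1=answer1, answer2=answer2)

def judge(llm_call, template: str, query: str, answer1: str, answer2: str) -> dict:
    raw = llm_call(build_eval_prompt(template, query, answer1, answer2))
    verdict = json.loads(raw)  # the prompt instructs the model to answer in JSON
    # Collapse the verdict to {criterion: winner} for easy tallying across queries.
    return {criterion: entry["Winner"] for criterion, entry in verdict.items()}
```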
+ +### Overall Performance Table +| | **Agriculture** | | **CS** | | **Legal** | | **Mix** | | +|----------------------|-------------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------| +| | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | NaiveRAG | **LightRAG** | +| **Comprehensiveness** | 32.69% | **67.31%** | 35.44% | **64.56%** | 19.05% | **80.95%** | 36.36% | **63.64%** | +| **Diversity** | 24.09% | **75.91%** | 35.24% | **64.76%** | 10.98% | **89.02%** | 30.76% | **69.24%** | +| **Empowerment** | 31.35% | **68.65%** | 35.48% | **64.52%** | 17.59% | **82.41%** | 40.95% | **59.05%** | +| **Overall** | 33.30% | **66.70%** | 34.76% | **65.24%** | 17.46% | **82.54%** | 37.59% | **62.40%** | +| | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | RQ-RAG | **LightRAG** | +| **Comprehensiveness** | 32.05% | **67.95%** | 39.30% | **60.70%** | 18.57% | **81.43%** | 38.89% | **61.11%** | +| **Diversity** | 29.44% | **70.56%** | 38.71% | **61.29%** | 15.14% | **84.86%** | 28.50% | **71.50%** | +| **Empowerment** | 32.51% | **67.49%** | 37.52% | **62.48%** | 17.80% | **82.20%** | 43.96% | **56.04%** | +| **Overall** | 33.29% | **66.71%** | 39.03% | **60.97%** | 17.80% | **82.20%** | 39.61% | **60.39%** | +| | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | HyDE | **LightRAG** | +| **Comprehensiveness** | 24.39% | **75.61%** | 36.49% | **63.51%** | 27.68% | **72.32%** | 42.17% | **57.83%** | +| **Diversity** | 24.96% | **75.34%** | 37.41% | **62.59%** | 18.79% | **81.21%** | 30.88% | **69.12%** | +| **Empowerment** | 24.89% | **75.11%** | 34.99% | **65.01%** | 26.99% | **73.01%** | **45.61%** | **54.39%** | +| **Overall** | 23.17% | **76.83%** | 35.67% | **64.33%** | 27.68% | **72.32%** | 42.72% | **57.28%** | +| | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | GraphRAG | **LightRAG** | +| **Comprehensiveness** | 45.56% | **54.44%** | 45.98% | **54.02%** | 47.13% | **52.87%** | **51.86%** | 48.14% | +| **Diversity** | 19.65% | **80.35%** | 39.64% | **60.36%** | 25.55% | **74.45%** | 35.87% | **64.13%** | +| **Empowerment** | 36.69% | **63.31%** | 45.09% | **54.91%** | 42.81% | **57.19%** | **52.94%** | 47.06% | +| **Overall** | 43.62% | **56.38%** | 45.98% | **54.02%** | 45.70% | **54.30%** | **51.86%** | 48.14% | + +## Reproduce +All the code can be found in the `./reproduce` directory. + +### Step-0 Extract Unique Contexts +First, we need to extract unique contexts in the datasets. + +
+ Code + +```python +def extract_unique_contexts(input_directory, output_directory): + + os.makedirs(output_directory, exist_ok=True) + + jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl')) + print(f"Found {len(jsonl_files)} JSONL files.") + + for file_path in jsonl_files: + filename = os.path.basename(file_path) + name, ext = os.path.splitext(filename) + output_filename = f"{name}_unique_contexts.json" + output_path = os.path.join(output_directory, output_filename) + + unique_contexts_dict = {} + + print(f"Processing file: {filename}") + + try: + with open(file_path, 'r', encoding='utf-8') as infile: + for line_number, line in enumerate(infile, start=1): + line = line.strip() + if not line: + continue + try: + json_obj = json.loads(line) + context = json_obj.get('context') + if context and context not in unique_contexts_dict: + unique_contexts_dict[context] = None + except json.JSONDecodeError as e: + print(f"JSON decoding error in file {filename} at line {line_number}: {e}") + except FileNotFoundError: + print(f"File not found: {filename}") + continue + except Exception as e: + print(f"An error occurred while processing file {filename}: {e}") + continue + + unique_contexts_list = list(unique_contexts_dict.keys()) + print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.") + + try: + with open(output_path, 'w', encoding='utf-8') as outfile: + json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4) + print(f"Unique `context` entries have been saved to: {output_filename}") + except Exception as e: + print(f"An error occurred while saving to the file {output_filename}: {e}") + + print("All files have been processed.") + +``` +
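A possible driver for the function above; the directory names are placeholders for wherever the UltraDomain `.jsonl` files are stored, and the imports cover the modules the function itself relies on.

```python
import glob
import json
import os

# extract_unique_contexts() is the function defined above.

if __name__ == "__main__":
    input_directory = "../datasets"                   # folder containing the *.jsonl files
    output_directory = "../datasets/unique_contexts"  # where *_unique_contexts.json files go
    extract_unique_contexts(input_directory, output_directory)
```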
+ +### Step-1 Insert Contexts +For the extracted contexts, we insert them into the LightRAG system. + +
+ Code + +```python +def insert_text(rag, file_path): + with open(file_path, mode='r') as f: + unique_contexts = json.load(f) + + retries = 0 + max_retries = 3 + while retries < max_retries: + try: + rag.insert(unique_contexts) + break + except Exception as e: + retries += 1 + print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}") + time.sleep(10) + if retries == max_retries: + print("Insertion failed after exceeding the maximum number of retries") +``` +
+ +### Step-2 Generate Queries + +We extract tokens from the first and the second half of each context in the dataset, then combine them as dataset descriptions to generate queries. + +
+ Code + +```python +tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + +def get_summary(context, tot_tokens=2000): + tokens = tokenizer.tokenize(context) + half_tokens = tot_tokens // 2 + + start_tokens = tokens[1000:1000 + half_tokens] + end_tokens = tokens[-(1000 + half_tokens):1000] + + summary_tokens = start_tokens + end_tokens + summary = tokenizer.convert_tokens_to_string(summary_tokens) + + return summary +``` +
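These summaries are then substituted into the query-generation prompt from the Generate Query section. A sketch of that glue, where the input file name is a placeholder, `get_summary` is the function defined above, and `prompt_template` stands for the generation prompt with a `{description}` slot:

```python
import json

def build_description(contexts, tot_tokens=2000):
    # Summarize every context, then join the summaries into one dataset description.
    summaries = [get_summary(ctx, tot_tokens=tot_tokens) for ctx in contexts]
    return "\n\n".join(summaries)

with open("../datasets/unique_contexts/agriculture_unique_contexts.json", encoding="utf-8") as f:
    contexts = json.load(f)

description = build_description(contexts)
query_prompt = prompt_template.format(description=description)
# Send `query_prompt` to your LLM of choice to obtain the "- Question N: ..." list.
```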
+ +### Step-3 Query +For the queries generated in Step-2, we will extract them and query LightRAG. + +
+ Code + +```python +def extract_queries(file_path): + with open(file_path, 'r') as f: + data = f.read() + + data = data.replace('**', '') + + queries = re.findall(r'- Question \d+: (.+)', data) + + return queries +``` +
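A small driver tying Step-3 together: read the generated questions, then send each one to a LightRAG instance built over the corresponding dataset. The file path and working directory are placeholders, and `extract_queries` (which also needs `import re`) is the function defined above.

```python
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete

rag = LightRAG(working_dir="./agriculture", llm_model_func=gpt_4o_mini_complete)

queries = extract_queries("./queries/agriculture_questions.txt")
for i, query in enumerate(queries, start=1):
    result = rag.query(query, param=QueryParam(mode="hybrid"))
    print(f"[{i}/{len(queries)}] {query}\n{result}\n")
```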
+ +## Code Structure + +```python +. +โ”œโ”€โ”€ examples +โ”‚ โ”œโ”€โ”€ batch_eval.py +โ”‚ โ”œโ”€โ”€ generate_query.py +โ”‚ โ”œโ”€โ”€ graph_visual_with_html.py +โ”‚ โ”œโ”€โ”€ graph_visual_with_neo4j.py +โ”‚ โ”œโ”€โ”€ lightrag_api_openai_compatible_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_azure_openai_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_bedrock_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_hf_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_lmdeploy_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_ollama_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_openai_compatible_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_openai_demo.py +โ”‚ โ”œโ”€โ”€ lightrag_siliconcloud_demo.py +โ”‚ โ””โ”€โ”€ vram_management_demo.py +โ”œโ”€โ”€ lightrag +โ”‚ โ”œโ”€โ”€ __init__.py +โ”‚ โ”œโ”€โ”€ base.py +โ”‚ โ”œโ”€โ”€ lightrag.py +โ”‚ โ”œโ”€โ”€ llm.py +โ”‚ โ”œโ”€โ”€ operate.py +โ”‚ โ”œโ”€โ”€ prompt.py +โ”‚ โ”œโ”€โ”€ storage.py +โ”‚ โ””โ”€โ”€ utils.py +โ”œโ”€โ”€ reproduce +โ”‚ โ”œโ”€โ”€ Step_0.py +โ”‚ โ”œโ”€โ”€ Step_1_openai_compatible.py +โ”‚ โ”œโ”€โ”€ Step_1.py +โ”‚ โ”œโ”€โ”€ Step_2.py +โ”‚ โ”œโ”€โ”€ Step_3_openai_compatible.py +โ”‚ โ””โ”€โ”€ Step_3.py +โ”œโ”€โ”€ .gitignore +โ”œโ”€โ”€ .pre-commit-config.yaml +โ”œโ”€โ”€ LICENSE +โ”œโ”€โ”€ README.md +โ”œโ”€โ”€ requirements.txt +โ””โ”€โ”€ setup.py +``` + +## Star History + + + + + + Star History Chart + + + +## Citation + +```python +@article{guo2024lightrag, +title={LightRAG: Simple and Fast Retrieval-Augmented Generation}, +author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang}, +year={2024}, +eprint={2410.05779}, +archivePrefix={arXiv}, +primaryClass={cs.IR} +} ``` + From 24aedf35f23a54fe4c14aecc824abe3340cf4e8d Mon Sep 17 00:00:00 2001 From: Zhenyu Pan <120090196@link.cuhk.edu.cn> Date: Tue, 29 Oct 2024 23:29:47 +0800 Subject: [PATCH 36/39] [hotfix-#163] Fix asynchronous problem --- examples/lightrag_openai_compatible_demo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/lightrag_openai_compatible_demo.py b/examples/lightrag_openai_compatible_demo.py index 2470fc00..1422e2c2 100644 --- a/examples/lightrag_openai_compatible_demo.py +++ b/examples/lightrag_openai_compatible_demo.py @@ -69,25 +69,25 @@ async def main(): ) with open("./book.txt", "r", encoding="utf-8") as f: - rag.insert(f.read()) + await rag.ainsert(f.read()) # Perform naive search print( - rag.query( + await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode="naive") ) ) # Perform local search print( - rag.query( + await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode="local") ) ) # Perform global search print( - rag.query( + await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode="global"), ) @@ -95,7 +95,7 @@ async def main(): # Perform hybrid search print( - rag.query( + await rag.aquery( "What are the top themes in this story?", param=QueryParam(mode="hybrid"), ) From 1d6754f01a16867fa52d7823118a4f0e871f89f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=9C=A8Data=20Intelligence=20Lab=40HKU=E2=9C=A8?= <118165258+HKUDS@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:48:45 +0800 Subject: [PATCH 37/39] Update README.md --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index acfd7c96..b85c3534 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,12 @@ This repository hosts the code of LightRAG. The structure of this code is based ## ๐ŸŽ‰ News -- [x] [2024.10.29]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขMulti-file types are now supported by `textract`. 
-- [x] [2024.10.20]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWeโ€™ve added a new feature to LightRAG: Graph Visualization. -- [x] [2024.10.18]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWeโ€™ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author! -- [x] [2024.10.17]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขWe have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! ๐ŸŽ‰๐ŸŽ‰ -- [x] [2024.10.16]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! -- [x] [2024.10.15]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ขLightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! +- [x] [2024.10.29]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`. +- [x] [2024.10.20]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข Weโ€™ve added a new feature to LightRAG: Graph Visualization. +- [x] [2024.10.18]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข Weโ€™ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author! +- [x] [2024.10.17]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! ๐ŸŽ‰๐ŸŽ‰ +- [x] [2024.10.16]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! +- [x] [2024.10.15]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! ## Algorithm Flowchart From 62d7ac2186b8ee1031a29179530f272c5b5e2d62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=9C=A8Data=20Intelligence=20Lab=40HKU=E2=9C=A8?= <118165258+HKUDS@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:49:59 +0800 Subject: [PATCH 38/39] Update README.md --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b85c3534..d506a12b 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,12 @@ This repository hosts the code of LightRAG. The structure of this code is based ## ๐ŸŽ‰ News -- [x] [2024.10.29]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`. -- [x] [2024.10.20]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข Weโ€™ve added a new feature to LightRAG: Graph Visualization. -- [x] [2024.10.18]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข Weโ€™ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author! -- [x] [2024.10.17]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข We have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! ๐ŸŽ‰๐ŸŽ‰ -- [x] [2024.10.16]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! -- [x] [2024.10.15]๐ŸŽฏ๐ŸŽฏ๐Ÿ“ข๐Ÿ“ข LightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! +- [x] [2024.10.29]๐ŸŽฏ๐Ÿ“ขLightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`. +- [x] [2024.10.20]๐ŸŽฏ๐Ÿ“ขWeโ€™ve added a new feature to LightRAG: Graph Visualization. +- [x] [2024.10.18]๐ŸŽฏ๐Ÿ“ขWeโ€™ve added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author! +- [x] [2024.10.17]๐ŸŽฏ๐Ÿ“ขWe have created a [Discord channel](https://discord.gg/mvsfu2Tg)! Welcome to join for sharing and discussions! ๐ŸŽ‰๐ŸŽ‰ +- [x] [2024.10.16]๐ŸŽฏ๐Ÿ“ขLightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! 
+- [x] [2024.10.15]๐ŸŽฏ๐Ÿ“ขLightRAG now supports [Hugging Face models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)! ## Algorithm Flowchart From f95384b68cd7ea0afda5efafe73587ac3b0f33a9 Mon Sep 17 00:00:00 2001 From: WinstonCHEN1 <1281838223@qq.com> Date: Wed, 30 Oct 2024 15:36:44 -0700 Subject: [PATCH 39/39] fix:Step_3.py context --- reproduce/Step_3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reproduce/Step_3.py b/reproduce/Step_3.py index a56190fc..2c5d699c 100644 --- a/reproduce/Step_3.py +++ b/reproduce/Step_3.py @@ -18,8 +18,8 @@ def extract_queries(file_path): async def process_query(query_text, rag_instance, query_param): try: - result, context = await rag_instance.aquery(query_text, param=query_param) - return {"query": query_text, "result": result, "context": context}, None + result = await rag_instance.aquery(query_text, param=query_param) + return {"query": query_text, "result": result}, None except Exception as e: return None, {"query": query_text, "error": str(e)}