From a445e556d8ea11e3edd305f78d98cdf709f71a21 Mon Sep 17 00:00:00 2001
From: Haotian Zhang
Date: Sat, 9 Nov 2024 14:59:41 -0500
Subject: [PATCH 1/4] Update README.md

add missing imports to examples in README.md
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index b726f605..7d798b17 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,7 @@ rag = LightRAG(
 ```python
 from lightrag.llm import hf_model_complete, hf_embedding
 from transformers import AutoModel, AutoTokenizer
+from lightrag.utils import EmbeddingFunc
 
 # Initialize LightRAG with Hugging Face model
 rag = LightRAG(
@@ -172,6 +173,7 @@ Then you only need to set LightRAG as follows:
 
 ```python
 from lightrag.llm import ollama_model_complete, ollama_embedding
+from lightrag.utils import EmbeddingFunc
 
 # Initialize LightRAG with Ollama model
 rag = LightRAG(

From a307065828f991a5d089175f3df2f983a8523ce7 Mon Sep 17 00:00:00 2001
From: aiproductguy
Date: Sat, 9 Nov 2024 18:48:10 -0700
Subject: [PATCH 2/4] Added GUI (linked), updated README.md

I am not sure streamlit is the interface I want to contribute back to the
LightRAG project, but willing to share it all under the same MIT license.
I figured I would at least share the link and source code in the README.
---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b726f605..6356db33 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 <div align="center">

 🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation

-![请添加图片描述](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)
+![LightRAG Image](https://i-blog.csdnimg.cn/direct/567139f1a36e4564abc63ce5c12b6271.jpeg)

@@ -18,10 +18,11 @@

 This repository hosts the code of LightRAG. The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
-![请添加图片描述](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
+![LightRAG Diagram](https://i-blog.csdnimg.cn/direct/b2aaf634151b4706892693ffb43d9093.png)
 ## 🎉 News
+- [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
 - [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
 - [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
 - [x] [2024.10.20]🎯📢We’ve added a new feature to LightRAG: Graph Visualization.

From d0c1844264bec0e2c5fc339c23b45d5a5b436c52 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Mon, 11 Nov 2024 10:45:22 +0800
Subject: [PATCH 3/4] Linting

---
 Dockerfile                                 |  2 +-
 README.md                                  |  9 +++----
 .../lightrag_api_openai_compatible_demo.py | 19 +++++++++-----
 lightrag/__init__.py                       |  2 +-
 lightrag/kg/__init__.py                    |  2 --
 lightrag/kg/neo4j_impl.py                  |  4 +--
 lightrag/lightrag.py                       |  1 -
 lightrag/operate.py                        | 25 +++++++++----------
 test.py                                    |  2 +-
 test_neo4j.py                              |  2 +-
 10 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 1b60c089..787816fe 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -53,4 +53,4 @@
 VOLUME /data /logs
 EXPOSE 7474 7473 7687
 ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
-CMD ["neo4j"]
\ No newline at end of file
+CMD ["neo4j"]
diff --git a/README.md b/README.md
index ec53d444..3cbacab7 100644
--- a/README.md
+++ b/README.md
@@ -196,7 +196,7 @@ rag = LightRAG(
 
 ### Using Neo4J for Storage
 
 * For production level scenarios you will most likely want to leverage an enterprise solution
-* for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
+* for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
 * See: https://hub.docker.com/_/neo4j
 
@@ -209,7 +209,7 @@ When you launch the project be sure to override the default KG: NetworkS
 by specifying kg="Neo4JStorage".
 
 # Note: Default settings use NetworkX
-#Initialize LightRAG with Neo4J implementation.
+#Initialize LightRAG with Neo4J implementation.
 WORKING_DIR = "./local_neo4jWorkDir"
 
 rag = LightRAG(
@@ -503,8 +503,8 @@ pip install fastapi uvicorn pydantic
 export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
 export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
 export OPENAI_API_KEY="Your OpenAI API key" # Required
-export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
-export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
+export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
+export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
 ```
 
 3. Run the API server:
@@ -923,4 +923,3 @@ primaryClass={cs.IR}
 }
 ```
 **Thank you for your interest in our work!**
-
diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py
index 20a05a5f..39001b10 100644
--- a/examples/lightrag_api_openai_compatible_demo.py
+++ b/examples/lightrag_api_openai_compatible_demo.py
@@ -33,7 +33,7 @@ if not os.path.exists(WORKING_DIR):
 
 
 async def llm_model_func(
-    prompt, system_prompt=None, history_messages=[], **kwargs
+    prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
     return await openai_complete_if_cache(
         LLM_MODEL,
@@ -66,9 +66,11 @@ async def get_embedding_dim():
 
 
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=llm_model_func,
-    embedding_func=EmbeddingFunc(embedding_dim=asyncio.run(get_embedding_dim()),
-                                 max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
-                                 func=embedding_func),
+    embedding_func=EmbeddingFunc(
+        embedding_dim=asyncio.run(get_embedding_dim()),
+        max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+        func=embedding_func,
+    ),
 )
@@ -99,8 +101,13 @@ async def query_endpoint(request: QueryRequest):
     try:
         loop = asyncio.get_event_loop()
         result = await loop.run_in_executor(
-            None, lambda: rag.query(request.query,
-            param=QueryParam(mode=request.mode, only_need_context=request.only_need_context))
+            None,
+            lambda: rag.query(
+                request.query,
+                param=QueryParam(
+                    mode=request.mode, only_need_context=request.only_need_context
+                ),
+            ),
         )
         return Response(status="success", data=result)
     except Exception as e:
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index 8e76a260..b73db1b9 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
 
-__version__ = "0.0.8"
+__version__ = "0.0.9"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
diff --git a/lightrag/kg/__init__.py b/lightrag/kg/__init__.py
index de9c1f9a..087eaac9 100644
--- a/lightrag/kg/__init__.py
+++ b/lightrag/kg/__init__.py
@@ -1,3 +1 @@
 # print ("init package vars here. ......")
-
-
diff --git a/lightrag/kg/neo4j_impl.py b/lightrag/kg/neo4j_impl.py
index 4a3a4e66..e6b33a9b 100644
--- a/lightrag/kg/neo4j_impl.py
+++ b/lightrag/kg/neo4j_impl.py
@@ -146,11 +146,11 @@ class Neo4JStorage(BaseGraphStorage):
         entity_name_label_target = target_node_id.strip('"')
         """
         Find all edges between nodes of two given labels
-
+
         Args:
             source_node_label (str): Label of the source nodes
             target_node_label (str): Label of the target nodes
-
+
         Returns:
             list: List of all relationships/edges found
         """
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 2ae59f3b..3abe9185 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -61,7 +61,6 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
         return loop
 
 
-
 @dataclass
 class LightRAG:
     working_dir: str = field(
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 04725d6a..e86388dc 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -560,19 +560,19 @@ async def _find_most_related_text_unit_from_entities(
         if not this_edges:
             continue
         all_one_hop_nodes.update([e[1] for e in this_edges])
-
+
     all_one_hop_nodes = list(all_one_hop_nodes)
     all_one_hop_nodes_data = await asyncio.gather(
         *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
     )
-
+
     # Add null check for node data
     all_one_hop_text_units_lookup = {
         k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
         for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
         if v is not None and "source_id" in v  # Add source_id check
     }
-
+
     all_text_units_lookup = {}
     for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
         for c_id in this_text_units:
@@ -586,7 +586,7 @@ async def _find_most_related_text_unit_from_entities(
                     and c_id in all_one_hop_text_units_lookup[e[1]]
                 ):
                     relation_counts += 1
-
+
             chunk_data = await text_chunks_db.get_by_id(c_id)
             if chunk_data is not None and "content" in chunk_data:  # Add content check
                 all_text_units_lookup[c_id] = {
@@ -594,29 +594,28 @@ async def _find_most_related_text_unit_from_entities(
                     "order": index,
                     "relation_counts": relation_counts,
                 }
-
+
     # Filter out None values and ensure data has content
     all_text_units = [
-        {"id": k, **v}
-        for k, v in all_text_units_lookup.items()
+        {"id": k, **v}
+        for k, v in all_text_units_lookup.items()
         if v is not None and v.get("data") is not None and "content" in v["data"]
     ]
-
+
     if not all_text_units:
         logger.warning("No valid text units found")
         return []
-
+
     all_text_units = sorted(
-        all_text_units,
-        key=lambda x: (x["order"], -x["relation_counts"])
+        all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
     )
-
+
     all_text_units = truncate_list_by_token_size(
         all_text_units,
         key=lambda x: x["data"]["content"],
         max_token_size=query_param.max_token_for_text_unit,
     )
-
+
     all_text_units = [t["data"] for t in all_text_units]
 
     return all_text_units
diff --git a/test.py b/test.py
index 35c03afe..84cbe373 100644
--- a/test.py
+++ b/test.py
@@ -1,6 +1,6 @@
 import os
 from lightrag import LightRAG, QueryParam
-from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
+from lightrag.llm import gpt_4o_mini_complete
 #########
 # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
 # import nest_asyncio
diff --git a/test_neo4j.py b/test_neo4j.py
index 7b13734e..822cd7bc 100644
--- a/test_neo4j.py
+++ b/test_neo4j.py
@@ -1,6 +1,6 @@
 import os
 from lightrag import LightRAG, QueryParam
-from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
+from lightrag.llm import gpt_4o_mini_complete
 #########

From 319de6fece5a448e567ffbacbf1dc8ad2b1ae00c Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Mon, 11 Nov 2024 10:52:01 +0800
Subject: [PATCH 4/4] Update README.md

---
 README.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3cbacab7..59245c44 100644
--- a/README.md
+++ b/README.md
@@ -869,6 +869,9 @@ def extract_queries(file_path):
 │   ├── lightrag_siliconcloud_demo.py
 │   └── vram_management_demo.py
 ├── lightrag
+│   ├── kg
+│   │   ├── __init__.py
+│   │   └── neo4j_impl.py
 │   ├── __init__.py
 │   ├── base.py
 │   ├── lightrag.py
@@ -886,10 +889,14 @@ def extract_queries(file_path):
 │   └── Step_3.py
 ├── .gitignore
 ├── .pre-commit-config.yaml
+├── Dockerfile
+├── get_all_edges_nx.py
 ├── LICENSE
 ├── README.md
 ├── requirements.txt
-└── setup.py
+├── setup.py
+├── test_neo4j.py
+└── test.py
 ```
 
 ## Star History
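
For reference, once PATCH 1/4 lands the README's Ollama example can resolve `EmbeddingFunc` and reads roughly like the sketch below. The `EmbeddingFunc(embedding_dim=..., max_token_size=..., func=...)` shape mirrors the demo reformatted in PATCH 3/4; the working directory, model names, and embedding dimension here are illustrative placeholders, not values taken from these patches.

```python
from lightrag import LightRAG, QueryParam
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc  # import added by PATCH 1/4

WORKING_DIR = "./local_ollamaWorkDir"  # placeholder working directory

# Initialize LightRAG with an Ollama-served model. The model names and the
# embedding dimension below are placeholders -- match them to the models you run.
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=ollama_model_complete,
    llm_model_name="qwen2",  # assumed chat model name
    embedding_func=EmbeddingFunc(
        embedding_dim=768,      # assumed dimension of the embedding model
        max_token_size=8192,    # assumed max tokens per embedding call
        func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
    ),
)

# Insert a document and run a query, mirroring the flow in the API demo from PATCH 3/4.
rag.insert("LightRAG builds a knowledge graph over your documents.")
print(rag.query("What does LightRAG build?", param=QueryParam(mode="hybrid")))
```

Wrapping the embedding callable in `EmbeddingFunc` is what carries the `embedding_dim` and `max_token_size` metadata into LightRAG, so the README examples raise a `NameError` without the import that PATCH 1/4 adds.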