diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..61fc28be --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Gustavo Ye + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index fb65137c..7b7f3d29 100644 --- a/README.md +++ b/README.md @@ -1 +1,6 @@ # LightRAG + +## Citation +## Acknowledgement + +The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag). 
\ No newline at end of file diff --git a/examples/insert.py b/examples/insert.py index 95a3e150..d0689bae 100644 --- a/examples/insert.py +++ b/examples/insert.py @@ -1,6 +1,5 @@ import os import sys -sys.path.append('xxx/xxx/LightRAG') from lightrag import LightRAG diff --git a/examples/query.py b/examples/query.py index 60bf9346..b7de519b 100644 --- a/examples/query.py +++ b/examples/query.py @@ -1,6 +1,5 @@ import os import sys -sys.path.append('xxx/xxx/LightRAG') from lightrag import LightRAG, QueryParam @@ -13,5 +12,5 @@ rag = LightRAG(working_dir=WORKING_DIR) mode = 'global' query_param = QueryParam(mode=mode) -result, _ = rag.query("", param=query_param) +result = rag.query("", param=query_param) print(result) \ No newline at end of file diff --git a/lightrag/__init__.py b/lightrag/__init__.py index 63d03137..a83afba3 100644 --- a/lightrag/__init__.py +++ b/lightrag/__init__.py @@ -1 +1,5 @@ -from .lightrag import LightRAG, QueryParam \ No newline at end of file +from .lightrag import LightRAG, QueryParam + +__version__ = "0.0.1" +__author__ = "Zirui Guo" +__url__ = "https://github.com/HKUDS/GraphEdit" diff --git a/lightrag/__pycache__/__init__.cpython-310.pyc b/lightrag/__pycache__/__init__.cpython-310.pyc index aea125af..185bed6d 100644 Binary files a/lightrag/__pycache__/__init__.cpython-310.pyc and b/lightrag/__pycache__/__init__.cpython-310.pyc differ diff --git a/lightrag/__pycache__/base.cpython-310.pyc b/lightrag/__pycache__/base.cpython-310.pyc index 7839969c..4e0f8ec9 100644 Binary files a/lightrag/__pycache__/base.cpython-310.pyc and b/lightrag/__pycache__/base.cpython-310.pyc differ diff --git a/lightrag/__pycache__/llm.cpython-310.pyc b/lightrag/__pycache__/llm.cpython-310.pyc index 63b3832e..6d9fc0b2 100644 Binary files a/lightrag/__pycache__/llm.cpython-310.pyc and b/lightrag/__pycache__/llm.cpython-310.pyc differ diff --git a/lightrag/operate.py b/lightrag/operate.py index edea2007..a8564f0d 100644 --- a/lightrag/operate.py +++ 
b/lightrag/operate.py @@ -176,7 +176,6 @@ async def _merge_edges_then_upsert( already_weights = [] already_source_ids = [] already_description = [] - ################## already_keywords = [] if await knwoledge_graph_inst.has_edge(src_id, tgt_id): @@ -186,7 +185,6 @@ async def _merge_edges_then_upsert( split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP]) ) already_description.append(already_edge["description"]) - ############ already_keywords.extend( split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP]) ) @@ -195,7 +193,6 @@ async def _merge_edges_then_upsert( description = GRAPH_FIELD_SEP.join( sorted(set([dp["description"] for dp in edges_data] + already_description)) ) - ########## keywords = GRAPH_FIELD_SEP.join( sorted(set([dp["keywords"] for dp in edges_data] + already_keywords)) ) @@ -403,7 +400,7 @@ async def local_query( except json.JSONDecodeError as e: # Handle parsing error print(f"JSON parsing error: {e}") - return PROMPTS["fail_response"], "None" + return PROMPTS["fail_response"] context = await _build_local_query_context( keywords, @@ -415,7 +412,7 @@ async def local_query( if query_param.only_need_context: return context if context is None: - return PROMPTS["fail_response"], "None" + return PROMPTS["fail_response"] sys_prompt_temp = PROMPTS["rag_response"] sys_prompt = sys_prompt_temp.format( context_data=context, response_type=query_param.response_type @@ -424,7 +421,7 @@ async def local_query( query, system_prompt=sys_prompt, ) - return response, context + return response async def _build_local_query_context( query, @@ -622,7 +619,7 @@ async def global_query( except json.JSONDecodeError as e: # Handle parsing error print(f"JSON parsing error: {e}") - return PROMPTS["fail_response"], "None" + return PROMPTS["fail_response"] context = await _build_global_query_context( keywords, @@ -636,7 +633,7 @@ async def global_query( if query_param.only_need_context: return context if context is None: - return 
PROMPTS["fail_response"], "None" + return PROMPTS["fail_response"] sys_prompt_temp = PROMPTS["rag_response"] sys_prompt = sys_prompt_temp.format( @@ -646,7 +643,7 @@ async def global_query( query, system_prompt=sys_prompt, ) - return (response, context) + return response async def _build_global_query_context( keywords, @@ -836,7 +833,7 @@ async def hybird_query( except json.JSONDecodeError as e: # Handle parsing error print(f"JSON parsing error: {e}") - return PROMPTS["fail_response"], "None" + return PROMPTS["fail_response"] low_level_context = await _build_local_query_context( ll_keywords, @@ -860,7 +857,7 @@ async def hybird_query( if query_param.only_need_context: return context if context is None: - return PROMPTS["fail_response"], "None" + return PROMPTS["fail_response"] sys_prompt_temp = PROMPTS["rag_response"] sys_prompt = sys_prompt_temp.format( @@ -870,7 +867,7 @@ async def hybird_query( query, system_prompt=sys_prompt, ) - return (response, context) + return response def combine_contexts(high_level_context, low_level_context): # Function to extract entities, relationships, and sources from context strings @@ -922,14 +919,14 @@ async def naive_query( use_model_func = global_config["llm_model_func"] results = await chunks_vdb.query(query, top_k=query_param.top_k) if not len(results): - return PROMPTS["fail_response"], "None" + return PROMPTS["fail_response"] chunks_ids = [r["id"] for r in results] chunks = await text_chunks_db.get_by_ids(chunks_ids) maybe_trun_chunks = truncate_list_by_token_size( chunks, key=lambda x: x["content"], - max_token_size=query_param.naive_max_token_for_text_unit, + max_token_size=query_param.max_token_for_text_unit, ) logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks") section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks]) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..8a74d5e2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +openai +tiktoken 
"""Packaging script for LightRAG.

Reads the long description from ``README.md``, the package metadata
(``__author__``, ``__version__``, ``__url__``) from ``lightrag/__init__.py``
and the runtime dependencies from ``requirements.txt``, then hands them to
``setuptools.setup``.  All paths are relative to the current working
directory, so run this from the repository root.
"""
import setuptools

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()


# Names of the module-level assignments to pull out of lightrag/__init__.py.
vars2find = ["__author__", "__version__", "__url__"]
vars2readme = {}
with open("./lightrag/__init__.py", encoding="utf-8") as f:
    for line in f:
        # Split on the FIRST "=" only, so a value that itself contains "="
        # (e.g. a URL with a query string) is kept intact.
        name, sep, value = line.partition("=")
        name = name.strip()
        if not sep or name not in vars2find:
            continue
        # Strip surrounding whitespace and quotes but keep inner spaces:
        # an author such as "Zirui Guo" must not collapse to "ZiruiGuo".
        vars2readme[name] = value.strip().strip("\"'")

missing = [v for v in vars2find if v not in vars2readme]
if missing:
    raise RuntimeError(
        "lightrag/__init__.py does not define: " + ", ".join(missing)
    )

deps = []
with open("./requirements.txt", encoding="utf-8") as f:
    for line in f:
        requirement = line.strip()
        # Skip blank lines and comment lines; neither is a requirement.
        if not requirement or requirement.startswith("#"):
            continue
        deps.append(requirement)

setuptools.setup(
    name="lightrag",
    url=vars2readme["__url__"],
    version=vars2readme["__version__"],
    author=vars2readme["__author__"],
    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=["lightrag"],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.9",
    install_requires=deps,
)