update
This commit is contained in:
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024 Gustavo Ye
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
@@ -1 +1,6 @@
|
|||||||
# LightRAG
|
# LightRAG
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
## Acknowledgement
|
||||||
|
|
||||||
|
The structure of this code is based on [nano-graphrag](https://github.com/gusye1234/nano-graphrag).
|
@@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
sys.path.append('xxx/xxx/LightRAG')
|
|
||||||
|
|
||||||
from lightrag import LightRAG
|
from lightrag import LightRAG
|
||||||
|
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
sys.path.append('xxx/xxx/LightRAG')
|
|
||||||
|
|
||||||
from lightrag import LightRAG, QueryParam
|
from lightrag import LightRAG, QueryParam
|
||||||
|
|
||||||
@@ -13,5 +12,5 @@ rag = LightRAG(working_dir=WORKING_DIR)
|
|||||||
mode = 'global'
|
mode = 'global'
|
||||||
query_param = QueryParam(mode=mode)
|
query_param = QueryParam(mode=mode)
|
||||||
|
|
||||||
result, _ = rag.query("", param=query_param)
|
result = rag.query("", param=query_param)
|
||||||
print(result)
|
print(result)
|
@@ -1 +1,5 @@
|
|||||||
from .lightrag import LightRAG, QueryParam
|
from .lightrag import LightRAG, QueryParam
|
||||||
|
|
||||||
|
# Package metadata. setup.py reads these assignments textually from this file,
# so keep each one on a single line in the form `name = "value"`.
__version__ = "0.0.1"
__author__ = "Zirui Guo"
# Project home page; previously pointed at a different repository (GraphEdit).
__url__ = "https://github.com/HKUDS/LightRAG"
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -176,7 +176,6 @@ async def _merge_edges_then_upsert(
|
|||||||
already_weights = []
|
already_weights = []
|
||||||
already_source_ids = []
|
already_source_ids = []
|
||||||
already_description = []
|
already_description = []
|
||||||
##################
|
|
||||||
already_keywords = []
|
already_keywords = []
|
||||||
|
|
||||||
if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
|
if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
|
||||||
@@ -186,7 +185,6 @@ async def _merge_edges_then_upsert(
|
|||||||
split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
|
split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
|
||||||
)
|
)
|
||||||
already_description.append(already_edge["description"])
|
already_description.append(already_edge["description"])
|
||||||
############
|
|
||||||
already_keywords.extend(
|
already_keywords.extend(
|
||||||
split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
|
split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
|
||||||
)
|
)
|
||||||
@@ -195,7 +193,6 @@ async def _merge_edges_then_upsert(
|
|||||||
description = GRAPH_FIELD_SEP.join(
|
description = GRAPH_FIELD_SEP.join(
|
||||||
sorted(set([dp["description"] for dp in edges_data] + already_description))
|
sorted(set([dp["description"] for dp in edges_data] + already_description))
|
||||||
)
|
)
|
||||||
##########
|
|
||||||
keywords = GRAPH_FIELD_SEP.join(
|
keywords = GRAPH_FIELD_SEP.join(
|
||||||
sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
|
sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
|
||||||
)
|
)
|
||||||
@@ -403,7 +400,7 @@ async def local_query(
|
|||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
# Handle parsing error
|
# Handle parsing error
|
||||||
print(f"JSON parsing error: {e}")
|
print(f"JSON parsing error: {e}")
|
||||||
return PROMPTS["fail_response"], "None"
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
context = await _build_local_query_context(
|
context = await _build_local_query_context(
|
||||||
keywords,
|
keywords,
|
||||||
@@ -415,7 +412,7 @@ async def local_query(
|
|||||||
if query_param.only_need_context:
|
if query_param.only_need_context:
|
||||||
return context
|
return context
|
||||||
if context is None:
|
if context is None:
|
||||||
return PROMPTS["fail_response"], "None"
|
return PROMPTS["fail_response"]
|
||||||
sys_prompt_temp = PROMPTS["rag_response"]
|
sys_prompt_temp = PROMPTS["rag_response"]
|
||||||
sys_prompt = sys_prompt_temp.format(
|
sys_prompt = sys_prompt_temp.format(
|
||||||
context_data=context, response_type=query_param.response_type
|
context_data=context, response_type=query_param.response_type
|
||||||
@@ -424,7 +421,7 @@ async def local_query(
|
|||||||
query,
|
query,
|
||||||
system_prompt=sys_prompt,
|
system_prompt=sys_prompt,
|
||||||
)
|
)
|
||||||
return response, context
|
return response
|
||||||
|
|
||||||
async def _build_local_query_context(
|
async def _build_local_query_context(
|
||||||
query,
|
query,
|
||||||
@@ -622,7 +619,7 @@ async def global_query(
|
|||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
# Handle parsing error
|
# Handle parsing error
|
||||||
print(f"JSON parsing error: {e}")
|
print(f"JSON parsing error: {e}")
|
||||||
return PROMPTS["fail_response"], "None"
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
context = await _build_global_query_context(
|
context = await _build_global_query_context(
|
||||||
keywords,
|
keywords,
|
||||||
@@ -636,7 +633,7 @@ async def global_query(
|
|||||||
if query_param.only_need_context:
|
if query_param.only_need_context:
|
||||||
return context
|
return context
|
||||||
if context is None:
|
if context is None:
|
||||||
return PROMPTS["fail_response"], "None"
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
sys_prompt_temp = PROMPTS["rag_response"]
|
sys_prompt_temp = PROMPTS["rag_response"]
|
||||||
sys_prompt = sys_prompt_temp.format(
|
sys_prompt = sys_prompt_temp.format(
|
||||||
@@ -646,7 +643,7 @@ async def global_query(
|
|||||||
query,
|
query,
|
||||||
system_prompt=sys_prompt,
|
system_prompt=sys_prompt,
|
||||||
)
|
)
|
||||||
return (response, context)
|
return response
|
||||||
|
|
||||||
async def _build_global_query_context(
|
async def _build_global_query_context(
|
||||||
keywords,
|
keywords,
|
||||||
@@ -836,7 +833,7 @@ async def hybird_query(
|
|||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
# Handle parsing error
|
# Handle parsing error
|
||||||
print(f"JSON parsing error: {e}")
|
print(f"JSON parsing error: {e}")
|
||||||
return PROMPTS["fail_response"], "None"
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
low_level_context = await _build_local_query_context(
|
low_level_context = await _build_local_query_context(
|
||||||
ll_keywords,
|
ll_keywords,
|
||||||
@@ -860,7 +857,7 @@ async def hybird_query(
|
|||||||
if query_param.only_need_context:
|
if query_param.only_need_context:
|
||||||
return context
|
return context
|
||||||
if context is None:
|
if context is None:
|
||||||
return PROMPTS["fail_response"], "None"
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
sys_prompt_temp = PROMPTS["rag_response"]
|
sys_prompt_temp = PROMPTS["rag_response"]
|
||||||
sys_prompt = sys_prompt_temp.format(
|
sys_prompt = sys_prompt_temp.format(
|
||||||
@@ -870,7 +867,7 @@ async def hybird_query(
|
|||||||
query,
|
query,
|
||||||
system_prompt=sys_prompt,
|
system_prompt=sys_prompt,
|
||||||
)
|
)
|
||||||
return (response, context)
|
return response
|
||||||
|
|
||||||
def combine_contexts(high_level_context, low_level_context):
|
def combine_contexts(high_level_context, low_level_context):
|
||||||
# Function to extract entities, relationships, and sources from context strings
|
# Function to extract entities, relationships, and sources from context strings
|
||||||
@@ -922,14 +919,14 @@ async def naive_query(
|
|||||||
use_model_func = global_config["llm_model_func"]
|
use_model_func = global_config["llm_model_func"]
|
||||||
results = await chunks_vdb.query(query, top_k=query_param.top_k)
|
results = await chunks_vdb.query(query, top_k=query_param.top_k)
|
||||||
if not len(results):
|
if not len(results):
|
||||||
return PROMPTS["fail_response"], "None"
|
return PROMPTS["fail_response"]
|
||||||
chunks_ids = [r["id"] for r in results]
|
chunks_ids = [r["id"] for r in results]
|
||||||
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
chunks = await text_chunks_db.get_by_ids(chunks_ids)
|
||||||
|
|
||||||
maybe_trun_chunks = truncate_list_by_token_size(
|
maybe_trun_chunks = truncate_list_by_token_size(
|
||||||
chunks,
|
chunks,
|
||||||
key=lambda x: x["content"],
|
key=lambda x: x["content"],
|
||||||
max_token_size=query_param.naive_max_token_for_text_unit,
|
max_token_size=query_param.max_token_for_text_unit,
|
||||||
)
|
)
|
||||||
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
|
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
|
||||||
section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
|
section = "--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
|
||||||
|
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
openai
|
||||||
|
tiktoken
|
||||||
|
networkx
|
||||||
|
graspologic
|
||||||
|
nano-vectordb
|
||||||
|
hnswlib
|
||||||
|
xxhash
|
||||||
|
tenacity
|
39
setup.py
Normal file
39
setup.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""Packaging script for LightRAG.

Reads the package metadata (author, version, URL) from
``lightrag/__init__.py`` and the dependency list from ``requirements.txt``,
then passes everything to ``setuptools.setup``.
"""
import setuptools

# README.md is used verbatim as the long description. Read it as UTF-8
# explicitly so the build does not depend on the platform default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()


# Dunder variables to extract from the package's __init__.py without
# importing the package (importing would require its dependencies).
vars2find = ["__author__", "__version__", "__url__"]
vars2readme = {}
with open("./lightrag/__init__.py", encoding="utf-8") as f:
    for line in f:
        for v in vars2find:
            if line.startswith(v):
                # Split on the first "=" only, then trim whitespace and the
                # surrounding quotes. Removing every space/quote in the line
                # (as was done before) collapsed "Zirui Guo" to "ZiruiGuo",
                # and splitting on every "=" truncated values containing "=".
                _, _, raw_value = line.partition("=")
                vars2readme[v] = raw_value.strip().strip("\"'")

# One requirement per non-blank line of requirements.txt.
deps = []
with open("./requirements.txt", encoding="utf-8") as f:
    for line in f:
        requirement = line.strip()
        if not requirement:
            continue
        deps.append(requirement)

setuptools.setup(
    name="lightrag",
    url=vars2readme["__url__"],
    version=vars2readme["__version__"],
    author=vars2readme["__author__"],
    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=["lightrag"],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.9",
    install_requires=deps,
)
|
Reference in New Issue
Block a user