This commit is contained in:
LarFii
2024-11-11 10:45:22 +08:00
parent 791917e9d6
commit b6b2e69773
10 changed files with 35 additions and 33 deletions

View File

@@ -53,4 +53,4 @@ VOLUME /data /logs
EXPOSE 7474 7473 7687 EXPOSE 7474 7473 7687
ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"] ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
CMD ["neo4j"] CMD ["neo4j"]

View File

@@ -196,7 +196,7 @@ rag = LightRAG(
### Using Neo4J for Storage ### Using Neo4J for Storage
* For production level scenarios you will most likely want to leverage an enterprise solution * For production level scenarios you will most likely want to leverage an enterprise solution
* for KG storage. Running Neo4J in Docker is recommended for seamless local testing. * for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
* See: https://hub.docker.com/_/neo4j * See: https://hub.docker.com/_/neo4j
@@ -209,7 +209,7 @@ When you launch the project be sure to override the default KG: NetworkS
by specifying kg="Neo4JStorage". by specifying kg="Neo4JStorage".
# Note: Default settings use NetworkX # Note: Default settings use NetworkX
#Initialize LightRAG with Neo4J implementation. #Initialize LightRAG with Neo4J implementation.
WORKING_DIR = "./local_neo4jWorkDir" WORKING_DIR = "./local_neo4jWorkDir"
rag = LightRAG( rag = LightRAG(
@@ -503,8 +503,8 @@ pip install fastapi uvicorn pydantic
export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default" export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1" export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
export OPENAI_API_KEY="Your OpenAI API key" # Required export OPENAI_API_KEY="Your OpenAI API key" # Required
export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini" export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large" export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
``` ```
3. Run the API server: 3. Run the API server:
@@ -923,4 +923,3 @@ primaryClass={cs.IR}
} }
``` ```
**Thank you for your interest in our work!** **Thank you for your interest in our work!**

View File

@@ -33,7 +33,7 @@ if not os.path.exists(WORKING_DIR):
async def llm_model_func( async def llm_model_func(
prompt, system_prompt=None, history_messages=[], **kwargs prompt, system_prompt=None, history_messages=[], **kwargs
) -> str: ) -> str:
return await openai_complete_if_cache( return await openai_complete_if_cache(
LLM_MODEL, LLM_MODEL,
@@ -66,9 +66,11 @@ async def get_embedding_dim():
rag = LightRAG( rag = LightRAG(
working_dir=WORKING_DIR, working_dir=WORKING_DIR,
llm_model_func=llm_model_func, llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(embedding_dim=asyncio.run(get_embedding_dim()), embedding_func=EmbeddingFunc(
max_token_size=EMBEDDING_MAX_TOKEN_SIZE, embedding_dim=asyncio.run(get_embedding_dim()),
func=embedding_func), max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
func=embedding_func,
),
) )
@@ -99,8 +101,13 @@ async def query_endpoint(request: QueryRequest):
try: try:
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
result = await loop.run_in_executor( result = await loop.run_in_executor(
None, lambda: rag.query(request.query, None,
param=QueryParam(mode=request.mode, only_need_context=request.only_need_context)) lambda: rag.query(
request.query,
param=QueryParam(
mode=request.mode, only_need_context=request.only_need_context
),
),
) )
return Response(status="success", data=result) return Response(status="success", data=result)
except Exception as e: except Exception as e:

View File

@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
__version__ = "0.0.8" __version__ = "0.0.9"
__author__ = "Zirui Guo" __author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG" __url__ = "https://github.com/HKUDS/LightRAG"

View File

@@ -1,3 +1 @@
# print ("init package vars here. ......") # print ("init package vars here. ......")

View File

@@ -146,11 +146,11 @@ class Neo4JStorage(BaseGraphStorage):
entity_name_label_target = target_node_id.strip('"') entity_name_label_target = target_node_id.strip('"')
""" """
Find all edges between nodes of two given labels Find all edges between nodes of two given labels
Args: Args:
source_node_label (str): Label of the source nodes source_node_label (str): Label of the source nodes
target_node_label (str): Label of the target nodes target_node_label (str): Label of the target nodes
Returns: Returns:
list: List of all relationships/edges found list: List of all relationships/edges found
""" """

View File

@@ -61,7 +61,6 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
return loop return loop
@dataclass @dataclass
class LightRAG: class LightRAG:
working_dir: str = field( working_dir: str = field(

View File

@@ -560,19 +560,19 @@ async def _find_most_related_text_unit_from_entities(
if not this_edges: if not this_edges:
continue continue
all_one_hop_nodes.update([e[1] for e in this_edges]) all_one_hop_nodes.update([e[1] for e in this_edges])
all_one_hop_nodes = list(all_one_hop_nodes) all_one_hop_nodes = list(all_one_hop_nodes)
all_one_hop_nodes_data = await asyncio.gather( all_one_hop_nodes_data = await asyncio.gather(
*[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes] *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
) )
# Add null check for node data # Add null check for node data
all_one_hop_text_units_lookup = { all_one_hop_text_units_lookup = {
k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP])) k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data) for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
if v is not None and "source_id" in v # Add source_id check if v is not None and "source_id" in v # Add source_id check
} }
all_text_units_lookup = {} all_text_units_lookup = {}
for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)): for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
for c_id in this_text_units: for c_id in this_text_units:
@@ -586,7 +586,7 @@ async def _find_most_related_text_unit_from_entities(
and c_id in all_one_hop_text_units_lookup[e[1]] and c_id in all_one_hop_text_units_lookup[e[1]]
): ):
relation_counts += 1 relation_counts += 1
chunk_data = await text_chunks_db.get_by_id(c_id) chunk_data = await text_chunks_db.get_by_id(c_id)
if chunk_data is not None and "content" in chunk_data: # Add content check if chunk_data is not None and "content" in chunk_data: # Add content check
all_text_units_lookup[c_id] = { all_text_units_lookup[c_id] = {
@@ -594,29 +594,28 @@ async def _find_most_related_text_unit_from_entities(
"order": index, "order": index,
"relation_counts": relation_counts, "relation_counts": relation_counts,
} }
# Filter out None values and ensure data has content # Filter out None values and ensure data has content
all_text_units = [ all_text_units = [
{"id": k, **v} {"id": k, **v}
for k, v in all_text_units_lookup.items() for k, v in all_text_units_lookup.items()
if v is not None and v.get("data") is not None and "content" in v["data"] if v is not None and v.get("data") is not None and "content" in v["data"]
] ]
if not all_text_units: if not all_text_units:
logger.warning("No valid text units found") logger.warning("No valid text units found")
return [] return []
all_text_units = sorted( all_text_units = sorted(
all_text_units, all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
key=lambda x: (x["order"], -x["relation_counts"])
) )
all_text_units = truncate_list_by_token_size( all_text_units = truncate_list_by_token_size(
all_text_units, all_text_units,
key=lambda x: x["data"]["content"], key=lambda x: x["data"]["content"],
max_token_size=query_param.max_token_for_text_unit, max_token_size=query_param.max_token_for_text_unit,
) )
all_text_units = [t["data"] for t in all_text_units] all_text_units = [t["data"] for t in all_text_units]
return all_text_units return all_text_units

View File

@@ -1,6 +1,6 @@
import os import os
from lightrag import LightRAG, QueryParam from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete from lightrag.llm import gpt_4o_mini_complete
######### #########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() # Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
# import nest_asyncio # import nest_asyncio

View File

@@ -1,6 +1,6 @@
import os import os
from lightrag import LightRAG, QueryParam from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete from lightrag.llm import gpt_4o_mini_complete
######### #########