cleaning code for pull

This commit is contained in:
Ken Wiltshire
2024-11-01 16:11:19 -04:00
parent 13940c1726
commit f375620992
14 changed files with 39 additions and 28678 deletions

View File

@@ -155,6 +155,36 @@ rag = LightRAG(
```
</details>

+<details>
+  <summary> Using Neo4J for Storage </summary>
+
+* For production-level scenarios, you will most likely want to leverage an enterprise solution for KG storage.
+
+```python
+# Set these in your shell environment before launching:
+export NEO4J_URI="neo4j://localhost:7687"
+export NEO4J_USERNAME="neo4j"
+export NEO4J_PASSWORD="password"
+
+# When you launch the project, be sure to override the default KG (NetworkX)
+# by specifying kg="Neo4JStorage".
+
+# Note: default settings use NetworkX.
+# Initialize LightRAG with the Neo4J implementation.
+WORKING_DIR = "./local_neo4jWorkDir"
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
+    kg="Neo4JStorage",   # <----------- override KG default
+    log_level="DEBUG",   # <----------- override log_level default
+)
+```
+
+See test_neo4j.py for a working example.
+</details>

<details>
  <summary> Using Ollama Models </summary>
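The exported NEO4J_* variables suggest the Neo4J backend picks up its connection settings from the process environment. A minimal sketch of that pattern using the official neo4j driver; the variable names come from the README section above, while the lookup code itself is an assumption, not LightRAG's actual Neo4JStorage internals:

```python
import os
from neo4j import GraphDatabase  # official Neo4j Python driver

# Hypothetical sketch: how a storage backend might consume the NEO4J_*
# variables exported in the README above.
uri = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
user = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")

driver = GraphDatabase.driver(uri, auth=(user, password))
driver.verify_connectivity()  # fail fast if the server is unreachable
```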

View File

@@ -1,27 +1,3 @@
-print ("init package vars here. ......")
+# print ("init package vars here. ......")
-# from .neo4j import GraphStorage as Neo4JStorage
-# import sys
-# import importlib
-# # Specify the path to the directory containing the module
-# # Add the directory to the system path
-# module_dir = '/Users/kenwiltshire/documents/dev/LightRag/lightrag/kg'
-# sys.path.append(module_dir)
-# # Specify the module name
-# module_name = 'neo4j'
-# # Import the module
-# spec = importlib.util.spec_from_file_location(module_name, f'{module_dir}/{module_name}.py')
-# Neo4JStorage = importlib.util.module_from_spec(spec)
-# spec.loader.exec_module(Neo4JStorage)
-# Relative imports are still possible by adding a leading period to the module name when using the from ... import form:
-# # Import names from pkg.string
-# from .string import name1, name2
-# # Import pkg.string
-# from . import string
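The removed comments sketch two alternatives for wiring up Neo4JStorage: path-based dynamic loading via importlib, and the relative import the package settled on. As written, the importlib variant would fail: `importlib.util` must be imported explicitly, and `module_from_spec` returns a module object, not a storage class. A corrected sketch of that dynamic-loading pattern, with hypothetical paths and names:

```python
import importlib.util  # "import importlib" alone does not guarantee .util is loaded
import sys

# Illustrative values; the original hard-coded a local dev path.
module_dir = "./lightrag/kg"
module_name = "neo4j_impl"

spec = importlib.util.spec_from_file_location(
    module_name, f"{module_dir}/{module_name}.py"
)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module  # register before exec so re-imports resolve
spec.loader.exec_module(module)
```

Note that registering the module under the name `neo4j`, as the removed code did, would shadow the installed neo4j driver package in sys.modules; the relative import `from .neo4j import GraphStorage as Neo4JStorage` avoids that.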

View File

@@ -120,9 +120,6 @@ class LightRAG:
    addon_params: dict = field(default_factory=dict)
    convert_response_to_json_func: callable = convert_response_to_json

-    # def get_configured_KG(self):
-    #     return self.kg

    def __post_init__(self):
        log_file = os.path.join(self.working_dir, "lightrag.log")
        set_logger(log_file)
@@ -133,7 +130,7 @@ class LightRAG:
        _print_config = ",\n ".join([f"{k} = {v}" for k, v in asdict(self).items()])
        logger.debug(f"LightRAG init with param:\n {_print_config}\n")

-        #should move all storage setup here to leverage initial start params attached to self.
+        #@TODO: should move all storage setup here to leverage initial start params attached to self.
        self.graph_storage_cls: Type[BaseGraphStorage] = self._get_storage_class()[self.kg]

        if not os.path.exists(self.working_dir):
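`_get_storage_class()[self.kg]` implies a plain name-to-class registry keyed by the `kg` parameter. A minimal sketch of that pattern, assuming the class names seen elsewhere in this diff; the actual method body is not shown in this commit:

```python
from typing import Type

class BaseGraphStorage: ...
class NetworkXStorage(BaseGraphStorage): ...
class Neo4JStorage(BaseGraphStorage): ...

def _get_storage_class() -> dict[str, Type[BaseGraphStorage]]:
    # Map the kg config string to a concrete storage implementation.
    return {
        "NetworkXStorage": NetworkXStorage,  # default
        "Neo4JStorage": Neo4JStorage,        # opt-in via kg="Neo4JStorage"
    }

kg = "Neo4JStorage"
graph_storage_cls = _get_storage_class()[kg]  # raises KeyError on unknown names
```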

View File

@@ -72,9 +72,7 @@ async def openai_complete_if_cache(
@retry(
    stop=stop_after_attempt(3),
-    #kw_
-    wait=wait_exponential(multiplier=1, min=10, max=60),
-    # wait=wait_exponential(multiplier=1, min=4, max=10),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def azure_openai_complete_if_cache(model,
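This hunk restores the tighter backoff window (4-10 seconds instead of 10-60). A self-contained sketch of the same tenacity policy; the exception filter is omitted here so the snippet runs without the openai imports, and `flaky_call` is a stand-in, not LightRAG code:

```python
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(
    stop=stop_after_attempt(3),                         # give up after 3 tries
    wait=wait_exponential(multiplier=1, min=4, max=10),  # 2**x seconds, clamped to [4, 10]
)
async def flaky_call() -> str:
    # Stand-in body; tenacity retries on any exception when no filter is given.
    raise ConnectionError("simulated transient failure")
```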

View File

@@ -908,7 +908,6 @@ async def hybrid_query(
            .strip()
        )
        result = "{" + result.split("{")[1].split("}")[0] + "}"
        keywords_data = json.loads(result)
        hl_keywords = keywords_data.get("high_level_keywords", [])
        ll_keywords = keywords_data.get("low_level_keywords", [])
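The surviving line carves the first `{...}` region out of the raw LLM response before parsing it. A sketch with an illustrative response string, plus the usual caveat:

```python
import json

# Illustrative LLM response wrapping the JSON payload in chatter.
result = 'Sure! {"high_level_keywords": ["theme"], "low_level_keywords": ["scrooge"]} Hope that helps.'

# Keep only the text between the first "{" and the first "}".
result = "{" + result.split("{")[1].split("}")[0] + "}"
keywords_data = json.loads(result)
hl_keywords = keywords_data.get("high_level_keywords", [])
ll_keywords = keywords_data.get("low_level_keywords", [])

# Caveat: split("{")[1] raises IndexError when no brace is present, and
# nested braces truncate the object; re.search(r"\{.*\}", result, re.DOTALL)
# is a common hardening step.
```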

View File

@@ -95,7 +95,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
        embeddings = np.concatenate(embeddings_list)
        for i, d in enumerate(list_data):
            d["__vector__"] = embeddings[i]
-        print (f"Upserting to vector: {list_data}")
        results = self._client.upsert(datas=list_data)
        return results
@@ -110,7 +109,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
        results = [
            {**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
        ]
-        print (f"vector db results {results} for query {query}")
        return results

    async def index_done_callback(self):
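Both hunks in this file drop debug prints around the same data flow: batched embeddings are concatenated and attached to each record as `__vector__` before upsert, and query hits come back with `__id__`/`__metrics__` fields that get renamed for callers. A sketch of those shapes with made-up values; only the field names come from the diff:

```python
import numpy as np

# Per-batch embedding outputs, concatenated into one (3, 8) matrix.
embeddings_list = [np.random.rand(2, 8), np.random.rand(1, 8)]
embeddings = np.concatenate(embeddings_list)

list_data = [{"__id__": f"doc-{i}", "content": "..."} for i in range(3)]
for i, d in enumerate(list_data):
    d["__vector__"] = embeddings[i]  # one vector per record before upsert

# Query side: internal __id__/__metrics__ fields become public id/distance.
raw = [{"__id__": "doc-0", "__metrics__": 0.12}]
results = [{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in raw]
```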
@@ -235,9 +233,11 @@ class NetworkXStorage(BaseGraphStorage):
            raise ValueError(f"Node embedding algorithm {algorithm} not supported")
        return await self._node_embed_algorithms[algorithm]()

+    #@TODO: NOT USED
    async def _node2vec_embed(self):
        from graspologic import embed

-        print ("is this ever called?")
        embeddings, nodes = embed.node2vec_embed(
            self._graph,
            **self.global_config["node2vec_params"],
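`_node2vec_embed` (now flagged as unused) delegates to graspologic's node2vec implementation. A standalone sketch of that call on a toy graph; the parameter values are illustrative stand-ins for whatever `global_config["node2vec_params"]` carries:

```python
import networkx as nx
from graspologic.embed import node2vec_embed

graph = nx.karate_club_graph()  # toy graph in place of self._graph

# Returns one embedding row per node, ordered to match node_labels.
embeddings, node_labels = node2vec_embed(
    graph,
    dimensions=16,   # embedding width
    num_walks=10,    # random walks started per node
    walk_length=40,
    window_size=2,
    iterations=3,
    random_seed=3,
)
print(embeddings.shape)  # (34, 16)
```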

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -17,12 +17,12 @@ rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
    kg="Neo4JStorage",
-    log_level="INFO"
+    log_level="DEBUG"
    # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
)

-# with open("./book.txt") as f:
-#     rag.insert(f.read())
+with open("./book.txt") as f:
+    rag.insert(f.read())

# Perform naive search
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
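The script only exercises mode="naive"; QueryParam also routes to the graph-aware modes, including the hybrid path touched earlier in this diff. A sketch, assuming the rag instance and imports from the script above:

```python
# Run the same question through the graph-aware query modes.
for mode in ["local", "global", "hybrid"]:
    print(rag.query("What are the top themes in this story?",
                    param=QueryParam(mode=mode)))
```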