cleaning code for pull
This commit is contained in:
56
Dockerfile
Normal file
56
Dockerfile
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# Neo4j Enterprise 5.24.2 on Debian bullseye, with the JDK taken from the
# eclipse-temurin:17 image.
FROM debian:bullseye-slim

# Install location of the JDK; the same path is used in the source image.
ENV JAVA_HOME=/opt/java/openjdk
COPY --from=eclipse-temurin:17 $JAVA_HOME $JAVA_HOME

# NEO4J_SHA256 is the checksum the downloaded NEO4J_TARBALL is verified
# against in the install step below.
ENV PATH="${JAVA_HOME}/bin:${PATH}" \
    NEO4J_SHA256=7ce97bd9a4348af14df442f00b3dc5085b5983d6f03da643744838c7a1bc8ba7 \
    NEO4J_TARBALL=neo4j-enterprise-5.24.2-unix.tar.gz \
    NEO4J_EDITION=enterprise \
    NEO4J_HOME="/var/lib/neo4j" \
    LANG=C.UTF-8
# Download location of the tarball; can be overridden with --build-arg.
ARG NEO4J_URI=https://dist.neo4j.org/neo4j-enterprise-5.24.2-unix.tar.gz

# Dedicated system account with a fixed uid/gid (7474) whose home is NEO4J_HOME.
RUN addgroup --gid 7474 --system neo4j && adduser --uid 7474 --system --no-create-home --home "${NEO4J_HOME}" --ingroup neo4j neo4j

# Startup scripts (docker-entrypoint.sh, neo4j-admin-report.sh, ...) from the build context.
COPY ./local-package/* /startup/

# Single layer that:
#   1. installs build and runtime tools (apt-get, not apt: apt is the
#      interactive front-end and warns when used in scripts),
#   2. downloads the Neo4j tarball, verifies its checksum and unpacks it
#      into NEO4J_HOME,
#   3. moves data/ and logs/ to /data and /logs and symlinks them back,
#   4. builds su-exec from a pinned commit, verifying its source files,
#   5. purges the build-only packages and the apt lists to keep the image small.
RUN apt-get update \
    && apt-get install -y curl gcc git jq make procps tini wget \
    && curl --fail --silent --show-error --location --remote-name "${NEO4J_URI}" \
    && echo "${NEO4J_SHA256}  ${NEO4J_TARBALL}" | sha256sum -c --strict --quiet \
    && tar --extract --file "${NEO4J_TARBALL}" --directory /var/lib \
    && mv /var/lib/neo4j-* "${NEO4J_HOME}" \
    && rm "${NEO4J_TARBALL}" \
    && sed -i 's/Package Type:.*/Package Type: docker bullseye/' "${NEO4J_HOME}/packaging_info" \
    && mv /startup/neo4j-admin-report.sh "${NEO4J_HOME}/bin/neo4j-admin-report" \
    && mv "${NEO4J_HOME}/data" /data \
    && mv "${NEO4J_HOME}/logs" /logs \
    && chown -R neo4j:neo4j /data \
    && chmod -R 777 /data \
    && chown -R neo4j:neo4j /logs \
    && chmod -R 777 /logs \
    && chown -R neo4j:neo4j "${NEO4J_HOME}" \
    && chmod -R 777 "${NEO4J_HOME}" \
    && chmod -R 755 "${NEO4J_HOME}/bin" \
    && ln -s /data "${NEO4J_HOME}/data" \
    && ln -s /logs "${NEO4J_HOME}/logs" \
    && git clone https://github.com/ncopa/su-exec.git \
    && cd su-exec \
    && git checkout 4c3bb42b093f14da70d8ab924b487ccfbb1397af \
    && echo "d6c40440609a23483f12eb6295b5191e94baf08298a856bab6e15b10c3b82891  su-exec.c" | sha256sum -c \
    && echo "2a87af245eb125aca9305a0b1025525ac80825590800f047419dc57bba36b334  Makefile" | sha256sum -c \
    && make \
    && mv /su-exec/su-exec /usr/bin/su-exec \
    && apt-get -y purge --auto-remove curl gcc git make \
    && rm -rf /var/lib/apt/lists/* /su-exec

# Put the Neo4j command-line tools on PATH (key=value form; the
# whitespace-separated "ENV key value" form is legacy syntax).
ENV PATH="${NEO4J_HOME}/bin:${PATH}"

WORKDIR "${NEO4J_HOME}"

# Database files and logs live outside the image layers.
VOLUME /data /logs

# NOTE(review): presumably Neo4j's HTTP (7474), HTTPS (7473) and Bolt (7687)
# listeners - confirm against the server configuration.
EXPOSE 7474 7473 7687

# tini runs as the init process in front of the entrypoint script copied to /startup.
ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
CMD ["neo4j"]
|
@@ -160,7 +160,10 @@ rag = LightRAG(
|
|||||||
<summary> Using Neo4J for Storage </summary>
|
<summary> Using Neo4J for Storage </summary>
|
||||||
|
|
||||||
* For production level scenarios you will most likely want to leverage an enterprise solution
|
* For production level scenarios you will most likely want to leverage an enterprise solution
|
||||||
for KG storage.
|
  for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
|
||||||
|
* See: https://hub.docker.com/_/neo4j
|
||||||
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export NEO4J_URI="neo4j://localhost:7687"
|
export NEO4J_URI="neo4j://localhost:7687"
|
||||||
export NEO4J_USERNAME="neo4j"
|
export NEO4J_USERNAME="neo4j"
|
||||||
|
@@ -74,9 +74,6 @@ class GraphStorage(BaseGraphStorage):
|
|||||||
)
|
)
|
||||||
result = tx.run(query)
|
result = tx.run(query)
|
||||||
single_result = result.single()
|
single_result = result.single()
|
||||||
# if result.single() == None:
|
|
||||||
# print (f"this should not happen: ---- {label1}/{label2} {query}")
|
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result["edgeExists"]}'
|
f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result["edgeExists"]}'
|
||||||
)
|
)
|
||||||
@@ -84,7 +81,7 @@ class GraphStorage(BaseGraphStorage):
|
|||||||
return single_result["edgeExists"]
|
return single_result["edgeExists"]
|
||||||
def close(self):
|
def close(self):
|
||||||
self._driver.close()
|
self._driver.close()
|
||||||
#hard code relationship type
|
#hard code relationship type, directed.
|
||||||
with self._driver.session() as session:
|
with self._driver.session() as session:
|
||||||
result = session.read_transaction(_check_edge_existence, entity_name_label_source, entity_name_label_target)
|
result = session.read_transaction(_check_edge_existence, entity_name_label_source, entity_name_label_target)
|
||||||
return result
|
return result
|
||||||
@@ -111,7 +108,6 @@ class GraphStorage(BaseGraphStorage):
|
|||||||
|
|
||||||
def _find_node_degree(session, label):
|
def _find_node_degree(session, label):
|
||||||
with session.begin_transaction() as tx:
|
with session.begin_transaction() as tx:
|
||||||
# query = "MATCH (n:`{label}`) RETURN n, size((n)--()) AS degree".format(label=label)
|
|
||||||
query = f"""
|
query = f"""
|
||||||
MATCH (n:`{label}`)
|
MATCH (n:`{label}`)
|
||||||
RETURN COUNT{{ (n)--() }} AS totalEdgeCount
|
RETURN COUNT{{ (n)--() }} AS totalEdgeCount
|
||||||
@@ -132,7 +128,6 @@ class GraphStorage(BaseGraphStorage):
|
|||||||
return degree
|
return degree
|
||||||
|
|
||||||
|
|
||||||
# degree = session.read_transaction(get_edge_degree, 1, 2)
|
|
||||||
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
|
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
|
||||||
entity_name_label_source = src_id.strip('\"')
|
entity_name_label_source = src_id.strip('\"')
|
||||||
entity_name_label_target = tgt_id.strip('\"')
|
entity_name_label_target = tgt_id.strip('\"')
|
||||||
@@ -208,7 +203,6 @@ class GraphStorage(BaseGraphStorage):
|
|||||||
target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
|
target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
|
||||||
|
|
||||||
if source_label and target_label:
|
if source_label and target_label:
|
||||||
print (f"appending: {(source_label, target_label)}")
|
|
||||||
edges.append((source_label, target_label))
|
edges.append((source_label, target_label))
|
||||||
|
|
||||||
return edges
|
return edges
|
||||||
@@ -218,44 +212,6 @@ class GraphStorage(BaseGraphStorage):
|
|||||||
return edges
|
return edges
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# from typing import List, Tuple
|
|
||||||
# async def get_node_connections(driver: GraphDatabase.driver, label: str) -> List[Tuple[str, str]]:
|
|
||||||
# def get_connections_for_node(tx):
|
|
||||||
# query = f"""
|
|
||||||
# MATCH (n:`{label}`)
|
|
||||||
# OPTIONAL MATCH (n)-[r]-(connected)
|
|
||||||
# RETURN n, r, connected
|
|
||||||
# """
|
|
||||||
# results = tx.run(query)
|
|
||||||
|
|
||||||
|
|
||||||
# connections = []
|
|
||||||
# for record in results:
|
|
||||||
# source_node = record['n']
|
|
||||||
# connected_node = record['connected']
|
|
||||||
|
|
||||||
# source_label = list(source_node.labels)[0] if source_node.labels else None
|
|
||||||
# target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None
|
|
||||||
|
|
||||||
# if source_label and target_label:
|
|
||||||
# connections.append((source_label, target_label))
|
|
||||||
|
|
||||||
# logger.debug(
|
|
||||||
# f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{connections}'
|
|
||||||
# )
|
|
||||||
# return connections
|
|
||||||
|
|
||||||
# with driver.session() as session:
|
|
||||||
|
|
||||||
# return session.read_transaction(get_connections_for_node)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#upsert_node
|
|
||||||
|
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(3),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||||
@@ -366,32 +322,5 @@ class GraphStorage(BaseGraphStorage):
|
|||||||
# return result
|
# return result
|
||||||
|
|
||||||
async def _node2vec_embed(self):
|
async def _node2vec_embed(self):
|
||||||
print ("this is never called. checking to be sure.")
|
print ("Implemented but never called.")
|
||||||
|
|
||||||
# async def _node2vec_embed(self):
|
|
||||||
with self._driver.session() as session:
|
|
||||||
#Define the Cypher query
|
|
||||||
options = self.global_config["node2vec_params"]
|
|
||||||
logger.debug(f"building embeddings with options {options}")
|
|
||||||
query = f"""CALL gds.node2vec.write('91fbae6c', {
|
|
||||||
options
|
|
||||||
})
|
|
||||||
YIELD nodeId, labels, embedding
|
|
||||||
RETURN
|
|
||||||
nodeId AS id,
|
|
||||||
labels[0] AS distinctLabel,
|
|
||||||
embedding AS nodeToVecEmbedding
|
|
||||||
"""
|
|
||||||
# Run the query and process the results
|
|
||||||
results = session.run(query)
|
|
||||||
embeddings = []
|
|
||||||
node_labels = []
|
|
||||||
for record in results:
|
|
||||||
node_id = record["id"]
|
|
||||||
embedding = record["nodeToVecEmbedding"]
|
|
||||||
label = record["distinctLabel"]
|
|
||||||
print(f"Node id/label: {label}/{node_id}, Embedding: {embedding}")
|
|
||||||
embeddings.append(embedding)
|
|
||||||
node_labels.append(label)
|
|
||||||
return embeddings, node_labels
|
|
||||||
|
|
||||||
|
22
test.py
22
test.py
@@ -8,27 +8,7 @@ from pprint import pprint
|
|||||||
# nest_asyncio.apply()
|
# nest_asyncio.apply()
|
||||||
#########
|
#########
|
||||||
|
|
||||||
WORKING_DIR = "./dickensTestEmbedcall"
|
WORKING_DIR = "./dickens"
|
||||||
|
|
||||||
|
|
||||||
# G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
|
|
||||||
# nx.write_gexf(G, "graph_chunk_entity_relation.gefx")
|
|
||||||
|
|
||||||
import networkx as nx
|
|
||||||
from networkx_query import search_nodes, search_edges
|
|
||||||
G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
|
|
||||||
query = {} # Empty query matches all nodes
|
|
||||||
result = search_nodes(G, query)
|
|
||||||
|
|
||||||
# Extract node IDs from the result
|
|
||||||
node_ids = sorted([node for node in result])
|
|
||||||
|
|
||||||
print("All node IDs in the graph:")
|
|
||||||
pprint(node_ids)
|
|
||||||
raise Exception
|
|
||||||
|
|
||||||
|
|
||||||
# raise Exception
|
|
||||||
|
|
||||||
if not os.path.exists(WORKING_DIR):
|
if not os.path.exists(WORKING_DIR):
|
||||||
os.mkdir(WORKING_DIR)
|
os.mkdir(WORKING_DIR)
|
||||||
|
@@ -17,7 +17,7 @@ rag = LightRAG(
|
|||||||
working_dir=WORKING_DIR,
|
working_dir=WORKING_DIR,
|
||||||
llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
|
llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
|
||||||
kg="Neo4JStorage",
|
kg="Neo4JStorage",
|
||||||
log_level="DEBUG"
|
log_level="INFO"
|
||||||
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user