cleaning code for pull
Dockerfile (new file, 56 lines)
@@ -0,0 +1,56 @@
FROM debian:bullseye-slim

# Java runtime copied in from the eclipse-temurin image
ENV JAVA_HOME=/opt/java/openjdk
COPY --from=eclipse-temurin:17 $JAVA_HOME $JAVA_HOME

ENV PATH="${JAVA_HOME}/bin:${PATH}" \
    NEO4J_SHA256=7ce97bd9a4348af14df442f00b3dc5085b5983d6f03da643744838c7a1bc8ba7 \
    NEO4J_TARBALL=neo4j-enterprise-5.24.2-unix.tar.gz \
    NEO4J_EDITION=enterprise \
    NEO4J_HOME="/var/lib/neo4j" \
    LANG=C.UTF-8

ARG NEO4J_URI=https://dist.neo4j.org/neo4j-enterprise-5.24.2-unix.tar.gz

RUN addgroup --gid 7474 --system neo4j && adduser --uid 7474 --system --no-create-home --home "${NEO4J_HOME}" --ingroup neo4j neo4j

COPY ./local-package/* /startup/

# Install tooling, download and verify the Neo4j tarball, build su-exec, then clean up
RUN apt-get update \
    && apt-get install -y curl gcc git jq make procps tini wget \
    && curl --fail --silent --show-error --location --remote-name ${NEO4J_URI} \
    && echo "${NEO4J_SHA256} ${NEO4J_TARBALL}" | sha256sum -c --strict --quiet \
    && tar --extract --file ${NEO4J_TARBALL} --directory /var/lib \
    && mv /var/lib/neo4j-* "${NEO4J_HOME}" \
    && rm ${NEO4J_TARBALL} \
    && sed -i 's/Package Type:.*/Package Type: docker bullseye/' $NEO4J_HOME/packaging_info \
    && mv /startup/neo4j-admin-report.sh "${NEO4J_HOME}"/bin/neo4j-admin-report \
    && mv "${NEO4J_HOME}"/data /data \
    && mv "${NEO4J_HOME}"/logs /logs \
    && chown -R neo4j:neo4j /data \
    && chmod -R 777 /data \
    && chown -R neo4j:neo4j /logs \
    && chmod -R 777 /logs \
    && chown -R neo4j:neo4j "${NEO4J_HOME}" \
    && chmod -R 777 "${NEO4J_HOME}" \
    && chmod -R 755 "${NEO4J_HOME}/bin" \
    && ln -s /data "${NEO4J_HOME}"/data \
    && ln -s /logs "${NEO4J_HOME}"/logs \
    && git clone https://github.com/ncopa/su-exec.git \
    && cd su-exec \
    && git checkout 4c3bb42b093f14da70d8ab924b487ccfbb1397af \
    && echo d6c40440609a23483f12eb6295b5191e94baf08298a856bab6e15b10c3b82891 su-exec.c | sha256sum -c \
    && echo 2a87af245eb125aca9305a0b1025525ac80825590800f047419dc57bba36b334 Makefile | sha256sum -c \
    && make \
    && mv /su-exec/su-exec /usr/bin/su-exec \
    && apt-get -y purge --auto-remove curl gcc git make \
    && rm -rf /var/lib/apt/lists/* /su-exec

ENV PATH="${NEO4J_HOME}/bin:${PATH}"

WORKDIR "${NEO4J_HOME}"

VOLUME /data /logs

EXPOSE 7474 7473 7687

ENTRYPOINT ["tini", "-g", "--", "/startup/docker-entrypoint.sh"]
CMD ["neo4j"]
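A quick way to sanity-check an image built from this Dockerfile is to publish the HTTP/Bolt ports and confirm that the Bolt endpoint answers. The sketch below uses the official `neo4j` Python driver; the image tag, password, and `docker run` flags are assumptions for illustration, not part of this commit.

```python
# Sketch: verify a locally running container built from this Dockerfile.
# Assumes it was started with something like:
#   docker run -p 7474:7474 -p 7687:7687 \
#     -e NEO4J_AUTH=neo4j/yourpassword -e NEO4J_ACCEPT_LICENSE_AGREEMENT=yes <image-tag>
from neo4j import GraphDatabase

driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "yourpassword"))
try:
    driver.verify_connectivity()  # raises if Bolt is unreachable or auth fails
    with driver.session() as session:
        record = session.run("RETURN 1 AS ok").single()
        print("Neo4j reachable, RETURN 1 ->", record["ok"])
finally:
    driver.close()
```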
@@ -160,7 +160,10 @@ rag = LightRAG(
<summary> Using Neo4J for Storage </summary>

* For production-level scenarios you will most likely want to leverage an enterprise solution for KG storage. Running Neo4J in Docker is recommended for seamless local testing.
* See: https://hub.docker.com/_/neo4j

```bash
export NEO4J_URI="neo4j://localhost:7687"
export NEO4J_USERNAME="neo4j"
```
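With those variables exported, the Neo4j backend is selected when constructing `LightRAG`, exactly as in the `test.py` change later in this commit. A minimal sketch; the import paths follow the LightRAG examples and may need adjusting for your checkout.

```python
import os

# Import paths assumed from the LightRAG examples; adjust if your layout differs.
from lightrag import LightRAG
from lightrag.llm import gpt_4o_mini_complete

# The Neo4JStorage backend reads NEO4J_URI / NEO4J_USERNAME (and the password) from the environment.
os.environ.setdefault("NEO4J_URI", "neo4j://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")

rag = LightRAG(
    working_dir="./dickens",
    llm_model_func=gpt_4o_mini_complete,
    kg="Neo4JStorage",  # use the Neo4j-backed graph storage instead of the default
    log_level="INFO",
)
```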
@@ -74,9 +74,6 @@ class GraphStorage(BaseGraphStorage):
            )
            result = tx.run(query)
            single_result = result.single()
            # if result.single() == None:
            #     print(f"this should not happen: ---- {label1}/{label2} {query}")

            logger.debug(
                f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{single_result["edgeExists"]}'
            )
@@ -84,7 +81,7 @@ class GraphStorage(BaseGraphStorage):
            return single_result["edgeExists"]
        def close(self):
            self._driver.close()
        # hard code relationship type
        # hard code relationship type, directed.
        with self._driver.session() as session:
            result = session.read_transaction(_check_edge_existence, entity_name_label_source, entity_name_label_target)
            return result
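For context, the edge-existence check that these two hunks touch follows the driver's managed-transaction pattern: a transaction function runs a Cypher query returning an `edgeExists` boolean, and `session.read_transaction` executes it with automatic retries. A self-contained sketch of that pattern, with an illustrative query and labels rather than the repo's exact text:

```python
# Sketch of the read-transaction pattern used above; query text and labels are illustrative.
from neo4j import GraphDatabase

driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "yourpassword"))

def _check_edge_existence(tx, label1, label2):
    # Backticks let arbitrary entity names be used as node labels.
    query = (
        f"MATCH (a:`{label1}`)-[r]-(b:`{label2}`) "
        "RETURN COUNT(r) > 0 AS edgeExists"
    )
    return tx.run(query).single()["edgeExists"]

with driver.session() as session:
    # read_transaction retries the function on transient errors.
    exists = session.read_transaction(_check_edge_existence, "PERSON", "CITY")
    print(exists)
driver.close()
```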
@@ -111,7 +108,6 @@ class GraphStorage(BaseGraphStorage):

        def _find_node_degree(session, label):
            with session.begin_transaction() as tx:
                # query = "MATCH (n:`{label}`) RETURN n, size((n)--()) AS degree".format(label=label)
                query = f"""
                    MATCH (n:`{label}`)
                    RETURN COUNT{{ (n)--() }} AS totalEdgeCount
@@ -132,7 +128,6 @@ class GraphStorage(BaseGraphStorage):
            return degree


    # degree = session.read_transaction(get_edge_degree, 1, 2)
    async def edge_degree(self, src_id: str, tgt_id: str) -> int:
        entity_name_label_source = src_id.strip('\"')
        entity_name_label_target = tgt_id.strip('\"')
@@ -208,7 +203,6 @@ class GraphStorage(BaseGraphStorage):
                target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None

                if source_label and target_label:
                    print(f"appending: {(source_label, target_label)}")
                    edges.append((source_label, target_label))

            return edges
@@ -218,44 +212,6 @@ class GraphStorage(BaseGraphStorage):
            return edges



    # from typing import List, Tuple
    # async def get_node_connections(driver: GraphDatabase.driver, label: str) -> List[Tuple[str, str]]:
    #     def get_connections_for_node(tx):
    #         query = f"""
    #         MATCH (n:`{label}`)
    #         OPTIONAL MATCH (n)-[r]-(connected)
    #         RETURN n, r, connected
    #         """
    #         results = tx.run(query)

    #         connections = []
    #         for record in results:
    #             source_node = record['n']
    #             connected_node = record['connected']

    #             source_label = list(source_node.labels)[0] if source_node.labels else None
    #             target_label = list(connected_node.labels)[0] if connected_node and connected_node.labels else None

    #             if source_label and target_label:
    #                 connections.append((source_label, target_label))

    #         logger.debug(
    #             f'{inspect.currentframe().f_code.co_name}:query:{query}:result:{connections}'
    #         )
    #         return connections

    #     with driver.session() as session:
    #         return session.read_transaction(get_connections_for_node)



    # upsert_node
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
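The truncated `@retry(...)` decorator above comes from tenacity and guards the node-upsert path so transient Neo4j errors are retried with exponential backoff. A generic sketch of the same pattern on a simple MERGE write; the `upsert_node_sketch` helper and its query are illustrative, not the repo's implementation.

```python
# Illustrative only: the tenacity retry pattern shown above, applied to a simple MERGE write.
from neo4j import GraphDatabase
from tenacity import retry, stop_after_attempt, wait_exponential

driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "yourpassword"))

@retry(
    stop=stop_after_attempt(3),                           # give up after 3 tries
    wait=wait_exponential(multiplier=1, min=4, max=10),   # back off between 4s and 10s
)
def upsert_node_sketch(label: str, name: str, properties: dict) -> None:
    # MERGE finds or creates the node, then overwrites its properties.
    query = f"MERGE (n:`{label}` {{name: $name}}) SET n += $props"
    with driver.session() as session:
        session.run(query, name=name, props=properties)

upsert_node_sketch("PERSON", "ExampleNode", {"description": "example node"})
driver.close()
```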
@@ -366,32 +322,5 @@ class GraphStorage(BaseGraphStorage):
        # return result

    async def _node2vec_embed(self):
        print("this is never called. checking to be sure.")

        # async def _node2vec_embed(self):
        with self._driver.session() as session:
            # Define the Cypher query
            options = self.global_config["node2vec_params"]
            logger.debug(f"building embeddings with options {options}")
            query = f"""CALL gds.node2vec.write('91fbae6c', {
                options
            })
            YIELD nodeId, labels, embedding
            RETURN
                nodeId AS id,
                labels[0] AS distinctLabel,
                embedding AS nodeToVecEmbedding
            """
            # Run the query and process the results
            results = session.run(query)
            embeddings = []
            node_labels = []
            for record in results:
                node_id = record["id"]
                embedding = record["nodeToVecEmbedding"]
                label = record["distinctLabel"]
                print(f"Node id/label: {label}/{node_id}, Embedding: {embedding}")
                embeddings.append(embedding)
                node_labels.append(label)
            return embeddings, node_labels
        print("Implemented but never called.")
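Note that `gds.node2vec.write('91fbae6c', ...)` assumes a projected graph named `91fbae6c` already exists in the GDS graph catalog. A hedged sketch of creating such a projection; it requires the Graph Data Science plugin, and the wildcard projections and credentials are assumptions.

```python
# Sketch: project the whole database into the GDS catalog before running node2vec.
# Requires the Neo4j Graph Data Science plugin; the graph name mirrors the code above.
from neo4j import GraphDatabase

driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "yourpassword"))
with driver.session() as session:
    session.run(
        "CALL gds.graph.project($name, '*', '*')",  # project all nodes and relationships
        name="91fbae6c",
    ).consume()
    # After projecting, gds.node2vec.write('91fbae6c', {...}) can write embeddings back.
driver.close()
```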
test.py (22 lines changed)
@@ -8,27 +8,7 @@ from pprint import pprint
# nest_asyncio.apply()
#########

WORKING_DIR = "./dickensTestEmbedcall"

# G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
# nx.write_gexf(G, "graph_chunk_entity_relation.gefx")

import networkx as nx
from networkx_query import search_nodes, search_edges

G = nx.read_graphml('./dickensTestEmbedcall/graph_chunk_entity_relation.graphml')
query = {}  # Empty query matches all nodes
result = search_nodes(G, query)

# Extract node IDs from the result
node_ids = sorted([node for node in result])

print("All node IDs in the graph:")
pprint(node_ids)
raise Exception

# raise Exception
WORKING_DIR = "./dickens"

if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)
@@ -17,7 +17,7 @@ rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
    kg="Neo4JStorage",
    log_level="DEBUG"
    log_level="INFO"
    # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
)