fix examples

This commit is contained in:
ArnoChen
2025-02-19 04:12:16 +08:00
parent e194e04226
commit 7a970451b9
4 changed files with 27 additions and 112 deletions

View File

@@ -48,6 +48,14 @@ print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
os.environ["ORACLE_USER"] = ""
os.environ["ORACLE_PASSWORD"] = ""
os.environ["ORACLE_DSN"] = ""
os.environ["ORACLE_CONFIG_DIR"] = "path_to_config_dir"
os.environ["ORACLE_WALLET_LOCATION"] = "path_to_wallet_location"
os.environ["ORACLE_WALLET_PASSWORD"] = "wallet_password"
os.environ["ORACLE_WORKSPACE"] = "company"
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
@@ -89,20 +97,6 @@ async def init():
# We store data in unified tables, so we need to set a `workspace` parameter to specify which docs we want to store and query
# Below is an example of how to connect to Oracle Autonomous Database on Oracle Cloud
oracle_db = OracleDB(
config={
"user": "",
"password": "",
"dsn": "",
"config_dir": "path_to_config_dir",
"wallet_location": "path_to_wallet_location",
"wallet_password": "wallet_password",
"workspace": "company",
} # specify which docs you want to store and query
)
# Check if Oracle DB tables exist, if not, tables will be created
await oracle_db.check_tables()
# Initialize LightRAG
# We use Oracle DB as the KV/vector/graph storage
# You can add `addon_params={"example_number": 1, "language": "Simplified Chinese"}` to control the prompt
@@ -121,11 +115,6 @@ async def init():
vector_storage="OracleVectorDBStorage",
)
# Set the KV/vector/graph storage's `db` property, so all operations will use the same connection pool
rag.graph_storage_cls.db = oracle_db
rag.key_string_value_json_storage_cls.db = oracle_db
rag.vector_db_storage_cls.db = oracle_db
return rag

View File

@@ -26,6 +26,14 @@ MAX_TOKENS = 4000
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
os.environ["ORACLE_USER"] = "username"
os.environ["ORACLE_PASSWORD"] = "xxxxxxxxx"
os.environ["ORACLE_DSN"] = "xxxxxxx_medium"
os.environ["ORACLE_CONFIG_DIR"] = "path_to_config_dir"
os.environ["ORACLE_WALLET_LOCATION"] = "path_to_wallet_location"
os.environ["ORACLE_WALLET_PASSWORD"] = "wallet_password"
os.environ["ORACLE_WORKSPACE"] = "company"
async def llm_model_func(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
@@ -63,26 +71,6 @@ async def main():
embedding_dimension = await get_embedding_dim()
print(f"Detected embedding dimension: {embedding_dimension}")
# Create Oracle DB connection
# The `config` parameter is the connection configuration of Oracle DB
# More docs here https://python-oracledb.readthedocs.io/en/latest/user_guide/connection_handling.html
# We store data in unified tables, so we need to set a `workspace` parameter to specify which docs we want to store and query
# Below is an example of how to connect to Oracle Autonomous Database on Oracle Cloud
oracle_db = OracleDB(
config={
"user": "username",
"password": "xxxxxxxxx",
"dsn": "xxxxxxx_medium",
"config_dir": "dir/path/to/oracle/config",
"wallet_location": "dir/path/to/oracle/wallet",
"wallet_password": "xxxxxxxxx",
"workspace": "company", # specify which docs you want to store and query
}
)
# Check if Oracle DB tables exist, if not, tables will be created
await oracle_db.check_tables()
# Initialize LightRAG
# We use Oracle DB as the KV/vector/graph storage
# You can add `addon_params={"example_number": 1, "language": "Simplified Chinese"}` to control the prompt
@@ -112,26 +100,6 @@ async def main():
},
)
# Set the KV/vector/graph storage's `db` property, so all operations will use the same connection pool
for storage in [
rag.vector_db_storage_cls,
rag.graph_storage_cls,
rag.doc_status,
rag.full_docs,
rag.text_chunks,
rag.llm_response_cache,
rag.key_string_value_json_storage_cls,
rag.chunks_vdb,
rag.relationships_vdb,
rag.entities_vdb,
rag.graph_storage_cls,
rag.chunk_entity_relation_graph,
rag.llm_response_cache,
]:
# set client
storage.db = oracle_db
# Extract and Insert into LightRAG storage
with open(WORKING_DIR + "/docs.txt", "r", encoding="utf-8") as f:
all_text = f.read()

View File

@@ -17,11 +17,11 @@ APIKEY = ""
CHATMODEL = ""
EMBEDMODEL = ""
TIDB_HOST = ""
TIDB_PORT = ""
TIDB_USER = ""
TIDB_PASSWORD = ""
TIDB_DATABASE = "lightrag"
os.environ["TIDB_HOST"] = ""
os.environ["TIDB_PORT"] = ""
os.environ["TIDB_USER"] = ""
os.environ["TIDB_PASSWORD"] = ""
os.environ["TIDB_DATABASE"] = "lightrag"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
@@ -62,21 +62,6 @@ async def main():
embedding_dimension = await get_embedding_dim()
print(f"Detected embedding dimension: {embedding_dimension}")
# Create TiDB DB connection
tidb = TiDB(
config={
"host": TIDB_HOST,
"port": TIDB_PORT,
"user": TIDB_USER,
"password": TIDB_PASSWORD,
"database": TIDB_DATABASE,
"workspace": "company", # specify which docs you want to store and query
}
)
# Check if TiDB DB tables exist, if not, tables will be created
await tidb.check_tables()
# Initialize LightRAG
# We use TiDB DB as the KV/vector
# You can add `addon_params={"example_number": 1, "language": "Simplified Chinese"}` to control the prompt
@@ -95,15 +80,6 @@ async def main():
graph_storage="TiDBGraphStorage",
)
if rag.llm_response_cache:
rag.llm_response_cache.db = tidb
rag.full_docs.db = tidb
rag.text_chunks.db = tidb
rag.entities_vdb.db = tidb
rag.relationships_vdb.db = tidb
rag.chunks_vdb.db = tidb
rag.chunk_entity_relation_graph.db = tidb
# Extract and Insert into LightRAG storage
with open("./dickens/demo.txt", "r", encoding="utf-8") as f:
await rag.ainsert(f.read())

View File

@@ -22,22 +22,14 @@ if not os.path.exists(WORKING_DIR):
# AGE
os.environ["AGE_GRAPH_NAME"] = "dickens"
postgres_db = PostgreSQLDB(
config={
"host": "localhost",
"port": 15432,
"user": "rag",
"password": "rag",
"database": "rag",
}
)
os.environ["POSTGRES_HOST"] = "localhost"
os.environ["POSTGRES_PORT"] = "15432"
os.environ["POSTGRES_USER"] = "rag"
os.environ["POSTGRES_PASSWORD"] = "rag"
os.environ["POSTGRES_DATABASE"] = "rag"
async def main():
await postgres_db.initdb()
# Check if PostgreSQL DB tables exist, if not, tables will be created
await postgres_db.check_tables()
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=zhipu_complete,
@@ -57,17 +49,7 @@ async def main():
graph_storage="PGGraphStorage",
vector_storage="PGVectorStorage",
)
# Set the KV/vector/graph storage's `db` property, so all operations will use the same connection pool
rag.doc_status.db = postgres_db
rag.full_docs.db = postgres_db
rag.text_chunks.db = postgres_db
rag.llm_response_cache.db = postgres_db
rag.key_string_value_json_storage_cls.db = postgres_db
rag.chunks_vdb.db = postgres_db
rag.relationships_vdb.db = postgres_db
rag.entities_vdb.db = postgres_db
rag.graph_storage_cls.db = postgres_db
rag.chunk_entity_relation_graph.db = postgres_db
# Add embedding_func for the graph database; it was deleted in commit 5661d76860436f7bf5aef2e50d9ee4a59660146c
rag.chunk_entity_relation_graph.embedding_func = rag.embedding_func