From 3117bc2e4ac5f1b4035f289756a89ac508ffd999 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 30 Apr 2025 18:48:41 +0800
Subject: [PATCH] Remove buggy example files

---
 examples/.env.oai.example        |   7 --
 examples/batch_eval.py           | 108 ---------------------
 examples/generate_query.py       |  55 -----------
 examples/get_all_edges_nx.py     |  40 --------
 examples/openai_README.md        | 114 ----------------------
 examples/openai_README_zh.md     | 115 ----------------------
 examples/test.py                 |  68 -------------
 examples/test_chromadb.py        | 158 -------------------------------
 examples/test_faiss.py           | 108 ---------------------
 examples/test_neo4j.py           |  71 --------------
 examples/test_postgres.py        |  51 ----------
 examples/vram_management_demo.py | 121 -----------------------
 12 files changed, 1016 deletions(-)
 delete mode 100644 examples/.env.oai.example
 delete mode 100644 examples/batch_eval.py
 delete mode 100644 examples/generate_query.py
 delete mode 100644 examples/get_all_edges_nx.py
 delete mode 100644 examples/openai_README.md
 delete mode 100644 examples/openai_README_zh.md
 delete mode 100644 examples/test.py
 delete mode 100644 examples/test_chromadb.py
 delete mode 100644 examples/test_faiss.py
 delete mode 100644 examples/test_neo4j.py
 delete mode 100644 examples/test_postgres.py
 delete mode 100644 examples/vram_management_demo.py

diff --git a/examples/.env.oai.example b/examples/.env.oai.example
deleted file mode 100644
index cea86da2..00000000
--- a/examples/.env.oai.example
+++ /dev/null
@@ -1,7 +0,0 @@
-AZURE_OPENAI_API_VERSION=2024-08-01-preview
-AZURE_OPENAI_DEPLOYMENT=gpt-4o
-AZURE_OPENAI_API_KEY=myapikey
-AZURE_OPENAI_ENDPOINT=https://myendpoint.openai.azure.com
-
-AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
-AZURE_EMBEDDING_API_VERSION=2023-05-15
diff --git a/examples/batch_eval.py b/examples/batch_eval.py
deleted file mode 100644
index a85e1ede..00000000
--- a/examples/batch_eval.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import re
-import json
-import jsonlines
-
-from openai import OpenAI
-
-
-def batch_eval(query_file, result1_file, result2_file, output_file_path):
-    client = OpenAI()
-
-    with open(query_file, "r") as f:
-        data = f.read()
-
-    queries = re.findall(r"- Question \d+: (.+)", data)
-
-    with open(result1_file, "r") as f:
-        answers1 = json.load(f)
-    answers1 = [i["result"] for i in answers1]
-
-    with open(result2_file, "r") as f:
-        answers2 = json.load(f)
-    answers2 = [i["result"] for i in answers2]
-
-    requests = []
-    for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
-        sys_prompt = """
-        ---Role---
-        You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
-        """
-
-        prompt = f"""
-        You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
-
-        - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
-        - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
-        - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
-
-        For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
- - Here is the question: - {query} - - Here are the two answers: - - **Answer 1:** - {answer1} - - **Answer 2:** - {answer2} - - Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion. - - Output your evaluation in the following JSON format: - - {{ - "Comprehensiveness": {{ - "Winner": "[Answer 1 or Answer 2]", - "Explanation": "[Provide explanation here]" - }}, - "Empowerment": {{ - "Winner": "[Answer 1 or Answer 2]", - "Explanation": "[Provide explanation here]" - }}, - "Overall Winner": {{ - "Winner": "[Answer 1 or Answer 2]", - "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]" - }} - }} - """ - - request_data = { - "custom_id": f"request-{i+1}", - "method": "POST", - "url": "/v1/chat/completions", - "body": { - "model": "gpt-4o-mini", - "messages": [ - {"role": "system", "content": sys_prompt}, - {"role": "user", "content": prompt}, - ], - }, - } - - requests.append(request_data) - - with jsonlines.open(output_file_path, mode="w") as writer: - for request in requests: - writer.write(request) - - print(f"Batch API requests written to {output_file_path}") - - batch_input_file = client.files.create( - file=open(output_file_path, "rb"), purpose="batch" - ) - batch_input_file_id = batch_input_file.id - - batch = client.batches.create( - input_file_id=batch_input_file_id, - endpoint="/v1/chat/completions", - completion_window="24h", - metadata={"description": "nightly eval job"}, - ) - - print(f"Batch {batch.id} has been created.") - - -if __name__ == "__main__": - batch_eval() diff --git a/examples/generate_query.py b/examples/generate_query.py deleted file mode 100644 index 705b23d3..00000000 --- a/examples/generate_query.py +++ /dev/null @@ -1,55 +0,0 @@ -from openai import OpenAI - -# os.environ["OPENAI_API_KEY"] = "" - - -def openai_complete_if_cache( - model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=[], **kwargs -) -> str: - openai_client = OpenAI() - - messages = [] - if system_prompt: - messages.append({"role": "system", "content": system_prompt}) - messages.extend(history_messages) - messages.append({"role": "user", "content": prompt}) - - response = openai_client.chat.completions.create( - model=model, messages=messages, **kwargs - ) - return response.choices[0].message.content - - -if __name__ == "__main__": - description = "" - prompt = f""" - Given the following description of a dataset: - - {description} - - Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset. - - Output the results in the following structure: - - User 1: [user description] - - Task 1: [task description] - - Question 1: - - Question 2: - - Question 3: - - Question 4: - - Question 5: - - Task 2: [task description] - ... - - Task 5: [task description] - - User 2: [user description] - ... - - User 5: [user description] - ... 
- """ - - result = openai_complete_if_cache(model="gpt-4o-mini", prompt=prompt) - - file_path = "./queries.txt" - with open(file_path, "w") as file: - file.write(result) - - print(f"Queries written to {file_path}") diff --git a/examples/get_all_edges_nx.py b/examples/get_all_edges_nx.py deleted file mode 100644 index b2c1d84e..00000000 --- a/examples/get_all_edges_nx.py +++ /dev/null @@ -1,40 +0,0 @@ -import networkx as nx - -G = nx.read_graphml("./dickensTestEmbedcall/graph_chunk_entity_relation.graphml") - - -def get_all_edges_and_nodes(G): - # Get all edges and their properties - edges_with_properties = [] - for u, v, data in G.edges(data=True): - edges_with_properties.append( - { - "start": u, - "end": v, - "label": data.get( - "label", "" - ), # Assuming 'label' is used for edge type - "properties": data, - "start_node_properties": G.nodes[u], - "end_node_properties": G.nodes[v], - } - ) - - return edges_with_properties - - -# Example usage -if __name__ == "__main__": - # Assume G is your NetworkX graph loaded from Neo4j - - all_edges = get_all_edges_and_nodes(G) - - # Print all edges and node properties - for edge in all_edges: - print(f"Edge Label: {edge['label']}") - print(f"Edge Properties: {edge['properties']}") - print(f"Start Node: {edge['start']}") - print(f"Start Node Properties: {edge['start_node_properties']}") - print(f"End Node: {edge['end']}") - print(f"End Node Properties: {edge['end_node_properties']}") - print("---") diff --git a/examples/openai_README.md b/examples/openai_README.md deleted file mode 100644 index e2d8d42e..00000000 --- a/examples/openai_README.md +++ /dev/null @@ -1,114 +0,0 @@ - -## API Server Implementation - -LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests. - -### Setting up the API Server -
-<summary>Click to expand setup instructions</summary>
-
-1. First, ensure you have the required dependencies:
-```bash
-pip install fastapi uvicorn pydantic
-```
-
-2. Set up your environment variables:
-```bash
-export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
-export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
-export OPENAI_API_KEY="Your OpenAI API key" # Required
-export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
-export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
-```
-
-3. Run the API server:
-```bash
-python examples/lightrag_api_openai_compatible_demo.py
-```
-
-The server will start on `http://0.0.0.0:8020`.
-</details>
-
-### API Endpoints
-
-The API server provides the following endpoints:
-
-#### 1. Query Endpoint
-<details>
-<summary>Click to view Query endpoint details</summary>
-
-- **URL:** `/query`
-- **Method:** POST
-- **Body:**
-```json
-{
-    "query": "Your question here",
-    "mode": "hybrid", // Can be "naive", "local", "global", or "hybrid"
-    "only_need_context": true // Optional: Defaults to false, if true, only the referenced context will be returned, otherwise the llm answer will be returned
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/query" \
-     -H "Content-Type: application/json" \
-     -d '{"query": "What are the main themes?", "mode": "hybrid"}'
-```
-</details>
-
-#### 2. Insert Text Endpoint
-<details>
-<summary>Click to view Insert Text endpoint details</summary>
-
-- **URL:** `/insert`
-- **Method:** POST
-- **Body:**
-```json
-{
-    "text": "Your text content here"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert" \
-     -H "Content-Type: application/json" \
-     -d '{"text": "Content to be inserted into RAG"}'
-```
-</details>
-
-#### 3. Insert File Endpoint
-<details>
-<summary>Click to view Insert File endpoint details</summary>
-
-- **URL:** `/insert_file`
-- **Method:** POST
-- **Body:**
-```json
-{
-    "file_path": "path/to/your/file.txt"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert_file" \
-     -H "Content-Type: application/json" \
-     -d '{"file_path": "./book.txt"}'
-```
-</details>
-
-#### 4. Health Check Endpoint
-<details>
-<summary>Click to view Health Check endpoint details</summary>
-
-- **URL:** `/health`
-- **Method:** GET
-- **Example:**
-```bash
-curl -X GET "http://127.0.0.1:8020/health"
-```
-</details>
-
-### Configuration
-
-The API server can be configured using environment variables:
-- `RAG_DIR`: Directory for storing the RAG index (default: "index_default")
-- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers
diff --git a/examples/openai_README_zh.md b/examples/openai_README_zh.md
deleted file mode 100644
index 068b2caf..00000000
--- a/examples/openai_README_zh.md
+++ /dev/null
@@ -1,115 +0,0 @@
-
-## API 服务器实现
-
-LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests.
-LightRAG 还提供基于 FastAPI 的服务器实现,用于对 RAG 操作进行 RESTful API 访问。这允许您将 LightRAG 作为服务运行并通过 HTTP 请求与其交互。
-
-### 设置 API 服务器
-<details>
-<summary>单击展开设置说明</summary>
-
-1. 首先,确保您具有所需的依赖项:
-```bash
-pip install fastapi uvicorn pydantic
-```
-
-2. 设置您的环境变量:
-```bash
-export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
-export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
-export OPENAI_API_KEY="Your OpenAI API key" # Required
-export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
-export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
-```
-
-3. 运行API服务器:
-```bash
-python examples/lightrag_api_openai_compatible_demo.py
-```
-
-服务器将启动于 `http://0.0.0.0:8020`.
-</details>
-
-### API端点
-
-API服务器提供以下端点:
-
-#### 1. 查询端点
-<details>
-<summary>点击查看查询端点详情</summary>
-
-- **URL:** `/query`
-- **Method:** POST
-- **Body:**
-```json
-{
-    "query": "Your question here",
-    "mode": "hybrid", // Can be "naive", "local", "global", or "hybrid"
-    "only_need_context": true // Optional: Defaults to false, if true, only the referenced context will be returned, otherwise the llm answer will be returned
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/query" \
-     -H "Content-Type: application/json" \
-     -d '{"query": "What are the main themes?", "mode": "hybrid"}'
-```
-</details>
-
-#### 2. 插入文本端点
-<details>
-<summary>单击可查看插入文本端点详细信息</summary>
-
-- **URL:** `/insert`
-- **Method:** POST
-- **Body:**
-```json
-{
-    "text": "Your text content here"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert" \
-     -H "Content-Type: application/json" \
-     -d '{"text": "Content to be inserted into RAG"}'
-```
-</details>
-
-#### 3. 插入文件端点
-<details>
-<summary>单击查看插入文件端点详细信息</summary>
-
-- **URL:** `/insert_file`
-- **Method:** POST
-- **Body:**
-```json
-{
-    "file_path": "path/to/your/file.txt"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert_file" \
-     -H "Content-Type: application/json" \
-     -d '{"file_path": "./book.txt"}'
-```
-</details>
-
-#### 4. 健康检查端点
-<details>
-<summary>点击查看健康检查端点详细信息</summary>
-
-- **URL:** `/health`
-- **Method:** GET
-- **Example:**
-```bash
-curl -X GET "http://127.0.0.1:8020/health"
-```
-</details>
- -### 配置 - -可以使用环境变量配置API服务器: -- `RAG_DIR`: 存放RAG索引的目录 (default: "index_default") -- 应在代码中为您的特定 LLM 和嵌入模型提供商配置 API 密钥和基本 URL diff --git a/examples/test.py b/examples/test.py deleted file mode 100644 index f2456436..00000000 --- a/examples/test.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import asyncio -from lightrag import LightRAG, QueryParam -from lightrag.llm.openai import gpt_4o_mini_complete -from lightrag.kg.shared_storage import initialize_pipeline_status -######### -# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() -# import nest_asyncio -# nest_asyncio.apply() -######### - -WORKING_DIR = "./dickens" - -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - - -async def initialize_rag(): - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model - # llm_model_func=gpt_4o_complete # Optionally, use a stronger model - ) - - await rag.initialize_storages() - await initialize_pipeline_status() - - return rag - - -def main(): - # Initialize RAG instance - rag = asyncio.run(initialize_rag()) - - with open("./book.txt", "r", encoding="utf-8") as f: - rag.insert(f.read()) - - # Perform naive search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="naive") - ) - ) - - # Perform local search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="local") - ) - ) - - # Perform global search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="global") - ) - ) - - # Perform hybrid search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="hybrid") - ) - ) - - -if __name__ == "__main__": - main() diff --git a/examples/test_chromadb.py b/examples/test_chromadb.py deleted file mode 100644 index e4e9b698..00000000 --- a/examples/test_chromadb.py +++ /dev/null @@ -1,158 +0,0 @@ -import os -import asyncio -from lightrag import LightRAG, QueryParam -from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed -from lightrag.utils import EmbeddingFunc -import numpy as np -from lightrag.kg.shared_storage import initialize_pipeline_status - -######### -# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() -# import nest_asyncio -# nest_asyncio.apply() -######### -WORKING_DIR = "./chromadb_test_dir" -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - -# ChromaDB Configuration -CHROMADB_USE_LOCAL_PERSISTENT = False -# Local PersistentClient Configuration -CHROMADB_LOCAL_PATH = os.environ.get( - "CHROMADB_LOCAL_PATH", os.path.join(WORKING_DIR, "chroma_data") -) -# Remote HttpClient Configuration -CHROMADB_HOST = os.environ.get("CHROMADB_HOST", "localhost") -CHROMADB_PORT = int(os.environ.get("CHROMADB_PORT", 8000)) -CHROMADB_AUTH_TOKEN = os.environ.get("CHROMADB_AUTH_TOKEN", "secret-token") -CHROMADB_AUTH_PROVIDER = os.environ.get( - "CHROMADB_AUTH_PROVIDER", "chromadb.auth.token_authn.TokenAuthClientProvider" -) -CHROMADB_AUTH_HEADER = os.environ.get("CHROMADB_AUTH_HEADER", "X-Chroma-Token") - -# Embedding Configuration and Functions -EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large") -EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192)) - -# ChromaDB requires knowing the dimension of embeddings upfront when -# creating a collection. The embedding dimension is model-specific -# (e.g. 
text-embedding-3-large uses 3072 dimensions) -# we dynamically determine it by running a test embedding -# and then pass it to the ChromaDBStorage class - - -async def embedding_func(texts: list[str]) -> np.ndarray: - return await openai_embed( - texts, - model=EMBEDDING_MODEL, - ) - - -async def get_embedding_dimension(): - test_text = ["This is a test sentence."] - embedding = await embedding_func(test_text) - return embedding.shape[1] - - -async def create_embedding_function_instance(): - # Get embedding dimension - embedding_dimension = await get_embedding_dimension() - # Create embedding function instance - return EmbeddingFunc( - embedding_dim=embedding_dimension, - max_token_size=EMBEDDING_MAX_TOKEN_SIZE, - func=embedding_func, - ) - - -async def initialize_rag(): - embedding_func_instance = await create_embedding_function_instance() - if CHROMADB_USE_LOCAL_PERSISTENT: - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=gpt_4o_mini_complete, - embedding_func=embedding_func_instance, - vector_storage="ChromaVectorDBStorage", - log_level="DEBUG", - embedding_batch_num=32, - vector_db_storage_cls_kwargs={ - "local_path": CHROMADB_LOCAL_PATH, - "collection_settings": { - "hnsw:space": "cosine", - "hnsw:construction_ef": 128, - "hnsw:search_ef": 128, - "hnsw:M": 16, - "hnsw:batch_size": 100, - "hnsw:sync_threshold": 1000, - }, - }, - ) - else: - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=gpt_4o_mini_complete, - embedding_func=embedding_func_instance, - vector_storage="ChromaVectorDBStorage", - log_level="DEBUG", - embedding_batch_num=32, - vector_db_storage_cls_kwargs={ - "host": CHROMADB_HOST, - "port": CHROMADB_PORT, - "auth_token": CHROMADB_AUTH_TOKEN, - "auth_provider": CHROMADB_AUTH_PROVIDER, - "auth_header_name": CHROMADB_AUTH_HEADER, - "collection_settings": { - "hnsw:space": "cosine", - "hnsw:construction_ef": 128, - "hnsw:search_ef": 128, - "hnsw:M": 16, - "hnsw:batch_size": 100, - "hnsw:sync_threshold": 1000, - }, - }, - ) - - await rag.initialize_storages() - await initialize_pipeline_status() - - return rag - - -def main(): - # Initialize RAG instance - rag = asyncio.run(initialize_rag()) - - with open("./book.txt", "r", encoding="utf-8") as f: - rag.insert(f.read()) - - # Perform naive search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="naive") - ) - ) - - # Perform local search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="local") - ) - ) - - # Perform global search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="global") - ) - ) - - # Perform hybrid search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="hybrid") - ) - ) - - -if __name__ == "__main__": - main() diff --git a/examples/test_faiss.py b/examples/test_faiss.py deleted file mode 100644 index febdce14..00000000 --- a/examples/test_faiss.py +++ /dev/null @@ -1,108 +0,0 @@ -import os -import logging -import asyncio -import numpy as np - -from dotenv import load_dotenv -from sentence_transformers import SentenceTransformer - -from openai import AzureOpenAI -from lightrag import LightRAG, QueryParam -from lightrag.utils import EmbeddingFunc -from lightrag.kg.shared_storage import initialize_pipeline_status - -WORKING_DIR = "./dickens" -# Configure Logging -logging.basicConfig(level=logging.INFO) - -# Load environment variables from .env file -load_dotenv() -AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION") 
-AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT") -AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY") -AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT") - - -async def llm_model_func( - prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs -) -> str: - # Create a client for AzureOpenAI - client = AzureOpenAI( - api_key=AZURE_OPENAI_API_KEY, - api_version=AZURE_OPENAI_API_VERSION, - azure_endpoint=AZURE_OPENAI_ENDPOINT, - ) - - # Build the messages list for the conversation - messages = [] - if system_prompt: - messages.append({"role": "system", "content": system_prompt}) - if history_messages: - messages.extend(history_messages) - messages.append({"role": "user", "content": prompt}) - - # Call the LLM - chat_completion = client.chat.completions.create( - model=AZURE_OPENAI_DEPLOYMENT, - messages=messages, - temperature=kwargs.get("temperature", 0), - top_p=kwargs.get("top_p", 1), - n=kwargs.get("n", 1), - ) - - return chat_completion.choices[0].message.content - - -async def embedding_func(texts: list[str]) -> np.ndarray: - model = SentenceTransformer("all-MiniLM-L6-v2") - embeddings = model.encode(texts, convert_to_numpy=True) - return embeddings - - -async def initialize_rag(): - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=384, - max_token_size=8192, - func=embedding_func, - ), - vector_storage="FaissVectorDBStorage", - vector_db_storage_cls_kwargs={ - "cosine_better_than_threshold": 0.2 # Your desired threshold - }, - ) - - await rag.initialize_storages() - await initialize_pipeline_status() - - return rag - - -def main(): - # Initialize RAG instance - rag = asyncio.run(initialize_rag()) - # Insert the custom chunks into LightRAG - book1 = open("./book_1.txt", encoding="utf-8") - book2 = open("./book_2.txt", encoding="utf-8") - - rag.insert([book1.read(), book2.read()]) - - query_text = "What are the main themes?" 
- - print("Result (Naive):") - print(rag.query(query_text, param=QueryParam(mode="naive"))) - - print("\nResult (Local):") - print(rag.query(query_text, param=QueryParam(mode="local"))) - - print("\nResult (Global):") - print(rag.query(query_text, param=QueryParam(mode="global"))) - - print("\nResult (Hybrid):") - print(rag.query(query_text, param=QueryParam(mode="hybrid"))) - - -if __name__ == "__main__": - main() diff --git a/examples/test_neo4j.py b/examples/test_neo4j.py deleted file mode 100644 index 7f620acc..00000000 --- a/examples/test_neo4j.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -import asyncio -from lightrag import LightRAG, QueryParam -from lightrag.llm.openai import gpt_4o_mini_complete -from lightrag.kg.shared_storage import initialize_pipeline_status - -######### -# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() -# import nest_asyncio -# nest_asyncio.apply() -######### - -WORKING_DIR = "./local_neo4jWorkDir" - -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - - -async def initialize_rag(): - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model - graph_storage="Neo4JStorage", - log_level="INFO", - # llm_model_func=gpt_4o_complete # Optionally, use a stronger model - ) - - await rag.initialize_storages() - await initialize_pipeline_status() - - return rag - - -def main(): - # Initialize RAG instance - rag = asyncio.run(initialize_rag()) - - with open("./book.txt", "r", encoding="utf-8") as f: - rag.insert(f.read()) - - # Perform naive search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="naive") - ) - ) - - # Perform local search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="local") - ) - ) - - # Perform global search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="global") - ) - ) - - # Perform hybrid search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="hybrid") - ) - ) - - -if __name__ == "__main__": - main() diff --git a/examples/test_postgres.py b/examples/test_postgres.py deleted file mode 100644 index e1f796c6..00000000 --- a/examples/test_postgres.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import asyncio -from lightrag.kg.postgres_impl import PGGraphStorage -from lightrag.llm.ollama import ollama_embedding -from lightrag.utils import EmbeddingFunc - -######### -# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert() -# import nest_asyncio -# nest_asyncio.apply() -######### - -WORKING_DIR = "./local_neo4jWorkDir" - -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - -# AGE -os.environ["AGE_GRAPH_NAME"] = "dickens" - -os.environ["POSTGRES_HOST"] = "localhost" -os.environ["POSTGRES_PORT"] = "15432" -os.environ["POSTGRES_USER"] = "rag" -os.environ["POSTGRES_PASSWORD"] = "rag" -os.environ["POSTGRES_DATABASE"] = "rag" - - -async def main(): - graph_db = PGGraphStorage( - namespace="dickens", - embedding_func=EmbeddingFunc( - embedding_dim=1024, - max_token_size=8192, - func=lambda texts: ollama_embedding( - texts, embed_model="bge-m3", host="http://localhost:11434" - ), - ), - global_config={}, - ) - await graph_db.initialize() - labels = await graph_db.get_all_labels() - print("all labels", labels) - - res = await graph_db.get_knowledge_graph("FEZZIWIG") - print("knowledge graphs", res) - - await 
graph_db.finalize() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/vram_management_demo.py b/examples/vram_management_demo.py deleted file mode 100644 index 36eb5468..00000000 --- a/examples/vram_management_demo.py +++ /dev/null @@ -1,121 +0,0 @@ -import os -import time -import asyncio -from lightrag import LightRAG, QueryParam -from lightrag.llm.ollama import ollama_model_complete, ollama_embed -from lightrag.utils import EmbeddingFunc -from lightrag.kg.shared_storage import initialize_pipeline_status - -# Working directory and the directory path for text files -WORKING_DIR = "./dickens" -TEXT_FILES_DIR = "/llm/mt" - -# Create the working directory if it doesn't exist -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - - -async def initialize_rag(): - # Initialize LightRAG - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=ollama_model_complete, - llm_model_name="qwen2.5:3b-instruct-max-context", - embedding_func=EmbeddingFunc( - embedding_dim=768, - max_token_size=8192, - func=lambda texts: ollama_embed(texts, embed_model="nomic-embed-text"), - ), - ) - await rag.initialize_storages() - await initialize_pipeline_status() - - return rag - - -# Read all .txt files from the TEXT_FILES_DIR directory -texts = [] -for filename in os.listdir(TEXT_FILES_DIR): - if filename.endswith(".txt"): - file_path = os.path.join(TEXT_FILES_DIR, filename) - with open(file_path, "r", encoding="utf-8") as file: - texts.append(file.read()) - - -# Batch insert texts into LightRAG with a retry mechanism -def insert_texts_with_retry(rag, texts, retries=3, delay=5): - for _ in range(retries): - try: - rag.insert(texts) - return - except Exception as e: - print( - f"Error occurred during insertion: {e}. Retrying in {delay} seconds..." - ) - time.sleep(delay) - raise RuntimeError("Failed to insert texts after multiple retries.") - - -def main(): - # Initialize RAG instance - rag = asyncio.run(initialize_rag()) - - insert_texts_with_retry(rag, texts) - - # Perform different types of queries and handle potential errors - try: - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="naive") - ) - ) - except Exception as e: - print(f"Error performing naive search: {e}") - - try: - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="local") - ) - ) - except Exception as e: - print(f"Error performing local search: {e}") - - try: - print( - rag.query( - "What are the top themes in this story?", - param=QueryParam(mode="global"), - ) - ) - except Exception as e: - print(f"Error performing global search: {e}") - - try: - print( - rag.query( - "What are the top themes in this story?", - param=QueryParam(mode="hybrid"), - ) - ) - except Exception as e: - print(f"Error performing hybrid search: {e}") - - # Function to clear VRAM resources - def clear_vram(): - os.system("sudo nvidia-smi --gpu-reset") - - # Regularly clear VRAM to prevent overflow - clear_vram_interval = 3600 # Clear once every hour - start_time = time.time() - - while True: - current_time = time.time() - if current_time - start_time > clear_vram_interval: - clear_vram() - start_time = current_time - time.sleep(60) # Check the time every minute - - -if __name__ == "__main__": - main()