diff --git a/examples/query_keyword_separation_example.py b/examples/query_keyword_separation_example.py
deleted file mode 100644
index 092330f4..00000000
--- a/examples/query_keyword_separation_example.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import os
-import asyncio
-from lightrag import LightRAG, QueryParam
-from lightrag.utils import EmbeddingFunc
-import numpy as np
-from dotenv import load_dotenv
-import logging
-from openai import AzureOpenAI
-from lightrag.kg.shared_storage import initialize_pipeline_status
-
-logging.basicConfig(level=logging.INFO)
-
-load_dotenv()
-
-AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
-AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
-AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
-AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
-
-AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT")
-AZURE_EMBEDDING_API_VERSION = os.getenv("AZURE_EMBEDDING_API_VERSION")
-
-WORKING_DIR = "./dickens"
-
-if os.path.exists(WORKING_DIR):
-    import shutil
-
-    shutil.rmtree(WORKING_DIR)
-
-os.mkdir(WORKING_DIR)
-
-
-async def llm_model_func(
-    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
-) -> str:
-    client = AzureOpenAI(
-        api_key=AZURE_OPENAI_API_KEY,
-        api_version=AZURE_OPENAI_API_VERSION,
-        azure_endpoint=AZURE_OPENAI_ENDPOINT,
-    )
-
-    messages = []
-    if system_prompt:
-        messages.append({"role": "system", "content": system_prompt})
-    if history_messages:
-        messages.extend(history_messages)
-    messages.append({"role": "user", "content": prompt})
-
-    chat_completion = client.chat.completions.create(
-        model=AZURE_OPENAI_DEPLOYMENT,  # model = "deployment_name".
-        messages=messages,
-        temperature=kwargs.get("temperature", 0),
-        top_p=kwargs.get("top_p", 1),
-        n=kwargs.get("n", 1),
-    )
-    return chat_completion.choices[0].message.content
-
-
-async def embedding_func(texts: list[str]) -> np.ndarray:
-    client = AzureOpenAI(
-        api_key=AZURE_OPENAI_API_KEY,
-        api_version=AZURE_EMBEDDING_API_VERSION,
-        azure_endpoint=AZURE_OPENAI_ENDPOINT,
-    )
-    embedding = client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
-
-    embeddings = [item.embedding for item in embedding.data]
-    return np.array(embeddings)
-
-
-async def test_funcs():
-    result = await llm_model_func("How are you?")
-    print("llm_model_func response: ", result)
-
-    result = await embedding_func(["How are you?"])
-    print("embedding_func result: ", result.shape)
-    print("Embedding dimension: ", result.shape[1])
-
-
-asyncio.run(test_funcs())
-
-embedding_dimension = 3072
-
-
-async def initialize_rag():
-    rag = LightRAG(
-        working_dir=WORKING_DIR,
-        llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=embedding_dimension,
-            max_token_size=8192,
-            func=embedding_func,
-        ),
-    )
-
-    await rag.initialize_storages()
-    await initialize_pipeline_status()
-
-    return rag
-
-
-# Example function demonstrating the new query_with_separate_keyword_extraction usage
-async def run_example():
-    # Initialize RAG instance
-    rag = await initialize_rag()
-
-    book1 = open("./book_1.txt", encoding="utf-8")
-    book2 = open("./book_2.txt", encoding="utf-8")
-
-    rag.insert([book1.read(), book2.read()])
-    query = "What are the top themes in this story?"
-    prompt = "Please simplify the response for a young audience."
-
-    # Using the new method to ensure the keyword extraction is only applied to the query
-    response = rag.query_with_separate_keyword_extraction(
-        query=query,
-        prompt=prompt,
-        param=QueryParam(mode="hybrid"),  # Adjust QueryParam mode as necessary
-    )
-
-    print("Extracted Response:", response)
-
-
-# Run the example asynchronously
-if __name__ == "__main__":
-    asyncio.run(run_example())