Merge pull request #383 from MRX760/main

added nvidia text-embedding API and example of using nvidia API llm a…
This commit is contained in:
zrguo
2024-12-04 11:21:46 +08:00
committed by GitHub
2 changed files with 199 additions and 0 deletions


@@ -0,0 +1,159 @@
import os
import asyncio
from lightrag import LightRAG, QueryParam
from lightrag.llm import openai_complete_if_cache, nvidia_openai_embedding, nvidia_openai_complete
from lightrag.utils import EmbeddingFunc
import numpy as np

# for a custom llm_model_func
from lightrag.utils import locate_json_string_body_from_string

WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

# Ways to provide your API key (choose one):
# NVIDIA_OPENAI_API_KEY = os.getenv("NVIDIA_OPENAI_API_KEY")
NVIDIA_OPENAI_API_KEY = "nvapi-xxxx"  # your API key

# Using the pre-defined function for the NVIDIA LLM API (OpenAI-compatible):
# llm_model_func = nvidia_openai_complete

# If you want a custom llm_model_func that calls an LLM on the NVIDIA API, as in the other examples:
async def llm_model_func(
    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> str:
    result = await openai_complete_if_cache(
        "nvidia/llama-3.1-nemotron-70b-instruct",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=NVIDIA_OPENAI_API_KEY,
        base_url="https://integrate.api.nvidia.com/v1",
        **kwargs,
    )
    if keyword_extraction:
        return locate_json_string_body_from_string(result)
    return result
# Custom embedding functions
nvidia_embed_model = "nvidia/nv-embedqa-e5-v5"


async def indexing_embedding_func(texts: list[str]) -> np.ndarray:
    return await nvidia_openai_embedding(
        texts,
        model=nvidia_embed_model,  # maximum 512 tokens
        # model="nvidia/llama-3.2-nv-embedqa-1b-v1",
        api_key=NVIDIA_OPENAI_API_KEY,
        base_url="https://integrate.api.nvidia.com/v1",
        input_type="passage",
        trunc="END",  # truncate on the server side if the input exceeds the maximum token count
        encode="float",
    )


async def query_embedding_func(texts: list[str]) -> np.ndarray:
    return await nvidia_openai_embedding(
        texts,
        model=nvidia_embed_model,  # maximum 512 tokens
        # model="nvidia/llama-3.2-nv-embedqa-1b-v1",
        api_key=NVIDIA_OPENAI_API_KEY,
        base_url="https://integrate.api.nvidia.com/v1",
        input_type="query",
        trunc="END",  # truncate on the server side if the input exceeds the maximum token count
        encode="float",
    )
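

# The two functions above differ only in input_type; a parameterized factory
# (just a sketch, not used below) would avoid the duplication:
def make_nvidia_embedding_func(input_type: str):
    async def _embed(texts: list[str]) -> np.ndarray:
        return await nvidia_openai_embedding(
            texts,
            model=nvidia_embed_model,
            api_key=NVIDIA_OPENAI_API_KEY,
            base_url="https://integrate.api.nvidia.com/v1",
            input_type=input_type,
            trunc="END",
            encode="float",
        )

    return _embed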
# Both embedding functions return vectors of the same dimension
async def get_embedding_dim():
    test_text = ["This is a test sentence."]
    embedding = await indexing_embedding_func(test_text)
    embedding_dim = embedding.shape[1]
    return embedding_dim


# function test
async def test_funcs():
    result = await llm_model_func("How are you?")
    print("llm_model_func: ", result)
    result = await indexing_embedding_func(["How are you?"])
    print("embedding_func: ", result)


# asyncio.run(test_funcs())
async def main():
    try:
        embedding_dimension = await get_embedding_dim()
        print(f"Detected embedding dimension: {embedding_dimension}")

        # LightRAG instance used for indexing
        rag = LightRAG(
            working_dir=WORKING_DIR,
            llm_model_func=llm_model_func,
            # llm_model_name="meta/llama3-70b-instruct",  # uncomment if needed
            embedding_func=EmbeddingFunc(
                embedding_dim=embedding_dimension,
                max_token_size=512,  # maximum token size; inputs may still exceed it because
                # LightRAG's tokenizer differs from the NVIDIA model's, so the trunc parameter
                # on the embedding function handles the overflow server-side (aligning the
                # tokenizers so chunks fit the NVIDIA model is future work)
                func=indexing_embedding_func,
            ),
        )

        # read and index the file
        with open("./book.txt", "r", encoding="utf-8") as f:
            await rag.ainsert(f.read())

        # Re-create the LightRAG instance so queries use the query-type embedding
        rag = LightRAG(
            working_dir=WORKING_DIR,
            llm_model_func=llm_model_func,
            # llm_model_name="meta/llama3-70b-instruct",  # uncomment if needed
            embedding_func=EmbeddingFunc(
                embedding_dim=embedding_dimension,
                max_token_size=512,
                func=query_embedding_func,
            ),
        )
        # Perform naive search
        print("==============Naive===============")
        print(
            await rag.aquery(
                "What are the top themes in this story?", param=QueryParam(mode="naive")
            )
        )

        # Perform local search
        print("==============local===============")
        print(
            await rag.aquery(
                "What are the top themes in this story?", param=QueryParam(mode="local")
            )
        )

        # Perform global search
        print("==============global===============")
        print(
            await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode="global"),
            )
        )

        # Perform hybrid search
        print("==============hybrid===============")
        print(
            await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode="hybrid"),
            )
        )
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    asyncio.run(main())
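
For reference, the pre-defined completion function can also be exercised on its own before wiring it into LightRAG. A minimal sketch, assuming the NVIDIA key is exported as OPENAI_API_KEY (the underlying OpenAI client reads it when no api_key argument is passed through):

import asyncio
from lightrag.llm import nvidia_openai_complete


async def smoke_test():
    # one-off call against the NVIDIA API; model and base_url are fixed inside the helper
    answer = await nvidia_openai_complete("How are you?")
    print(answer)


asyncio.run(smoke_test())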


@@ -502,6 +502,20 @@ async def gpt_4o_mini_complete(
        **kwargs,
    )
async def nvidia_openai_complete(
    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> str:
    result = await openai_complete_if_cache(
        "nvidia/llama-3.1-nemotron-70b-instruct",  # context length 128k
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url="https://integrate.api.nvidia.com/v1",
        **kwargs,
    )
    if keyword_extraction:  # TODO: use JSON API
        return locate_json_string_body_from_string(result)
    return result


async def azure_openai_complete(
    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
@@ -588,6 +602,32 @@ async def openai_embedding(
    return np.array([dp.embedding for dp in response.data])
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def nvidia_openai_embedding(
    texts: list[str],
    model: str = "nvidia/llama-3.2-nv-embedqa-1b-v1",  # see https://build.nvidia.com/nim?filters=usecase%3Ausecase_text_to_embedding
    base_url: str = "https://integrate.api.nvidia.com/v1",
    api_key: str = None,
    input_type: str = "passage",  # "query" for retrieval queries, "passage" for documents being indexed
    trunc: str = "NONE",  # "NONE", "START", or "END"
    encode: str = "float",  # "float" or "base64"
) -> np.ndarray:
    if api_key:
        # note: sets the key process-wide via the environment for the OpenAI client
        os.environ["OPENAI_API_KEY"] = api_key
    openai_async_client = (
        AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
    )
    response = await openai_async_client.embeddings.create(
        model=model,
        input=texts,
        encoding_format=encode,
        extra_body={"input_type": input_type, "truncate": trunc},
    )
    return np.array([dp.embedding for dp in response.data])
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
@retry(
    stop=stop_after_attempt(3),
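
A standalone usage sketch for the new embedding helper. The key is a placeholder, the texts are illustrative, and the shape comment assumes the default nvidia/llama-3.2-nv-embedqa-1b-v1 model, whose vectors are 2048-dimensional per the decorator above:

import asyncio
import numpy as np
from lightrag.llm import nvidia_openai_embedding


async def demo():
    # "passage" embeddings for documents being indexed, "query" for retrieval queries
    passages = await nvidia_openai_embedding(
        ["A Christmas Carol is a novella by Charles Dickens."],
        api_key="nvapi-xxxx",  # placeholder key
        input_type="passage",
        trunc="END",  # truncate server-side rather than erroring on long inputs
    )
    query = await nvidia_openai_embedding(
        ["What is A Christmas Carol about?"],
        api_key="nvapi-xxxx",  # placeholder key
        input_type="query",
        trunc="END",
    )
    print(passages.shape, query.shape)  # (1, 2048) each for the default model
    # cosine similarity between the query and the passage
    sim = (query @ passages.T).item() / (
        np.linalg.norm(query) * np.linalg.norm(passages)
    )
    print(f"cosine similarity: {sim:.3f}")


asyncio.run(demo())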