Added the NVIDIA text-embedding API and an example of using the NVIDIA API for LLM completion and text embedding
This commit is contained in:
@@ -502,6 +502,20 @@ async def gpt_4o_mini_complete(
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
async def nvidia_openai_complete(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Complete ``prompt`` with the ``nvidia/llama-3.1-nemotron-70b-instruct`` model.

    The request is delegated to ``openai_complete_if_cache`` with the base URL
    fixed to ``https://integrate.api.nvidia.com/v1``.

    Args:
        prompt: User prompt forwarded to the model.
        system_prompt: Optional system prompt, forwarded unchanged.
        history_messages: Earlier conversation messages. ``None`` (the
            default) is treated as an empty history.
        keyword_extraction: When true, the raw completion is passed through
            ``locate_json_string_body_from_string`` and that value is
            returned instead of the raw text.
        **kwargs: Forwarded unchanged to ``openai_complete_if_cache``.

    Returns:
        The completion returned by ``openai_complete_if_cache``, or the value
        returned by ``locate_json_string_body_from_string`` when
        ``keyword_extraction`` is set.
    """
    # Build a fresh list per call: a ``[]`` default is a single object shared
    # by every call that omits the argument, so any mutation further down the
    # call chain would leak between unrelated requests.
    if history_messages is None:
        history_messages = []

    result = await openai_complete_if_cache(
        "nvidia/llama-3.1-nemotron-70b-instruct",  # context length 128k
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url="https://integrate.api.nvidia.com/v1",
        **kwargs,
    )
    if keyword_extraction:  # TODO: use JSON API
        return locate_json_string_body_from_string(result)
    return result
|
||||
|
||||
async def azure_openai_complete(
|
||||
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
|
||||
@@ -588,6 +602,32 @@ async def openai_embedding(
|
||||
return np.array([dp.embedding for dp in response.data])
|
||||
|
||||
|
||||
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
)
async def nvidia_openai_embedding(
    texts: list[str],
    model: str = "nvidia/llama-3.2-nv-embedqa-1b-v1",  # refer to https://build.nvidia.com/nim?filters=usecase%3Ausecase_text_to_embedding
    base_url: str = "https://integrate.api.nvidia.com/v1",
    api_key: str = None,
    input_type: str = "passage",  # query for retrieval, passage for embedding
    trunc: str = "NONE",  # NONE or START or END
    encode: str = "float",  # float or base64
) -> np.ndarray:
    """Embed ``texts`` through an OpenAI-style embeddings endpoint.

    Args:
        texts: Strings to embed; sent as the request ``input``.
        model: Embedding model identifier.
        base_url: Endpoint base URL. ``None`` lets ``AsyncOpenAI`` pick its
            own default.
        api_key: When truthy, written to the ``OPENAI_API_KEY`` environment
            variable before the client is created.
        input_type: Sent in the request body as ``input_type``
            (``"query"`` or ``"passage"``).
        trunc: Sent in the request body as ``truncate``
            (``"NONE"``, ``"START"`` or ``"END"``).
        encode: Sent as ``encoding_format`` (``"float"`` or ``"base64"``).

    Returns:
        A NumPy array with one row per item in the response, in response
        order. Base64 payloads are decoded to floats so both ``encode``
        settings yield numeric vectors.
    """
    import base64  # local import: only the base64 response path needs it

    if api_key:
        # NOTE(review): this overwrites OPENAI_API_KEY for the whole process,
        # so later OpenAI clients will also pick up this key. Kept because
        # callers may depend on it; passing the key to the client directly
        # would be cleaner.
        os.environ["OPENAI_API_KEY"] = api_key

    openai_async_client = (
        AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
    )
    response = await openai_async_client.embeddings.create(
        model=model,
        input=texts,
        encoding_format=encode,
        extra_body={"input_type": input_type, "truncate": trunc},
    )

    vectors = []
    for dp in response.data:
        embedding = dp.embedding
        if isinstance(embedding, str):
            # With an explicit ``encoding_format="base64"`` the SDK hands the
            # payload back undecoded. Decode it as little-endian float32, the
            # same layout the OpenAI SDK assumes for its own base64 responses
            # (assumed to hold for this endpoint too -- TODO confirm).
            embedding = np.frombuffer(
                base64.b64decode(embedding), dtype="<f4"
            ).tolist()
        vectors.append(embedding)
    return np.array(vectors)
|
||||
|
||||
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
|
||||
@retry(
|
||||
stop=stop_after_attempt(3),
|
||||
|
Reference in New Issue
Block a user