Use the context manager for the openai client
This avoids resource-cleanup issues (too many open files) when making massively parallel calls to the OpenAI API, since relying on RAII-style implicit cleanup in Python is unreliable in such contexts.
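A minimal sketch of the pattern under parallel load (the helper names, model name, and prompt loop below are illustrative, not the repository's actual code): entering the async client as a context manager closes its HTTP connections deterministically when the block exits, instead of leaving cleanup to garbage collection. Without it, each leaked client keeps sockets open until the GC eventually runs, which under hundreds of concurrent tasks can exhaust the process's file-descriptor limit.

import asyncio
from openai import AsyncOpenAI

async def complete(prompt: str) -> str:
    # Entering the client as an async context manager guarantees the
    # underlying HTTP resources are released when the block exits,
    # even if the request raises.
    async with AsyncOpenAI() as client:  # reads OPENAI_API_KEY from the environment
        response = await client.chat.completions.create(
            model="gpt-4o-mini",  # illustrative model name
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content or ""

async def main() -> None:
    # Many parallel calls, each with deterministic client cleanup.
    results = await asyncio.gather(*(complete(f"question {i}") for i in range(100)))
    print(len(results))

if __name__ == "__main__":
    asyncio.run(main())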
@@ -177,6 +177,7 @@ async def openai_complete_if_cache(
     logger.debug("===== Sending Query to LLM =====")
 
     try:
+        async with openai_async_client:
             if "response_format" in kwargs:
                 response = await openai_async_client.beta.chat.completions.parse(
                     model=model, messages=messages, **kwargs
@@ -421,6 +422,7 @@ async def openai_embed(
         api_key=api_key, base_url=base_url, client_configs=client_configs
     )
 
+    async with openai_async_client:
         response = await openai_async_client.embeddings.create(
             model=model, input=texts, encoding_format="float"
         )
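For reference, the embedding path after this change reads roughly as the sketch below. Assumptions: the real openai_embed builds its client from api_key/base_url/client_configs via the repository's client factory and does additional post-processing; the signature, client construction, and return value here are simplified.

from openai import AsyncOpenAI

async def openai_embed_sketch(
    texts: list[str], model: str = "text-embedding-3-small"
) -> list[list[float]]:
    # Hypothetical stand-in for the repository's client factory.
    openai_async_client = AsyncOpenAI()
    # The context manager closes the client's connections as soon as the
    # embeddings call finishes, so thousands of concurrent calls do not
    # accumulate open file descriptors.
    async with openai_async_client:
        response = await openai_async_client.embeddings.create(
            model=model, input=texts, encoding_format="float"
        )
    return [item.embedding for item in response.data]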