Merge pull request #423 from davidleon/feature/jina_embedding

add jina embedding
This commit is contained in:
zrguo
2024-12-09 10:18:50 +08:00
committed by GitHub
2 changed files with 148 additions and 0 deletions

View File

@@ -583,6 +583,40 @@ async def openai_embedding(
return np.array([dp.embedding for dp in response.data])
async def fetch_data(url, headers, data):
    """POST ``data`` as JSON to ``url`` and return the reply's ``"data"`` entry.

    Args:
        url: Endpoint the request is sent to.
        headers: HTTP headers passed along with the POST request.
        data: Payload that is sent as the JSON request body.

    Returns:
        The value stored under the ``"data"`` key of the decoded JSON
        response, or an empty list when that key is absent.
    """
    # A fresh client session is opened for this call and closed again as
    # soon as the response body has been decoded.
    async with aiohttp.ClientSession() as client:
        async with client.post(url, headers=headers, json=data) as reply:
            payload = await reply.json()
    return payload.get("data", [])
async def jina_embedding(
    texts: list[str],
    dimensions: int = 1024,
    late_chunking: bool = False,
    base_url: str = None,
    api_key: str = None,
) -> np.ndarray:
    """Embed ``texts`` with the ``jina-embeddings-v3`` model.

    Args:
        texts: Strings to embed; sent as the request's ``input`` field.
        dimensions: Value sent as the request's ``dimensions`` field.
        late_chunking: Value sent as the request's ``late_chunking`` field.
        base_url: Endpoint to POST to. When falsy, the public endpoint
            ``https://api.jina.ai/v1/embeddings`` is used.
        api_key: API key. When given, it is stored in the ``JINA_API_KEY``
            environment variable and used for this request; otherwise the
            variable's existing value is used.

    Returns:
        An array built from the ``"embedding"`` entry of every item that
        ``fetch_data`` returns for the request.

    Raises:
        KeyError: If ``api_key`` is not given and ``JINA_API_KEY`` is not
            set in the environment.
    """
    if api_key:
        os.environ["JINA_API_KEY"] = api_key
    # Read the key into a local before formatting: reusing double quotes
    # inside a double-quoted f-string is a SyntaxError before Python 3.12.
    token = os.environ["JINA_API_KEY"]
    url = base_url if base_url else "https://api.jina.ai/v1/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }
    data = {
        "model": "jina-embeddings-v3",
        "normalized": True,
        "embedding_type": "float",
        # Passed through as given (an int by default) rather than being
        # stringified, matching the integer type the API documents.
        "dimensions": dimensions,
        "late_chunking": late_chunking,
        "input": texts,
    }
    data_list = await fetch_data(url, headers, data)
    return np.array([item["embedding"] for item in data_list])
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
@retry(
stop=stop_after_attempt(3),