Remove tqdm and clean up README and ollama

This commit is contained in:
Yannick Stephan
2025-02-18 19:58:03 +01:00
parent 24ae083284
commit 2524e02428
16 changed files with 30 additions and 141 deletions

View File

@@ -7,5 +7,4 @@ python-multipart
tenacity
tiktoken
torch
tqdm
uvicorn

View File

@@ -22,7 +22,6 @@ if not pm.is_installed("faiss"):
try:
import faiss
from tqdm.asyncio import tqdm as tqdm_async
except ImportError as e:
raise ImportError(
"`faiss` library is not installed. Please install it via pip: `pip install faiss`."
@@ -109,16 +108,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
for i in range(0, len(contents), self._max_batch_size)
]
pbar = tqdm_async(
total=len(batches), desc="Generating embeddings", unit="batch"
)
async def wrapped_task(batch):
result = await self.embedding_func(batch)
pbar.update(1)
return result
embedding_tasks = [wrapped_task(batch) for batch in batches]
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = await asyncio.gather(*embedding_tasks)
# Flatten the list of arrays

View File

@@ -1,7 +1,6 @@
import asyncio
import os
from typing import Any, final
from tqdm.asyncio import tqdm as tqdm_async
from dataclasses import dataclass
import numpy as np
from lightrag.utils import logger
@@ -94,15 +93,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
for i in range(0, len(contents), self._max_batch_size)
]
async def wrapped_task(batch):
result = await self.embedding_func(batch)
pbar.update(1)
return result
embedding_tasks = [wrapped_task(batch) for batch in batches]
pbar = tqdm_async(
total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
)
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = await asyncio.gather(*embedding_tasks)
embeddings = np.concatenate(embeddings_list)

View File

@@ -2,7 +2,6 @@ import os
from dataclasses import dataclass
import numpy as np
import configparser
from tqdm.asyncio import tqdm as tqdm_async
import asyncio
from typing import Any, List, Union, final
@@ -854,17 +853,8 @@ class MongoVectorDBStorage(BaseVectorStorage):
for i in range(0, len(contents), self._max_batch_size)
]
async def wrapped_task(batch):
result = await self.embedding_func(batch)
pbar.update(1)
return result
embedding_tasks = [wrapped_task(batch) for batch in batches]
pbar = tqdm_async(
total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
)
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = await asyncio.gather(*embedding_tasks)
embeddings = np.concatenate(embeddings_list)
for i, d in enumerate(list_data):
d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()

View File

@@ -1,7 +1,6 @@
import asyncio
import os
from typing import Any, final
from tqdm.asyncio import tqdm as tqdm_async
from dataclasses import dataclass
import numpy as np
@@ -71,15 +70,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
for i in range(0, len(contents), self._max_batch_size)
]
async def wrapped_task(batch):
result = await self.embedding_func(batch)
pbar.update(1)
return result
embedding_tasks = [wrapped_task(batch) for batch in batches]
pbar = tqdm_async(
total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
)
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = await asyncio.gather(*embedding_tasks)
embeddings = np.concatenate(embeddings_list)

View File

@@ -41,7 +41,6 @@ if not pm.is_installed("asyncpg"):
try:
import asyncpg
from tqdm.asyncio import tqdm as tqdm_async
except ImportError as e:
raise ImportError(
@@ -380,15 +379,7 @@ class PGVectorStorage(BaseVectorStorage):
for i in range(0, len(contents), self._max_batch_size)
]
async def wrapped_task(batch):
result = await self.embedding_func(batch)
pbar.update(1)
return result
embedding_tasks = [wrapped_task(batch) for batch in batches]
pbar = tqdm_async(
total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
)
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = await asyncio.gather(*embedding_tasks)
embeddings = np.concatenate(embeddings_list)

View File

@@ -1,7 +1,6 @@
import asyncio
import os
from typing import Any, final
from tqdm.asyncio import tqdm as tqdm_async
from dataclasses import dataclass
import numpy as np
import hashlib
@@ -110,15 +109,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
for i in range(0, len(contents), self._max_batch_size)
]
async def wrapped_task(batch):
result = await self.embedding_func(batch)
pbar.update(1)
return result
embedding_tasks = [wrapped_task(batch) for batch in batches]
pbar = tqdm_async(
total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
)
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = await asyncio.gather(*embedding_tasks)
embeddings = np.concatenate(embeddings_list)

View File

@@ -1,6 +1,5 @@
import os
from typing import Any, final
from tqdm.asyncio import tqdm as tqdm_async
from dataclasses import dataclass
import pipmaster as pm
import configparser
@@ -51,7 +50,8 @@ class RedisKVStorage(BaseKVStorage):
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
pipe = self._redis.pipeline()
for k, v in tqdm_async(data.items(), desc="Upserting"):
for k, v in data.items():
pipe.set(f"{self.namespace}:{k}", json.dumps(v))
await pipe.execute()

View File

@@ -7,7 +7,6 @@ import numpy as np
from lightrag.types import KnowledgeGraph
from tqdm import tqdm
from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
from ..namespace import NameSpace, is_namespace
@@ -270,15 +269,8 @@ class TiDBVectorDBStorage(BaseVectorStorage):
for i in range(0, len(contents), self._max_batch_size)
]
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = []
for f in tqdm(
asyncio.as_completed(embedding_tasks),
total=len(embedding_tasks),
desc="Generating embeddings",
unit="batch",
):
embeddings = await f
embeddings_list.append(embeddings)
embeddings_list = await asyncio.gather(*embedding_tasks)
embeddings = np.concatenate(embeddings_list)
for i, d in enumerate(list_data):
d["content_vector"] = embeddings[i]

View File

@@ -4,7 +4,7 @@ if sys.version_info < (3, 9):
from typing import AsyncIterator
else:
from collections.abc import AsyncIterator
import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules
@@ -48,7 +48,7 @@ async def _ollama_model_if_cache(
**kwargs,
) -> Union[str, AsyncIterator[str]]:
stream = True if kwargs.get("stream") else False
kwargs.pop("max_tokens", None)
# kwargs.pop("response_format", None) # allow json
host = kwargs.pop("host", None)
@@ -129,4 +129,4 @@ async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
kwargs["headers"] = headers
ollama_client = ollama.Client(**kwargs)
data = ollama_client.embed(model=embed_model, input=texts)
return data["embeddings"]
return data["embeddings"]

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
import asyncio
import json
import re
from tqdm.asyncio import tqdm as tqdm_async
from typing import Any, AsyncIterator
from collections import Counter, defaultdict
from .utils import (
@@ -500,16 +499,8 @@ async def extract_entities(
)
return dict(maybe_nodes), dict(maybe_edges)
results = []
for result in tqdm_async(
asyncio.as_completed([_process_single_content(c) for c in ordered_chunks]),
total=len(ordered_chunks),
desc="Level 2 - Extracting entities and relationships",
unit="chunk",
position=1,
leave=False,
):
results.append(await result)
tasks = [_process_single_content(c) for c in ordered_chunks]
results = await asyncio.gather(*tasks)
maybe_nodes = defaultdict(list)
maybe_edges = defaultdict(list)
@@ -518,41 +509,20 @@ async def extract_entities(
maybe_nodes[k].extend(v)
for k, v in m_edges.items():
maybe_edges[tuple(sorted(k))].extend(v)
logger.debug("Inserting entities into storage...")
all_entities_data = []
for result in tqdm_async(
asyncio.as_completed(
[
_merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
for k, v in maybe_nodes.items()
]
),
total=len(maybe_nodes),
desc="Level 3 - Inserting entities",
unit="entity",
position=2,
leave=False,
):
all_entities_data.append(await result)
logger.debug("Inserting relationships into storage...")
all_relationships_data = []
for result in tqdm_async(
asyncio.as_completed(
[
_merge_edges_then_upsert(
k[0], k[1], v, knowledge_graph_inst, global_config
)
for k, v in maybe_edges.items()
]
),
total=len(maybe_edges),
desc="Level 3 - Inserting relationships",
unit="relationship",
position=3,
leave=False,
):
all_relationships_data.append(await result)
all_entities_data = await asyncio.gather(
*[
_merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
for k, v in maybe_nodes.items()
]
)
all_relationships_data = await asyncio.gather(
*[
_merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
for k, v in maybe_edges.items()
]
)
if not len(all_entities_data) and not len(all_relationships_data):
logger.warning(

View File

@@ -19,7 +19,6 @@ import tiktoken
from lightrag.prompt import PROMPTS
VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
@@ -84,7 +83,6 @@ class EmbeddingFunc:
return await self.func(*args, **kwargs)
def locate_json_string_body_from_string(content: str) -> str | None:
"""Locate the JSON string body from a string"""
try:
@@ -715,4 +713,3 @@ def get_conversation_turns(
)
return "\n".join(formatted_turns)