Fixed missing imports bug and fixed linting

This commit is contained in:
Saifeddine ALOUI
2025-01-25 00:55:07 +01:00
parent e060fa6abb
commit 06c9e4e454
20 changed files with 59 additions and 90 deletions

View File

@@ -684,7 +684,9 @@ def create_app(args):
trace_exception(e) trace_exception(e)
logging.error(f"Error indexing file {file_path}: {str(e)}") logging.error(f"Error indexing file {file_path}: {str(e)}")
ASCIIColors.info(f"Indexed {len(new_files)} documents from {args.input_dir}") ASCIIColors.info(
f"Indexed {len(new_files)} documents from {args.input_dir}"
)
except Exception as e: except Exception as e:
logging.error(f"Error during startup indexing: {str(e)}") logging.error(f"Error during startup indexing: {str(e)}")
yield yield
@@ -917,7 +919,6 @@ def create_app(args):
else: else:
logging.warning(f"No content extracted from file: {file_path}") logging.warning(f"No content extracted from file: {file_path}")
@app.post("/documents/scan", dependencies=[Depends(optional_api_key)]) @app.post("/documents/scan", dependencies=[Depends(optional_api_key)])
async def scan_for_new_documents(): async def scan_for_new_documents():
""" """

View File

@@ -1,6 +1,7 @@
import httpx import httpx
from typing import Literal from typing import Literal
class APIStatusError(Exception): class APIStatusError(Exception):
"""Raised when an API response has a status code of 4xx or 5xx.""" """Raised when an API response has a status code of 4xx or 5xx."""
@@ -8,14 +9,19 @@ class APIStatusError(Exception):
status_code: int status_code: int
request_id: str | None request_id: str | None
def __init__(self, message: str, *, response: httpx.Response, body: object | None) -> None: def __init__(
self, message: str, *, response: httpx.Response, body: object | None
) -> None:
super().__init__(message, response.request, body=body) super().__init__(message, response.request, body=body)
self.response = response self.response = response
self.status_code = response.status_code self.status_code = response.status_code
self.request_id = response.headers.get("x-request-id") self.request_id = response.headers.get("x-request-id")
class APIConnectionError(Exception): class APIConnectionError(Exception):
def __init__(self, *, message: str = "Connection error.", request: httpx.Request) -> None: def __init__(
self, *, message: str = "Connection error.", request: httpx.Request
) -> None:
super().__init__(message, request, body=None) super().__init__(message, request, body=None)
@@ -46,10 +52,7 @@ class UnprocessableEntityError(APIStatusError):
class RateLimitError(APIStatusError): class RateLimitError(APIStatusError):
status_code: Literal[429] = 429 # pyright: ignore[reportIncompatibleVariableOverride] status_code: Literal[429] = 429 # pyright: ignore[reportIncompatibleVariableOverride]
class APITimeoutError(APIConnectionError): class APITimeoutError(APIConnectionError):
def __init__(self, request: httpx.Request) -> None: def __init__(self, request: httpx.Request) -> None:
super().__init__(message="Request timed out.", request=request) super().__init__(message="Request timed out.", request=request)
class BadRequestError(APIStatusError):
status_code: Literal[400] = 400 # pyright: ignore[reportIncompatibleVariableOverride]

View File

@@ -1,6 +1,7 @@
import os import os
from tqdm.asyncio import tqdm as tqdm_async from tqdm.asyncio import tqdm as tqdm_async
from dataclasses import dataclass from dataclasses import dataclass
# aioredis is a deprecated library, replaced with redis # aioredis is a deprecated library, replaced with redis
from redis.asyncio import Redis from redis.asyncio import Redis
from lightrag.utils import logger from lightrag.utils import logger

View File

@@ -42,7 +42,7 @@ __status__ = "Production"
import os import os
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("openai"): if not pm.is_installed("openai"):
@@ -71,6 +71,7 @@ from lightrag.utils import (
import numpy as np import numpy as np
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10), wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -153,6 +154,7 @@ async def azure_openai_complete(
return locate_json_string_body_from_string(result) return locate_json_string_body_from_string(result)
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191) @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8191)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
@@ -185,4 +187,3 @@ async def azure_openai_embed(
model=model, input=texts, encoding_format="float" model=model, input=texts, encoding_format="float"
) )
return np.array([dp.embedding for dp in response.data]) return np.array([dp.embedding for dp in response.data])

View File

@@ -41,12 +41,12 @@ __author__ = "lightrag Team"
__status__ = "Production" __status__ = "Production"
import sys
import copy import copy
import os import os
import json import json
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
if not pm.is_installed("aioboto3"): if not pm.is_installed("aioboto3"):
pm.install("aioboto3") pm.install("aioboto3")
if not pm.is_installed("tenacity"): if not pm.is_installed("tenacity"):
@@ -60,15 +60,11 @@ from tenacity import (
retry_if_exception_type, retry_if_exception_type,
) )
from lightrag.exceptions import (
APIConnectionError,
RateLimitError,
APITimeoutError,
)
from lightrag.utils import ( from lightrag.utils import (
locate_json_string_body_from_string, locate_json_string_body_from_string,
) )
class BedrockError(Exception): class BedrockError(Exception):
"""Generic error for issues related to Amazon Bedrock""" """Generic error for issues related to Amazon Bedrock"""

View File

@@ -42,7 +42,7 @@ __status__ = "Production"
import copy import copy
import os import os
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("transformers"): if not pm.is_installed("transformers"):
@@ -69,9 +69,11 @@ from lightrag.utils import (
locate_json_string_body_from_string, locate_json_string_body_from_string,
) )
import torch import torch
import numpy as np
os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["TOKENIZERS_PARALLELISM"] = "false"
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def initialize_hf_model(model_name): def initialize_hf_model(model_name):
hf_tokenizer = AutoTokenizer.from_pretrained( hf_tokenizer = AutoTokenizer.from_pretrained(
@@ -155,7 +157,6 @@ async def hf_model_if_cache(
return response_text return response_text
async def hf_model_complete( async def hf_model_complete(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> str: ) -> str:

View File

@@ -39,7 +39,7 @@ __author__ = "lightrag Team"
__status__ = "Production" __status__ = "Production"
import os import os
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("lmdeploy"): if not pm.is_installed("lmdeploy"):
@@ -47,25 +47,8 @@ if not pm.is_installed("lmdeploy"):
if not pm.is_installed("tenacity"): if not pm.is_installed("tenacity"):
pm.install("tenacity") pm.install("tenacity")
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception_type,
)
from lightrag.utils import (
wrap_embedding_func_with_attrs,
locate_json_string_body_from_string,
safe_unicode_decode,
logger,
)
from lightrag.types import GPTKeywordExtractionFormat
from functools import lru_cache
import numpy as np import numpy as np
from typing import Union
import aiohttp import aiohttp
@@ -101,4 +84,3 @@ async def jina_embed(
} }
data_list = await fetch_data(url, headers, data) data_list = await fetch_data(url, headers, data)
return np.array([dp["embedding"] for dp in data_list]) return np.array([dp["embedding"] for dp in data_list])

View File

@@ -40,7 +40,7 @@ __version__ = "1.0.0"
__author__ = "lightrag Team" __author__ = "lightrag Team"
__status__ = "Production" __status__ = "Production"
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("lmdeploy"): if not pm.is_installed("lmdeploy"):
@@ -63,6 +63,7 @@ from tenacity import (
from functools import lru_cache from functools import lru_cache
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def initialize_lmdeploy_pipeline( def initialize_lmdeploy_pipeline(
model, model,

View File

@@ -62,11 +62,13 @@ __status__ = "Production"
__project_url__ = "https://github.com/ParisNeo/lollms" __project_url__ = "https://github.com/ParisNeo/lollms"
__doc_url__ = "https://github.com/ParisNeo/lollms/docs" __doc_url__ = "https://github.com/ParisNeo/lollms/docs"
import sys import sys
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
from typing import AsyncIterator from typing import AsyncIterator
else: else:
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
if not pm.is_installed("aiohttp"): if not pm.is_installed("aiohttp"):
pm.install("aiohttp") pm.install("aiohttp")
if not pm.is_installed("tenacity"): if not pm.is_installed("tenacity"):
@@ -89,6 +91,7 @@ from lightrag.exceptions import (
from typing import Union, List from typing import Union, List
import numpy as np import numpy as np
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10), wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -185,7 +188,6 @@ async def lollms_model_complete(
) )
async def lollms_embed( async def lollms_embed(
texts: List[str], embed_model=None, base_url="http://localhost:9600", **kwargs texts: List[str], embed_model=None, base_url="http://localhost:9600", **kwargs
) -> np.ndarray: ) -> np.ndarray:

View File

@@ -41,15 +41,14 @@ __author__ = "lightrag Team"
__status__ = "Production" __status__ = "Production"
import sys import sys
import os import os
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
from typing import AsyncIterator pass
else: else:
from collections.abc import AsyncIterator pass
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("openai"): if not pm.is_installed("openai"):
@@ -70,15 +69,12 @@ from tenacity import (
from lightrag.utils import ( from lightrag.utils import (
wrap_embedding_func_with_attrs, wrap_embedding_func_with_attrs,
locate_json_string_body_from_string,
safe_unicode_decode,
logger,
) )
from lightrag.types import GPTKeywordExtractionFormat
import numpy as np import numpy as np
@wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512) @wrap_embedding_func_with_attrs(embedding_dim=2048, max_token_size=512)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),

View File

@@ -41,11 +41,12 @@ __author__ = "lightrag Team"
__status__ = "Production" __status__ = "Production"
import sys import sys
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
from typing import AsyncIterator from typing import AsyncIterator
else: else:
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("ollama"): if not pm.is_installed("ollama"):
@@ -114,6 +115,7 @@ async def ollama_model_if_cache(
else: else:
return response["message"]["content"] return response["message"]["content"]
async def ollama_model_complete( async def ollama_model_complete(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> Union[str, AsyncIterator[str]]: ) -> Union[str, AsyncIterator[str]]:
@@ -129,6 +131,7 @@ async def ollama_model_complete(
**kwargs, **kwargs,
) )
async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray: async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
""" """
Deprecated in favor of `embed`. Deprecated in favor of `embed`.

View File

@@ -41,7 +41,6 @@ __author__ = "lightrag Team"
__status__ = "Production" __status__ = "Production"
import sys import sys
import os import os
@@ -49,7 +48,7 @@ if sys.version_info < (3, 9):
from typing import AsyncIterator from typing import AsyncIterator
else: else:
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("openai"): if not pm.is_installed("openai"):
@@ -78,6 +77,7 @@ from lightrag.types import GPTKeywordExtractionFormat
import numpy as np import numpy as np
from typing import Union from typing import Union
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10), wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -141,7 +141,6 @@ async def openai_complete_if_cache(
return content return content
async def openai_complete( async def openai_complete(
prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
) -> Union[str, AsyncIterator[str]]: ) -> Union[str, AsyncIterator[str]]:
@@ -205,7 +204,6 @@ async def nvidia_openai_complete(
return result return result
@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192) @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),

View File

@@ -39,23 +39,18 @@ __author__ = "lightrag Team"
__status__ = "Production" __status__ = "Production"
import sys import sys
import copy
import os
import json
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
from typing import AsyncIterator pass
else: else:
from collections.abc import AsyncIterator pass
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("lmdeploy"): if not pm.is_installed("lmdeploy"):
pm.install("lmdeploy") pm.install("lmdeploy")
from openai import ( from openai import (
AsyncOpenAI,
AsyncAzureOpenAI,
APIConnectionError, APIConnectionError,
RateLimitError, RateLimitError,
APITimeoutError, APITimeoutError,
@@ -67,19 +62,12 @@ from tenacity import (
retry_if_exception_type, retry_if_exception_type,
) )
from lightrag.utils import (
wrap_embedding_func_with_attrs,
locate_json_string_body_from_string,
safe_unicode_decode,
logger,
)
from lightrag.types import GPTKeywordExtractionFormat
from functools import lru_cache
import numpy as np import numpy as np
from typing import Union
import aiohttp import aiohttp
import base64
import struct
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),

View File

@@ -45,18 +45,16 @@ import re
import json import json
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
from typing import AsyncIterator pass
else: else:
from collections.abc import AsyncIterator pass
import pipmaster as pm # Pipmaster for dynamic library install import pipmaster as pm # Pipmaster for dynamic library install
# install specific modules # install specific modules
if not pm.is_installed("zhipuai"): if not pm.is_installed("zhipuai"):
pm.install("zhipuai") pm.install("zhipuai")
from openai import ( from openai import (
AsyncOpenAI,
AsyncAzureOpenAI,
APIConnectionError, APIConnectionError,
RateLimitError, RateLimitError,
APITimeoutError, APITimeoutError,
@@ -70,17 +68,15 @@ from tenacity import (
from lightrag.utils import ( from lightrag.utils import (
wrap_embedding_func_with_attrs, wrap_embedding_func_with_attrs,
locate_json_string_body_from_string,
safe_unicode_decode,
logger, logger,
) )
from lightrag.types import GPTKeywordExtractionFormat from lightrag.types import GPTKeywordExtractionFormat
from functools import lru_cache
import numpy as np import numpy as np
from typing import Union, List, Optional, Dict from typing import Union, List, Optional, Dict
@retry( @retry(
stop=stop_after_attempt(3), stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10), wait=wait_exponential(multiplier=1, min=4, max=10),

View File

@@ -6,7 +6,6 @@ from dataclasses import dataclass
from typing import Any, Union, cast, Dict from typing import Any, Union, cast, Dict
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import pipmaster as pm
from nano_vectordb import NanoVectorDB from nano_vectordb import NanoVectorDB
import time import time

View File

@@ -1,6 +1,7 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import List from typing import List
class GPTKeywordExtractionFormat(BaseModel): class GPTKeywordExtractionFormat(BaseModel):
high_level_keywords: List[str] high_level_keywords: List[str]
low_level_keywords: List[str] low_level_keywords: List[str]

View File

@@ -535,7 +535,8 @@ class CacheData:
min_val: Optional[float] = None min_val: Optional[float] = None
max_val: Optional[float] = None max_val: Optional[float] = None
mode: str = "default" mode: str = "default"
cache_type: str ="query" cache_type: str = "query"
async def save_to_cache(hashing_kv, cache_data: CacheData): async def save_to_cache(hashing_kv, cache_data: CacheData):
if hashing_kv is None or hasattr(cache_data.content, "__aiter__"): if hashing_kv is None or hasattr(cache_data.content, "__aiter__"):

View File

@@ -1,7 +1,6 @@
accelerate accelerate
aiofiles aiofiles
aiohttp aiohttp
redis
asyncpg asyncpg
configparser configparser
@@ -30,6 +29,7 @@ python-docx
python-dotenv python-dotenv
python-pptx python-pptx
pyvis pyvis
redis
setuptools setuptools
sqlalchemy sqlalchemy
tenacity tenacity
@@ -39,4 +39,3 @@ tenacity
tiktoken tiktoken
tqdm tqdm
xxhash xxhash