@@ -1,11 +1,9 @@
 aiofiles
 ascii_colors
 fastapi
 numpy
+pipmaster
 python-dotenv
 python-multipart
 tenacity
-tiktoken
-torch
-tqdm
 uvicorn

@@ -22,7 +22,6 @@ if not pm.is_installed("faiss"):
|
||||
|
||||
try:
|
||||
import faiss
|
||||
from tqdm.asyncio import tqdm as tqdm_async
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"`faiss` library is not installed. Please install it via pip: `pip install faiss`."
|
||||
@@ -109,16 +108,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        pbar = tqdm_async(
-            total=len(batches), desc="Generating embeddings", unit="batch"
-        )
-
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

         # Flatten the list of arrays

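The wrapped_task indirection above existed only to tick a progress bar; the embedding calls themselves are unchanged. If per-batch progress is still wanted after dropping tqdm, one possible shape is a counter plus logging, sketched below; embed_with_progress and the logger are illustrative, not part of this patch:

import asyncio
import logging

logger = logging.getLogger(__name__)


async def embed_with_progress(embedding_func, batches):
    """Run one embedding call per batch concurrently, logging progress."""
    total = len(batches)
    done = 0

    async def run(batch):
        nonlocal done
        result = await embedding_func(batch)
        done += 1
        logger.debug("Generated embeddings for %d/%d batches", done, total)
        return result

    # gather returns results in the order of `batches`, so index-based
    # matching against the input records stays correct downstream.
    return await asyncio.gather(*(run(b) for b in batches))
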
@@ -1,18 +1,21 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 from lightrag.utils import logger
 from ..base import BaseVectorStorage
 import pipmaster as pm
 import configparser


 if not pm.is_installed("configparser"):
     pm.install("configparser")

 if not pm.is_installed("pymilvus"):
     pm.install("pymilvus")

-from pymilvus import MilvusClient
+try:
+    import configparser
+    from pymilvus import MilvusClient
+except ImportError as e:
+    raise ImportError(
@@ -94,15 +97,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -2,7 +2,6 @@ import os
 from dataclasses import dataclass
 import numpy as np
 import configparser
-from tqdm.asyncio import tqdm as tqdm_async
 import asyncio

 from typing import Any, List, Union, final
@@ -854,17 +853,8 @@ class MongoVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()

@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np

@@ -71,15 +70,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -1,7 +1,6 @@
 import html
 import os
 from dataclasses import dataclass
-from typing import Any, cast, final
+from typing import Any, final

 import numpy as np

@@ -14,8 +13,16 @@ from lightrag.utils import (
 from lightrag.base import (
     BaseGraphStorage,
 )
 import pipmaster as pm

+if not pm.is_installed("graspologic"):
+    pm.install("graspologic")
+
 if not pm.is_installed("networkx"):
     pm.install("networkx")

+try:
+    from graspologic import embed
+    import networkx as nx
+except ImportError as e:
+    raise ImportError(
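Several backends in this changeset adopt the same guard shown above: check with pipmaster, install on demand, then import inside try/except so a failed install still surfaces a clear error. Schematically, under the assumption that the error message mirrors the other backends (the message text here is a placeholder):

import pipmaster as pm

# Install the optional dependency on first use rather than at package install time.
if not pm.is_installed("networkx"):
    pm.install("networkx")

# The import can still fail (e.g. an offline machine), so fail with a clear message.
try:
    import networkx as nx
except ImportError as e:
    raise ImportError(
        "`networkx` is not installed. Please install it via pip: `pip install networkx`."
    ) from e
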
@@ -39,21 +46,6 @@ class NetworkXStorage(BaseGraphStorage):
         )
         nx.write_graphml(graph, file_name)

-    @staticmethod
-    def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
-        """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
-        Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
-        """
-        from graspologic.utils import largest_connected_component
-
-        graph = graph.copy()
-        graph = cast(nx.Graph, largest_connected_component(graph))
-        node_mapping = {
-            node: html.unescape(node.upper().strip()) for node in graph.nodes()
-        }  # type: ignore
-        graph = nx.relabel_nodes(graph, node_mapping)
-        return NetworkXStorage._stabilize_graph(graph)
-
     @staticmethod
     def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
         """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
@@ -153,8 +145,6 @@ class NetworkXStorage(BaseGraphStorage):

     # @TODO: NOT USED
     async def _node2vec_embed(self):
-        from graspologic import embed
-
         embeddings, nodes = embed.node2vec_embed(
             self._graph,
             **self.global_config["node2vec_params"],

@@ -20,10 +20,14 @@ from ..utils import logger

 import pipmaster as pm

+if not pm.is_installed("graspologic"):
+    pm.install("graspologic")
+
 if not pm.is_installed("oracledb"):
     pm.install("oracledb")

 try:
+    from graspologic import embed
     import oracledb

 except ImportError as e:
@@ -452,8 +456,6 @@ class OracleGraphStorage(BaseGraphStorage):

     async def _node2vec_embed(self):
         """Generate embedding vectors for nodes"""
-        from graspologic import embed
-
         embeddings, nodes = embed.node2vec_embed(
             self._graph,
             **self.config["node2vec_params"],

@@ -41,7 +41,6 @@ if not pm.is_installed("asyncpg"):

 try:
     import asyncpg
-    from tqdm.asyncio import tqdm as tqdm_async

 except ImportError as e:
     raise ImportError(
@@ -380,15 +379,7 @@ class PGVectorStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 import hashlib
@@ -110,15 +109,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -1,6 +1,5 @@
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import pipmaster as pm
 import configparser
@@ -51,7 +50,8 @@ class RedisKVStorage(BaseKVStorage):

     async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
         pipe = self._redis.pipeline()
-        for k, v in tqdm_async(data.items(), desc="Upserting"):
+
+        for k, v in data.items():
             pipe.set(f"{self.namespace}:{k}", json.dumps(v))
         await pipe.execute()

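Dropping the progress bar here also removes a misleading signal: redis pipeline commands are only buffered client-side, and no network I/O happens until execute(), so the old per-iteration bar measured local queueing rather than upsert progress. A small sketch of that buffering behavior, assuming redis.asyncio and a reachable server:

import asyncio
import json

from redis.asyncio import Redis


async def demo_upsert() -> None:
    redis = Redis()
    pipe = redis.pipeline()
    for k, v in {"doc1": {"text": "a"}, "doc2": {"text": "b"}}.items():
        pipe.set(f"demo:{k}", json.dumps(v))  # buffered locally, returns immediately
    await pipe.execute()  # one round trip executes every queued SET


asyncio.run(demo_upsert())
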
@@ -7,7 +7,6 @@ import numpy as np

 from lightrag.types import KnowledgeGraph

-from tqdm import tqdm

 from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
 from ..namespace import NameSpace, is_namespace
@@ -270,15 +269,8 @@ class TiDBVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
         embedding_tasks = [self.embedding_func(batch) for batch in batches]
-        embeddings_list = []
-        for f in tqdm(
-            asyncio.as_completed(embedding_tasks),
-            total=len(embedding_tasks),
-            desc="Generating embeddings",
-            unit="batch",
-        ):
-            embeddings = await f
-            embeddings_list.append(embeddings)
+        embeddings_list = await asyncio.gather(*embedding_tasks)
+
         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["content_vector"] = embeddings[i]

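The TiDB change fixes more than verbosity: asyncio.as_completed yields results in completion order, while the embeddings are matched back to list_data by index, so a slow early batch could pair records with another batch's vectors. asyncio.gather returns results in submission order and removes that hazard. A self-contained sketch of the difference, using artificial delays in place of real embedding calls:

import asyncio


async def fake_embed(batch_id: int, delay: float) -> int:
    # Stand-in for embedding_func; returns its batch id after `delay` seconds.
    await asyncio.sleep(delay)
    return batch_id


async def main() -> None:
    # Completion order: batch 1 finishes first -> [1, 0]
    by_completion = [
        await f for f in asyncio.as_completed([fake_embed(0, 0.2), fake_embed(1, 0.0)])
    ]
    # Submission order is preserved -> [0, 1]
    by_submission = await asyncio.gather(fake_embed(0, 0.2), fake_embed(1, 0.0))
    print(by_completion, by_submission)


asyncio.run(main())
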
@@ -1,46 +1,3 @@
-"""
-Azure OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with aure openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.azure_openai import azure_openai_model_complete, azure_openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import os
 import pipmaster as pm  # Pipmaster for dynamic library install

@@ -1,46 +1,3 @@
-"""
-Bedrock LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Bedrock's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - aioboto3, tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.bebrock import bebrock_model_complete, bebrock_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import copy
 import os
 import json

@@ -1,47 +1,7 @@
-"""
-Hugging face LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Hugging face's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - transformers
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.hf import hf_model_complete, hf_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import copy
 import os
 from functools import lru_cache

 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
@@ -51,9 +11,12 @@ if not pm.is_installed("torch"):
     pm.install("torch")
 if not pm.is_installed("tenacity"):
     pm.install("tenacity")
+if not pm.is_installed("numpy"):
+    pm.install("numpy")
+if not pm.is_installed("tenacity"):
+    pm.install("tenacity")

 from transformers import AutoTokenizer, AutoModelForCausalLM
 from functools import lru_cache
 from tenacity import (
     retry,
     stop_after_attempt,

@@ -1,43 +1,3 @@
-"""
-Jina Embedding Interface Module
-==========================
-
-This module provides interfaces for interacting with jina system,
-including embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added embedding generation
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.jina import jina_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import os
 import pipmaster as pm  # Pipmaster for dynamic library install

@@ -1,45 +1,3 @@
-"""
-LMDeploy LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with LMDeploy's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.lmdeploy import lmdeploy_model_complete, lmdeploy_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules

@@ -1,66 +1,3 @@
-"""
-LoLLMs (Lord of Large Language Models) Interface Module
-=====================================================
-
-This module provides the official interface for interacting with LoLLMs (Lord of Large Language and multimodal Systems),
-a unified framework for AI model interaction and deployment.
-
-LoLLMs is designed as a "one tool to rule them all" solution, providing seamless integration
-with various AI models while maintaining high performance and user-friendly interfaces.
-
-Author: ParisNeo
-Created: 2024-01-24
-License: Apache 2.0
-
-Copyright (c) 2024 ParisNeo
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-Version: 2.0.0
-
-Change Log:
-- 2.0.0 (2024-01-24):
-    * Added async support for model inference
-    * Implemented streaming capabilities
-    * Added embedding generation functionality
-    * Enhanced parameter handling
-    * Improved error handling and timeout management
-
-Dependencies:
-    - aiohttp
-    - numpy
-    - Python >= 3.10
-
-Features:
-    - Async text generation with streaming support
-    - Embedding generation
-    - Configurable model parameters
-    - System prompt and chat history support
-    - Timeout handling
-    - API key authentication
-
-Usage:
-    from llm_interfaces.lollms import lollms_model_complete, lollms_embed
-
-Project Repository: https://github.com/ParisNeo/lollms
-Documentation: https://github.com/ParisNeo/lollms/docs
-"""
-
-__version__ = "1.0.0"
-__author__ = "ParisNeo"
-__status__ = "Production"
-__project_url__ = "https://github.com/ParisNeo/lollms"
-__doc_url__ = "https://github.com/ParisNeo/lollms/docs"
 import sys

 if sys.version_info < (3, 9):

@@ -1,46 +1,3 @@
-"""
-OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.nvidia_openai import nvidia_openai_model_complete, nvidia_openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import sys
 import os

@@ -48,6 +5,7 @@ if sys.version_info < (3, 9):
     pass
 else:
     pass
+
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules

@@ -1,51 +1,10 @@
-"""
-Ollama LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Ollama's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - ollama
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.ollama_interface import ollama_model_complete, ollama_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys

 if sys.version_info < (3, 9):
     from typing import AsyncIterator
 else:
     from collections.abc import AsyncIterator

 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
@@ -54,7 +13,9 @@ if not pm.is_installed("ollama"):
 if not pm.is_installed("tenacity"):
     pm.install("tenacity")

+
+import ollama

 from tenacity import (
     retry,
     stop_after_attempt,
@@ -67,7 +28,7 @@ from lightrag.exceptions import (
     APITimeoutError,
 )
 from lightrag.api import __api_version__
-from lightrag.utils import extract_reasoning
+

 import numpy as np
 from typing import Union

@@ -79,7 +40,7 @@ from typing import Union
         (RateLimitError, APIConnectionError, APITimeoutError)
     ),
 )
-async def ollama_model_if_cache(
+async def _ollama_model_if_cache(
     model,
     prompt,
     system_prompt=None,
@@ -87,7 +48,7 @@ async def ollama_model_if_cache(
     **kwargs,
 ) -> Union[str, AsyncIterator[str]]:
     stream = True if kwargs.get("stream") else False
-    reasoning_tag = kwargs.pop("reasoning_tag", None)
+
     kwargs.pop("max_tokens", None)
     # kwargs.pop("response_format", None) # allow json
     host = kwargs.pop("host", None)
@@ -125,11 +86,7 @@
         response and can simply be trimmed.
         """

-        return (
-            model_response
-            if reasoning_tag is None
-            else extract_reasoning(model_response, reasoning_tag).response_content
-        )
+        return model_response


 async def ollama_model_complete(
@@ -139,7 +96,7 @@ async def ollama_model_complete(
     if keyword_extraction:
         kwargs["format"] = "json"
     model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
-    return await ollama_model_if_cache(
+    return await _ollama_model_if_cache(
         model_name,
         prompt,
         system_prompt=system_prompt,

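With reasoning_tag handling removed, _ollama_model_if_cache now returns the model output untouched. A caller that still needs to drop a leading <think>...</think> block could do so with a few lines of regex; this helper is purely illustrative and not part of lightrag:

import re


def strip_reasoning(text: str, tag: str = "think") -> str:
    """Drop a leading <tag>...</tag> reasoning block, if present."""
    return re.sub(rf"^\s*<{tag}>.*?</{tag}>\s*", "", text, count=1, flags=re.DOTALL)


print(strip_reasoning("<think>weigh options...</think>Final answer"))  # Final answer
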
@@ -1,45 +1,3 @@
-"""
-OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.openai import openai_model_complete, openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os

@@ -1,43 +1,3 @@
-"""
-SiliconCloud Embedding Interface Module
-==========================
-
-This module provides interfaces for interacting with SiliconCloud system,
-including embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added embedding generation
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.siliconcloud import siliconcloud_model_complete, siliconcloud_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys

 if sys.version_info < (3, 9):

@@ -1,45 +1,3 @@
-"""
-Zhipu LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with LMDeploy's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.zhipu import zhipu_model_complete, zhipu_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys
 import re
 import json

@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import re
-from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, AsyncIterator
 from collections import Counter, defaultdict
 from .utils import (
@@ -500,16 +499,8 @@ async def extract_entities(
         )
         return dict(maybe_nodes), dict(maybe_edges)

-    results = []
-    for result in tqdm_async(
-        asyncio.as_completed([_process_single_content(c) for c in ordered_chunks]),
-        total=len(ordered_chunks),
-        desc="Level 2 - Extracting entities and relationships",
-        unit="chunk",
-        position=1,
-        leave=False,
-    ):
-        results.append(await result)
+    tasks = [_process_single_content(c) for c in ordered_chunks]
+    results = await asyncio.gather(*tasks)

     maybe_nodes = defaultdict(list)
     maybe_edges = defaultdict(list)
@@ -518,41 +509,20 @@ async def extract_entities(
             maybe_nodes[k].extend(v)
         for k, v in m_edges.items():
             maybe_edges[tuple(sorted(k))].extend(v)
-    logger.debug("Inserting entities into storage...")
-    all_entities_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
-                for k, v in maybe_nodes.items()
-            ]
-        ),
-        total=len(maybe_nodes),
-        desc="Level 3 - Inserting entities",
-        unit="entity",
-        position=2,
-        leave=False,
-    ):
-        all_entities_data.append(await result)
-
-    logger.debug("Inserting relationships into storage...")
-    all_relationships_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_edges_then_upsert(
-                    k[0], k[1], v, knowledge_graph_inst, global_config
-                )
-                for k, v in maybe_edges.items()
-            ]
-        ),
-        total=len(maybe_edges),
-        desc="Level 3 - Inserting relationships",
-        unit="relationship",
-        position=3,
-        leave=False,
-    ):
-        all_relationships_data.append(await result)
+    all_entities_data = await asyncio.gather(
+        *[
+            _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
+            for k, v in maybe_nodes.items()
+        ]
+    )
+
+    all_relationships_data = await asyncio.gather(
+        *[
+            _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
+            for k, v in maybe_edges.items()
+        ]
+    )

     if not len(all_entities_data) and not len(all_relationships_data):
         logger.warning(

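Note that asyncio.gather launches every merge coroutine at once, just as the old as_completed loop did; neither version bounds concurrency. If storage backpressure ever becomes an issue, a common idiom is a semaphore wrapper such as the sketch below; gather_limited is hypothetical and not part of this patch:

import asyncio


async def gather_limited(coros, limit: int = 16):
    """Like asyncio.gather, but runs at most `limit` coroutines at a time."""
    sem = asyncio.Semaphore(limit)

    async def bounded(coro):
        async with sem:
            return await coro

    return await asyncio.gather(*(bounded(c) for c in coros))
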
@@ -1,8 +1,9 @@
 from __future__ import annotations
+from typing import Any

 GRAPH_FIELD_SEP = "<SEP>"

-PROMPTS = {}
+PROMPTS: dict[str, Any] = {}

 PROMPTS["DEFAULT_LANGUAGE"] = "English"
 PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"

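Annotating the module-level PROMPTS registry lets a type checker validate writes that were previously unchecked. A tiny illustration of what the annotation buys (hypothetical snippet, checked with a tool such as mypy):

from typing import Any

PROMPTS: dict[str, Any] = {}

PROMPTS["DEFAULT_LANGUAGE"] = "English"  # fine: str key, Any value
PROMPTS[42] = "oops"  # a type checker now flags this: key must be str
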
@@ -1,10 +1,3 @@
-"""
-3D GraphML Viewer using Dear ImGui and ModernGL
-Author: ParisNeo, ArnoChen
-Description: An interactive 3D GraphML viewer using imgui_bundle and ModernGL
-Version: 2.0
-"""
-
 from typing import Optional, Tuple, Dict, List
 import numpy as np
 import networkx as nx

@@ -13,13 +13,12 @@ from functools import wraps
 from hashlib import md5
 from typing import Any, Callable
 import xml.etree.ElementTree as ET
-import bs4

 import numpy as np
 import tiktoken

 from lightrag.prompt import PROMPTS


 VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"

@@ -84,13 +83,6 @@ class EmbeddingFunc:
         return await self.func(*args, **kwargs)


-@dataclass
-class ReasoningResponse:
-    reasoning_content: str | None
-    response_content: str
-    tag: str
-
-
 def locate_json_string_body_from_string(content: str) -> str | None:
     """Locate the JSON string body from a string"""
     try:
@@ -721,28 +713,3 @@ def get_conversation_turns(
     )

     return "\n".join(formatted_turns)
-
-
-def extract_reasoning(response: str, tag: str) -> ReasoningResponse:
-    """Extract the reasoning section and the following section from the LLM response.
-
-    Args:
-        response: LLM response
-        tag: Tag to extract
-
-    Returns:
-        ReasoningResponse: Reasoning section and following section
-    """
-    soup = bs4.BeautifulSoup(response, "html.parser")
-
-    reasoning_section = soup.find(tag)
-    if reasoning_section is None:
-        return ReasoningResponse(None, response, tag)
-    reasoning_content = reasoning_section.get_text().strip()
-
-    after_reasoning_section = reasoning_section.next_sibling
-    if after_reasoning_section is None:
-        return ReasoningResponse(reasoning_content, "", tag)
-    after_reasoning_content = after_reasoning_section.get_text().strip()
-
-    return ReasoningResponse(reasoning_content, after_reasoning_content, tag)