Merge branch 'improve-property-tooltip' into loginPage

2025-03-15 00:11:50 +08:00
parent 10c23aabfd 063ad8a35a
commit b58729751a
57 changed files with 1822 additions and 606 deletions
--- a/lightrag/api/gunicorn_config.py
+++ b/lightrag/api/gunicorn_config.py
@@ -59,7 +59,7 @@ logconfig_dict = {
    },
    "filters": {
        "path_filter": {
-            "()": "lightrag.api.lightrag_server.LightragPathFilter",
+            "()": "lightrag.utils.LightragPathFilter",
        },
    },
    "loggers": {
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -55,41 +55,6 @@ config = configparser.ConfigParser()
 config.read("config.ini")


-class LightragPathFilter(logging.Filter):
-    """Filter for lightrag logger to filter out frequent path access logs"""
-
-    def __init__(self):
-        super().__init__()
-        # Define paths to be filtered
-        self.filtered_paths = ["/documents", "/health", "/webui/"]
-
-    def filter(self, record):
-        try:
-            # Check if record has the required attributes for an access log
-            if not hasattr(record, "args") or not isinstance(record.args, tuple):
-                return True
-            if len(record.args) < 5:
-                return True
-
-            # Extract method, path and status from the record args
-            method = record.args[1]
-            path = record.args[2]
-            status = record.args[4]
-
-            # Filter out successful GET requests to filtered paths
-            if (
-                method == "GET"
-                and (status == 200 or status == 304)
-                and path in self.filtered_paths
-            ):
-                return False
-
-            return True
-        except Exception:
-            # In case of any error, let the message through
-            return True
-
-
 def create_app(args):
    # Setup logging
    logger.setLevel(args.log_level)
@@ -177,6 +142,9 @@ def create_app(args):
        if api_key
        else "",
        version=__api_version__,
+        openapi_url="/openapi.json",  # Explicitly set OpenAPI schema URL
+        docs_url="/docs",  # Explicitly set docs URL
+        redoc_url="/redoc",  # Explicitly set redoc URL
        openapi_tags=[{"name": "api"}],
        lifespan=lifespan,
    )
@@ -423,12 +391,24 @@ def create_app(args):
            "update_status": update_status,
        }

+    # StaticFiles that forbids caching of HTML, incl. index.html served for a directory URL
+    class NoCacheStaticFiles(StaticFiles):
+        async def get_response(self, path: str, scope):
+            response = await super().get_response(path, scope)
+            headers = response.headers
+            is_html = headers.get("content-type", "").startswith("text/html")
+            if is_html or path.endswith(".html"):
+                headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
+                headers["Pragma"] = "no-cache"
+                headers["Expires"] = "0"
+            return response
+
    # Webui mount webui/index.html
    static_dir = Path(__file__).parent / "webui"
    static_dir.mkdir(exist_ok=True)
    app.mount(
        "/webui",
-        StaticFiles(directory=static_dir, html=True, check_dir=True),
+        NoCacheStaticFiles(directory=static_dir, html=True, check_dir=True),
        name="webui",
    )

@@ -516,7 +496,7 @@ def configure_logging():
            },
            "filters": {
                "path_filter": {
-                    "()": "lightrag.api.lightrag_server.LightragPathFilter",
+                    "()": "lightrag.utils.LightragPathFilter",
                },
            },
        }
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -99,6 +99,37 @@ class DocsStatusesResponse(BaseModel):
    statuses: Dict[DocStatus, List[DocStatusResponse]] = {}


+class PipelineStatusResponse(BaseModel):
+    """Response model for the document indexing pipeline status.

+    Attributes:
+        autoscanned: Whether auto-scan has started
+        busy: Whether the pipeline is currently busy
+        job_name: Current job name (e.g., indexing files/indexing texts)
+        job_start: Job start time as a string; None when not set (optional)
+        docs: Total number of documents to be indexed
+        batchs: Number of batches for processing documents
+        cur_batch: Current processing batch
+        request_pending: Whether a processing request is pending
+        latest_message: Latest message from pipeline processing
+        history_messages: List of history messages; None when not provided
+    """

+    autoscanned: bool = False
+    busy: bool = False
+    job_name: str = "Default Job"
+    job_start: Optional[str] = None
+    docs: int = 0
+    batchs: int = 0
+    cur_batch: int = 0
+    request_pending: bool = False
+    latest_message: str = ""
+    history_messages: Optional[List[str]] = None

+    class Config:
+        extra = "allow"  # Accept status keys beyond the fields declared above
+
+
 class DocumentManager:
    def __init__(
        self,
@@ -247,7 +278,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
                if global_args["main_args"].document_loading_engine == "DOCLING":
                    if not pm.is_installed("docling"):  # type: ignore
                        pm.install("docling")
-                    from docling.document_converter import DocumentConverter
+                    from docling.document_converter import DocumentConverter  # type: ignore

                    converter = DocumentConverter()
                    result = converter.convert(file_path)
@@ -266,7 +297,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
                if global_args["main_args"].document_loading_engine == "DOCLING":
                    if not pm.is_installed("docling"):  # type: ignore
                        pm.install("docling")
-                    from docling.document_converter import DocumentConverter
+                    from docling.document_converter import DocumentConverter  # type: ignore

                    converter = DocumentConverter()
                    result = converter.convert(file_path)
@@ -286,7 +317,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
                if global_args["main_args"].document_loading_engine == "DOCLING":
                    if not pm.is_installed("docling"):  # type: ignore
                        pm.install("docling")
-                    from docling.document_converter import DocumentConverter
+                    from docling.document_converter import DocumentConverter  # type: ignore

                    converter = DocumentConverter()
                    result = converter.convert(file_path)
@@ -307,7 +338,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
                if global_args["main_args"].document_loading_engine == "DOCLING":
                    if not pm.is_installed("docling"):  # type: ignore
                        pm.install("docling")
-                    from docling.document_converter import DocumentConverter
+                    from docling.document_converter import DocumentConverter  # type: ignore

                    converter = DocumentConverter()
                    result = converter.convert(file_path)
@@ -718,17 +749,33 @@ def create_document_routes(
            logger.error(traceback.format_exc())
            raise HTTPException(status_code=500, detail=str(e))

-    @router.get("/pipeline_status", dependencies=[Depends(optional_api_key)])
-    async def get_pipeline_status():
+    @router.get(
+        "/pipeline_status",
+        dependencies=[Depends(optional_api_key)],
+        response_model=PipelineStatusResponse,
+    )
+    async def get_pipeline_status() -> PipelineStatusResponse:
        """
        Get the current status of the document indexing pipeline.

        This endpoint returns information about the current state of the document processing pipeline,
-        including whether it's busy, the current job name, when it started, how many documents
-        are being processed, how many batches there are, and which batch is currently being processed.
+        including the processing status, progress information, and history messages.

        Returns:
-            dict: A dictionary containing the pipeline status information
+            PipelineStatusResponse: A response object containing:
+                - autoscanned (bool): Whether auto-scan has started
+                - busy (bool): Whether the pipeline is currently busy
+                - job_name (str): Current job name (e.g., indexing files/indexing texts)
+                - job_start (str, optional): Job start time as ISO format string
+                - docs (int): Total number of documents to be indexed
+                - batchs (int): Number of batches for processing documents
+                - cur_batch (int): Current processing batch
+                - request_pending (bool): Flag for pending request for processing
+                - latest_message (str): Latest message from pipeline processing
+                - history_messages (List[str], optional): List of history messages
+
+        Raises:
+            HTTPException: If an error occurs while retrieving pipeline status (500)
        """
        try:
            from lightrag.kg.shared_storage import get_namespace_data
@@ -746,7 +793,7 @@ def create_document_routes(
            if status_dict.get("job_start"):
                status_dict["job_start"] = str(status_dict["job_start"])

-            return status_dict
+            return PipelineStatusResponse(**status_dict)
        except Exception as e:
            logger.error(f"Error getting pipeline status: {str(e)}")
            logger.error(traceback.format_exc())
--- a/lightrag/api/webui/index.html
+++ b/lightrag/api/webui/index.html
@@ -0,0 +1,17 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
+    <meta http-equiv="Pragma" content="no-cache" />
+    <meta http-equiv="Expires" content="0" />
+    <link rel="icon" type="image/png" href="./logo.png" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Lightrag</title>
+    <script type="module" crossorigin src="./assets/index-DwcJE583.js"></script>
+    <link rel="stylesheet" crossorigin href="./assets/index-BV5s8k-a.css">
+  </head>
+  <body>
+    <div id="root"></div>
+  </body>
+</html>