zrguo
2025-03-17 23:36:00 +08:00
parent bf18a5406e
commit 6115f60072
2 changed files with 73 additions and 45 deletions

View File

@@ -563,7 +563,9 @@ class LightRAG:
""" """
loop = always_get_an_event_loop() loop = always_get_an_event_loop()
loop.run_until_complete( loop.run_until_complete(
self.ainsert(input, split_by_character, split_by_character_only, ids, file_paths) self.ainsert(
input, split_by_character, split_by_character_only, ids, file_paths
)
) )
async def ainsert( async def ainsert(
@@ -659,7 +661,10 @@ class LightRAG:
         await self._insert_done()
 
     async def apipeline_enqueue_documents(
-        self, input: str | list[str], ids: list[str] | None = None, file_paths: str | list[str] | None = None
+        self,
+        input: str | list[str],
+        ids: list[str] | None = None,
+        file_paths: str | list[str] | None = None,
     ) -> None:
         """
         Pipeline for Processing Documents
@@ -687,7 +692,9 @@ class LightRAG:
             if isinstance(file_paths, str):
                 file_paths = [file_paths]
             if len(file_paths) != len(input):
-                raise ValueError("Number of file paths must match the number of documents")
+                raise ValueError(
+                    "Number of file paths must match the number of documents"
+                )
         else:
             # If no file paths provided, use placeholder
             file_paths = ["unknown_source"] * len(input)
@@ -703,11 +710,15 @@ class LightRAG:
                 raise ValueError("IDs must be unique")
 
             # Generate contents dict of IDs provided by user and documents
-            contents = {id_: {"content": doc, "file_path": path}
-                        for id_, doc, path in zip(ids, input, file_paths)}
+            contents = {
+                id_: {"content": doc, "file_path": path}
+                for id_, doc, path in zip(ids, input, file_paths)
+            }
         else:
             # Clean input text and remove duplicates
-            cleaned_input = [(clean_text(doc), path) for doc, path in zip(input, file_paths)]
+            cleaned_input = [
+                (clean_text(doc), path) for doc, path in zip(input, file_paths)
+            ]
             unique_content_with_paths = {}
 
             # Keep track of unique content and their paths
@@ -716,9 +727,13 @@ class LightRAG:
                     unique_content_with_paths[content] = path
 
             # Generate contents dict of MD5 hash IDs and documents with paths
-            contents = {compute_mdhash_id(content, prefix="doc-"):
-                        {"content": content, "file_path": path}
-                        for content, path in unique_content_with_paths.items()}
+            contents = {
+                compute_mdhash_id(content, prefix="doc-"): {
+                    "content": content,
+                    "file_path": path,
+                }
+                for content, path in unique_content_with_paths.items()
+            }
 
         # 2. Remove duplicate contents
         unique_contents = {}
@@ -729,8 +744,10 @@ class LightRAG:
                 unique_contents[content] = (id_, file_path)
 
         # Reconstruct contents with unique content
-        contents = {id_: {"content": content, "file_path": file_path}
-                    for content, (id_, file_path) in unique_contents.items()}
+        contents = {
+            id_: {"content": content, "file_path": file_path}
+            for content, (id_, file_path) in unique_contents.items()
+        }
 
         # 3. Generate document initial status
         new_docs: dict[str, Any] = {
@@ -741,7 +758,9 @@ class LightRAG:
"content_length": len(content_data["content"]), "content_length": len(content_data["content"]),
"created_at": datetime.now().isoformat(), "created_at": datetime.now().isoformat(),
"updated_at": datetime.now().isoformat(), "updated_at": datetime.now().isoformat(),
"file_path": content_data["file_path"], # Store file path in document status "file_path": content_data[
"file_path"
], # Store file path in document status
} }
for id_, content_data in contents.items() for id_, content_data in contents.items()
} }
@@ -1109,7 +1128,10 @@ class LightRAG:
         loop.run_until_complete(self.ainsert_custom_kg(custom_kg, full_doc_id))
 
     async def ainsert_custom_kg(
-        self, custom_kg: dict[str, Any], full_doc_id: str = None, file_path: str = "custom_kg"
+        self,
+        custom_kg: dict[str, Any],
+        full_doc_id: str = None,
+        file_path: str = "custom_kg",
     ) -> None:
         update_storage = False
         try:
@@ -3125,4 +3147,3 @@ class LightRAG:
                 ]
             ]
         )
-
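
Note: the enqueue hunks above only re-wrap existing logic; documents are cleaned, deduplicated by content, and keyed by an MD5-based "doc-" ID before the status records are built. The sketch below is a minimal, self-contained illustration of that dedup step, not the library code itself: clean_text and compute_mdhash_id are assumed stand-ins for the helpers of the same name (assumed to be whitespace trimming and an MD5 hex digest, respectively).

import hashlib
from datetime import datetime
from typing import Any


def clean_text(text: str) -> str:
    # Assumption: cleaning amounts to trimming surrounding whitespace.
    return text.strip()


def compute_mdhash_id(content: str, prefix: str = "") -> str:
    # Assumption: IDs are a prefixed MD5 hex digest of the content.
    return prefix + hashlib.md5(content.encode("utf-8")).hexdigest()


def enqueue_preview(
    input: str | list[str],
    file_paths: str | list[str] | None = None,
) -> dict[str, dict[str, Any]]:
    """Mirror the no-IDs branch: clean, dedupe by content, key by doc- hash."""
    if isinstance(input, str):
        input = [input]
    if isinstance(file_paths, str):
        file_paths = [file_paths]
    if file_paths is None:
        # Same placeholder as in the diff when no paths are given.
        file_paths = ["unknown_source"] * len(input)
    if len(file_paths) != len(input):
        raise ValueError("Number of file paths must match the number of documents")

    cleaned_input = [(clean_text(doc), path) for doc, path in zip(input, file_paths)]

    # Keep the first file path seen for each unique cleaned content.
    unique_content_with_paths: dict[str, str] = {}
    for content, path in cleaned_input:
        if content not in unique_content_with_paths:
            unique_content_with_paths[content] = path

    # Same shape as the contents / new_docs dicts built in the hunks above.
    return {
        compute_mdhash_id(content, prefix="doc-"): {
            "content": content,
            "file_path": path,
            "content_length": len(content),
            "created_at": datetime.now().isoformat(),
        }
        for content, path in unique_content_with_paths.items()
    }


if __name__ == "__main__":
    docs = enqueue_preview(
        ["hello world", "hello world ", "second doc"],
        ["a.txt", "b.txt", "c.txt"],
    )
    # The two "hello world" variants collapse to one doc- ID, keeping a.txt.
    print(len(docs), list(docs))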

View File

@@ -224,7 +224,9 @@ async def _merge_nodes_then_upsert(
             split_string_by_multi_markers(already_node["source_id"], [GRAPH_FIELD_SEP])
         )
         already_file_paths.extend(
-            split_string_by_multi_markers(already_node["metadata"]["file_path"], [GRAPH_FIELD_SEP])
+            split_string_by_multi_markers(
+                already_node["metadata"]["file_path"], [GRAPH_FIELD_SEP]
+            )
         )
         already_description.append(already_node["description"])
 
@@ -336,7 +338,14 @@ async def _merge_edges_then_upsert(
         )
     )
     file_path = GRAPH_FIELD_SEP.join(
-        set([dp["metadata"]["file_path"] for dp in edges_data if dp.get("metadata", {}).get("file_path")] + already_file_paths)
+        set(
+            [
+                dp["metadata"]["file_path"]
+                for dp in edges_data
+                if dp.get("metadata", {}).get("file_path")
+            ]
+            + already_file_paths
+        )
     )
 
     for need_insert_id in [src_id, tgt_id]:
@@ -482,7 +491,9 @@ async def extract_entities(
         else:
             return await use_llm_func(input_text)
 
-    async def _process_extraction_result(result: str, chunk_key: str, file_path: str = "unknown_source"):
+    async def _process_extraction_result(
+        result: str, chunk_key: str, file_path: str = "unknown_source"
+    ):
         """Process a single extraction result (either initial or gleaning)
         Args:
             result (str): The extraction result to process
@@ -669,7 +680,9 @@ async def extract_entities(
"file_path": dp.get("metadata", {}).get("file_path", "unknown_source"), "file_path": dp.get("metadata", {}).get("file_path", "unknown_source"),
"metadata": { "metadata": {
"created_at": dp.get("metadata", {}).get("created_at", time.time()), "created_at": dp.get("metadata", {}).get("created_at", time.time()),
"file_path": dp.get("metadata", {}).get("file_path", "unknown_source"), "file_path": dp.get("metadata", {}).get(
"file_path", "unknown_source"
),
}, },
} }
for dp in all_entities_data for dp in all_entities_data
@@ -687,7 +700,9 @@ async def extract_entities(
"file_path": dp.get("metadata", {}).get("file_path", "unknown_source"), "file_path": dp.get("metadata", {}).get("file_path", "unknown_source"),
"metadata": { "metadata": {
"created_at": dp.get("metadata", {}).get("created_at", time.time()), "created_at": dp.get("metadata", {}).get("created_at", time.time()),
"file_path": dp.get("metadata", {}).get("file_path", "unknown_source"), "file_path": dp.get("metadata", {}).get(
"file_path", "unknown_source"
),
}, },
} }
for dp in all_relationships_data for dp in all_relationships_data
@@ -1574,15 +1589,7 @@ async def _get_edge_data(
     relations_context = list_of_list_to_csv(relations_section_list)
 
     entites_section_list = [
-        [
-            "id",
-            "entity",
-            "type",
-            "description",
-            "rank",
-            "created_at",
-            "file_path"
-        ]
+        ["id", "entity", "type", "description", "rank", "created_at", "file_path"]
     ]
     for i, n in enumerate(use_entities):
         created_at = n.get("created_at", "Unknown")
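
Note: the operate.py hunks re-wrap one recurring pattern: file paths from freshly extracted edges are merged with any already-stored paths and serialized into a single GRAPH_FIELD_SEP-joined string. The sketch below illustrates that round trip; the "<SEP>" delimiter value and the behavior of split_string_by_multi_markers are assumptions here, not taken from this diff.

import re

# Assumption: the field separator used in stored graph records.
GRAPH_FIELD_SEP = "<SEP>"


def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]:
    """Assumed helper: split on any marker and drop empty pieces."""
    if not markers:
        return [content]
    parts = re.split("|".join(re.escape(m) for m in markers), content)
    return [p.strip() for p in parts if p.strip()]


def merge_file_paths(edges_data: list[dict], already_file_path: str | None) -> str:
    """Mirror the file_path merge reformatted in _merge_edges_then_upsert."""
    already_file_paths: list[str] = []
    if already_file_path:
        already_file_paths.extend(
            split_string_by_multi_markers(already_file_path, [GRAPH_FIELD_SEP])
        )
    return GRAPH_FIELD_SEP.join(
        set(
            [
                dp["metadata"]["file_path"]
                for dp in edges_data
                if dp.get("metadata", {}).get("file_path")
            ]
            + already_file_paths
        )
    )


if __name__ == "__main__":
    edges = [
        {"metadata": {"file_path": "a.txt"}},
        {"metadata": {"file_path": "b.txt"}},
        {"metadata": {}},  # no file_path -> skipped by the filter
    ]
    merged = merge_file_paths(edges, "a.txt<SEP>c.txt")
    # Order is unspecified (set semantics), but a.txt appears only once.
    print(sorted(merged.split(GRAPH_FIELD_SEP)))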