From 66720409bdc22b71b5537124cdd694eb3ca2e478 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 27 Jan 2025 15:32:27 +0800 Subject: [PATCH 1/3] Enhance robustness of CSV processing, fix potential CSV parsing issues - Add CSV quoting for all fields - Remove null characters from CSV input - Improve CSV data integrity - Ensure consistent CSV formatting --- lightrag/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/utils.py b/lightrag/utils.py index 3454ea7c..86867c9c 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -237,13 +237,13 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: def list_of_list_to_csv(data: List[List[str]]) -> str: output = io.StringIO() - writer = csv.writer(output) + writer = csv.writer(output, quoting=csv.QUOTE_ALL) writer.writerows(data) return output.getvalue() def csv_string_to_list(csv_string: str) -> List[List[str]]: - output = io.StringIO(csv_string) + output = io.StringIO(csv_string.replace('\x00', '')) reader = csv.reader(output) return [row for row in reader] From 6d61b37c034c4c6e9230cbd735fb3cb89f02ec37 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 27 Jan 2025 16:07:03 +0800 Subject: [PATCH 2/3] Add type ignore for pptx import --- lightrag/api/lightrag_server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 28aaad8b..e3c211ea 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -918,7 +918,7 @@ def create_app(args): case ".pptx": if not pm.is_installed("pptx"): pm.install("pptx") - from pptx import Presentation + from pptx import Presentation # type: ignore # PowerPoint handling prs = Presentation(file_path) @@ -1216,7 +1216,7 @@ def create_app(args): case ".pptx": if not pm.is_installed("pptx"): pm.install("pptx") - from pptx import Presentation + from pptx import Presentation # type: ignore from io import BytesIO # Read PPTX 
from memory @@ -1320,7 +1320,7 @@ def create_app(args): case ".pptx": if not pm.is_installed("pptx"): pm.install("pptx") - from pptx import Presentation + from pptx import Presentation # type: ignore from io import BytesIO pptx_content = await file.read() From c8d384f15f719a8cf378bce4e889de391119115d Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 27 Jan 2025 16:12:30 +0800 Subject: [PATCH 3/3] Fix linting --- lightrag/api/lightrag_server.py | 6 +++--- lightrag/utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index e3c211ea..e661d219 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -918,7 +918,7 @@ def create_app(args): case ".pptx": if not pm.is_installed("pptx"): pm.install("pptx") - from pptx import Presentation # type: ignore + from pptx import Presentation # type: ignore # PowerPoint handling prs = Presentation(file_path) @@ -1216,7 +1216,7 @@ def create_app(args): case ".pptx": if not pm.is_installed("pptx"): pm.install("pptx") - from pptx import Presentation # type: ignore + from pptx import Presentation # type: ignore from io import BytesIO # Read PPTX from memory @@ -1320,7 +1320,7 @@ def create_app(args): case ".pptx": if not pm.is_installed("pptx"): pm.install("pptx") - from pptx import Presentation # type: ignore + from pptx import Presentation # type: ignore from io import BytesIO pptx_content = await file.read() diff --git a/lightrag/utils.py b/lightrag/utils.py index 86867c9c..ba88b7e4 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -243,7 +243,7 @@ def list_of_list_to_csv(data: List[List[str]]) -> str: def csv_string_to_list(csv_string: str) -> List[List[str]]: - output = io.StringIO(csv_string.replace('\x00', '')) + output = io.StringIO(csv_string.replace("\x00", "")) reader = csv.reader(output) return [row for row in reader]