diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index db61788a..72554791 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -474,6 +474,11 @@ class LightRAG:
         storage_class = lazy_external_import(import_path, storage_name)
         return storage_class

+    @staticmethod
+    def clean_text(text: str) -> str:
+        """Clean text by removing null bytes (0x00) and whitespace"""
+        return text.strip().replace('\x00', '')
+
     def insert(
         self,
         input: str | list[str],
@@ -521,8 +526,13 @@
     ) -> None:
         update_storage = False
         try:
-            doc_key = compute_mdhash_id(full_text.strip(), prefix="doc-")
-            new_docs = {doc_key: {"content": full_text.strip()}}
+            # Clean input texts
+            full_text = self.clean_text(full_text)
+            text_chunks = [self.clean_text(chunk) for chunk in text_chunks]
+
+            # Process cleaned texts
+            doc_key = compute_mdhash_id(full_text, prefix="doc-")
+            new_docs = {doc_key: {"content": full_text}}

             _add_doc_keys = await self.full_docs.filter_keys(set(doc_key))
             new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
@@ -535,11 +545,10 @@
             inserting_chunks: dict[str, Any] = {}

             for chunk_text in text_chunks:
-                chunk_text_stripped = chunk_text.strip()
-                chunk_key = compute_mdhash_id(chunk_text_stripped, prefix="chunk-")
+                chunk_key = compute_mdhash_id(chunk_text, prefix="chunk-")

                 inserting_chunks[chunk_key] = {
-                    "content": chunk_text_stripped,
+                    "content": chunk_text,
                     "full_doc_id": doc_key,
                 }

@@ -576,8 +585,8 @@
         if isinstance(input, str):
            input = [input]

-        # 1. Remove duplicate contents from the list
-        unique_contents = list(set(doc.strip() for doc in input))
+        # Clean input text and remove duplicates
+        unique_contents = list(set(self.clean_text(doc) for doc in input))

         # 2. Generate document IDs and initial status
         new_docs: dict[str, Any] = {
@@ -779,7 +788,7 @@
         all_chunks_data: dict[str, dict[str, str]] = {}
         chunk_to_source_map: dict[str, str] = {}
         for chunk_data in custom_kg.get("chunks", {}):
-            chunk_content = chunk_data["content"].strip()
+            chunk_content = self.clean_text(chunk_data["content"])
             source_id = chunk_data["source_id"]
             tokens = len(
                 encode_string_by_tiktoken(
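For reviewers, a minimal standalone sketch (not part of the patch) showing how the new clean_text helper behaves; the assertions follow directly from text.strip().replace('\x00', ''):

# Standalone sketch of the helper added above, for illustration only;
# in the patch it is a @staticmethod on the LightRAG class.
def clean_text(text: str) -> str:
    """Clean text by removing null bytes (0x00) and whitespace"""
    return text.strip().replace('\x00', '')

# Surrounding whitespace is stripped and embedded null bytes are removed,
# so inputs differing only in that noise now map to the same content.
assert clean_text("  hello\x00world \n") == "helloworld"
# Note: strip() runs before null bytes are removed, so whitespace that
# becomes leading/trailing only after null removal is preserved.
assert clean_text("\x00 hello") == " hello"

Because compute_mdhash_id now receives the cleaned text, document and chunk keys (doc-/chunk- prefixed hashes) are derived from the normalized content, so deduplication in full_docs and the chunk store treats inputs that differ only in null bytes or surrounding whitespace as identical.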