From 791330400788e360fd3e985fad5fac59e7fec21c Mon Sep 17 00:00:00 2001
From: Lukas Selch
Date: Mon, 17 Feb 2025 15:12:35 +0100
Subject: [PATCH 1/6] Fixed broken ainsert_custom_kg()

---
 lightrag/lightrag.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index bf1c02d2..7b3e8605 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -36,6 +36,7 @@ from .utils import (
     limit_async_func_call,
     logger,
     set_logger,
+    encode_string_by_tiktoken,
 )

 from .types import KnowledgeGraph
@@ -863,7 +864,14 @@ class LightRAG:
                 source_id = chunk_data["source_id"]
                 chunk_id = compute_mdhash_id(chunk_content.strip(), prefix="chunk-")

-                chunk_entry = {"content": chunk_content.strip(), "source_id": source_id}
+                chunk_entry = {
+                    "content": chunk_content.strip(),
+                    "source_id": source_id,
+                    "tokens": len(encode_string_by_tiktoken(chunk_entry["content"])),
+                    "chunk_order_id": 0,
+                    "full_doc_id": source_id,
+                    "status": DocStatus.PROCESSED
+                }
                 all_chunks_data[chunk_id] = chunk_entry
                 chunk_to_source_map[source_id] = chunk_id
                 update_storage = True

From 86f5a88db792c26094617f3c135a0078c9bfdcf1 Mon Sep 17 00:00:00 2001
From: Lukas Selch
Date: Mon, 17 Feb 2025 15:20:23 +0100
Subject: [PATCH 2/6] Fixed wrong variable name

---
 lightrag/lightrag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 7b3e8605..2f7bb5e4 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -867,7 +867,7 @@ class LightRAG:
                 chunk_entry = {
                     "content": chunk_content.strip(),
                     "source_id": source_id,
-                    "tokens": len(encode_string_by_tiktoken(chunk_entry["content"])),
+                    "tokens": len(encode_string_by_tiktoken(chunk_content.strip())),
                     "chunk_order_id": 0,
                     "full_doc_id": source_id,
                     "status": DocStatus.PROCESSED

From 537e10303dafcc1e46fa43d65d28eae4b0f63111 Mon Sep 17 00:00:00 2001
From: Lukas Selch
Date: Mon, 17 Feb 2025 15:25:50 +0100
Subject: [PATCH 3/6] Fixed formatting

---
 lightrag/lightrag.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 2f7bb5e4..1c3bd089 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -860,17 +860,17 @@ class LightRAG:
             all_chunks_data: dict[str, dict[str, str]] = {}
             chunk_to_source_map: dict[str, str] = {}
             for chunk_data in custom_kg.get("chunks", {}):
-                chunk_content = chunk_data["content"]
+                chunk_content = chunk_data["content"].strip()
                 source_id = chunk_data["source_id"]
-                chunk_id = compute_mdhash_id(chunk_content.strip(), prefix="chunk-")
+                chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")

                 chunk_entry = {
-                    "content": chunk_content.strip(),
+                    "content": chunk_content,
                     "source_id": source_id,
-                    "tokens": len(encode_string_by_tiktoken(chunk_content.strip())),
+                    "tokens": len(encode_string_by_tiktoken(chunk_content)),
                     "chunk_order_id": 0,
                     "full_doc_id": source_id,
-                    "status": DocStatus.PROCESSED
+                    "status": DocStatus.PROCESSED,
                 }
                 all_chunks_data[chunk_id] = chunk_entry
                 chunk_to_source_map[source_id] = chunk_id

From bc630c862000893f09540219d6655b69c94ee3a3 Mon Sep 17 00:00:00 2001
From: Lukas Selch
Date: Wed, 19 Feb 2025 07:15:30 +0100
Subject: [PATCH 4/6] Renamed chunk_order_index and improve token calculation

---
 lightrag/lightrag.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 1c3bd089..8513ac19 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -862,13 +862,15 @@ class LightRAG:
             for chunk_data in custom_kg.get("chunks", {}):
                 chunk_content = chunk_data["content"].strip()
                 source_id = chunk_data["source_id"]
+                tokens = len(encode_string_by_tiktoken(chunk_content, model_name=self.tiktoken_model_name))
+                chunk_order_index = 0 if "chunk_order_index" not in chunk_data.keys() else chunk_data["chunk_order_index"]
                 chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")

                 chunk_entry = {
                     "content": chunk_content,
                     "source_id": source_id,
-                    "tokens": len(encode_string_by_tiktoken(chunk_content)),
-                    "chunk_order_id": 0,
+                    "tokens": tokens,
+                    "chunk_order_index": chunk_order_index,
                     "full_doc_id": source_id,
                     "status": DocStatus.PROCESSED,
                 }

From 701d8bb48e3e3224b357e677c5bde042963dc0de Mon Sep 17 00:00:00 2001
From: Lukas Selch
Date: Wed, 19 Feb 2025 10:28:25 +0100
Subject: [PATCH 5/6] Applied lint

---
 lightrag/lightrag.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 8513ac19..e73e4c1b 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -862,8 +862,16 @@ class LightRAG:
             for chunk_data in custom_kg.get("chunks", {}):
                 chunk_content = chunk_data["content"].strip()
                 source_id = chunk_data["source_id"]
-                tokens = len(encode_string_by_tiktoken(chunk_content, model_name=self.tiktoken_model_name))
-                chunk_order_index = 0 if "chunk_order_index" not in chunk_data.keys() else chunk_data["chunk_order_index"]
+                tokens = len(
+                    encode_string_by_tiktoken(
+                        chunk_content, model_name=self.tiktoken_model_name
+                    )
+                )
+                chunk_order_index = (
+                    0
+                    if "chunk_order_index" not in chunk_data.keys()
+                    else chunk_data["chunk_order_index"]
+                )
                 chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")

                 chunk_entry = {

From 7fab9accfe220f154ca4aede8e2d1f2dd3870602 Mon Sep 17 00:00:00 2001
From: Lukas Selch
Date: Wed, 19 Feb 2025 14:58:51 +0100
Subject: [PATCH 6/6] Updated documentation examples to include chunk_order_index case

---
 README.md                    | 8 ++++++++
 examples/insert_custom_kg.py | 9 +++++++++
 2 files changed, 17 insertions(+)

diff --git a/README.md b/README.md
index 92a32703..f43dd370 100644
--- a/README.md
+++ b/README.md
@@ -608,14 +608,22 @@ custom_kg = {
         {
             "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
             "source_id": "Source1",
+            "chunk_order_index": 0,
+        },
+        {
+            "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
+            "source_id": "Source1",
+            "chunk_order_index": 1,
         },
         {
             "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
             "source_id": "Source2",
+            "chunk_order_index": 0,
         },
         {
             "content": "None",
             "source_id": "UNKNOWN",
+            "chunk_order_index": 0,
         },
     ],
 }
diff --git a/examples/insert_custom_kg.py b/examples/insert_custom_kg.py
index 50ad925e..db489c96 100644
--- a/examples/insert_custom_kg.py
+++ b/examples/insert_custom_kg.py
@@ -87,18 +87,27 @@ custom_kg = {
         {
             "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
             "source_id": "Source1",
+            "chunk_order_index": 0,
+        },
+        {
+            "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
+            "source_id": "Source1",
+            "chunk_order_index": 1,
         },
         {
             "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
             "source_id": "Source2",
+            "chunk_order_index": 0,
         },
         {
             "content": "EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.",
the globe.", "source_id": "Source3", + "source_chunk_index": 0, }, { "content": "None", "source_id": "UNKNOWN", + "source_chunk_index": 0, }, ], }