From ecd1fc48c2df607a48bc03eebd59ea165f90cb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=20=E4=B8=9C?= <540953132@qq.com> Date: Thu, 17 Apr 2025 11:53:01 +0800 Subject: [PATCH] fix: add missing keys for ainsert_custom_chunks --- lightrag/lightrag.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index daa92435..99e3ba41 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -582,6 +582,7 @@ class LightRAG: # Clean input texts full_text = clean_text(full_text) text_chunks = [clean_text(chunk) for chunk in text_chunks] + file_path = "" # Process cleaned texts if doc_id is None: @@ -600,12 +601,19 @@ class LightRAG: logger.info(f"Inserting {len(new_docs)} docs") inserting_chunks: dict[str, Any] = {} - for chunk_text in text_chunks: + for index, chunk_text in enumerate(text_chunks): chunk_key = compute_mdhash_id(chunk_text, prefix="chunk-") - + tokens = len( + encode_string_by_tiktoken( + chunk_text, model_name=self.tiktoken_model_name + ) + ) inserting_chunks[chunk_key] = { "content": chunk_text, "full_doc_id": doc_key, + "tokens": tokens, + "chunk_order_index": index, + "file_path": file_path, } doc_ids = set(inserting_chunks.keys())