Rename chunk_order_id to chunk_order_index and improve token calculation
This commit is contained in:
@@ -862,13 +862,15 @@ class LightRAG:
|
||||
for chunk_data in custom_kg.get("chunks", {}):
|
||||
chunk_content = chunk_data["content"].strip()
|
||||
source_id = chunk_data["source_id"]
|
||||
tokens = len(encode_string_by_tiktoken(chunk_content, model_name=self.tiktoken_model_name))
|
||||
chunk_order_index = 0 if "chunk_order_index" not in chunk_data.keys() else chunk_data["chunk_order_index"]
|
||||
chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")
|
||||
|
||||
chunk_entry = {
|
||||
"content": chunk_content,
|
||||
"source_id": source_id,
|
||||
"tokens": len(encode_string_by_tiktoken(chunk_content)),
|
||||
"chunk_order_id": 0,
|
||||
"tokens": tokens,
|
||||
"chunk_order_index": chunk_order_index,
|
||||
"full_doc_id": source_id,
|
||||
"status": DocStatus.PROCESSED,
|
||||
}
|
||||
|
Reference in New Issue
Block a user