Rename chunk_order_id to chunk_order_index and improve token calculation

This commit is contained in:
Lukas Selch
2025-02-19 07:15:30 +01:00
parent 537e10303d
commit bc630c8620

View File

@@ -862,13 +862,15 @@ class LightRAG:
for chunk_data in custom_kg.get("chunks", {}): for chunk_data in custom_kg.get("chunks", {}):
chunk_content = chunk_data["content"].strip() chunk_content = chunk_data["content"].strip()
source_id = chunk_data["source_id"] source_id = chunk_data["source_id"]
tokens = len(encode_string_by_tiktoken(chunk_content, model_name=self.tiktoken_model_name))
chunk_order_index = 0 if "chunk_order_index" not in chunk_data.keys() else chunk_data["chunk_order_index"]
chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-") chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")
chunk_entry = { chunk_entry = {
"content": chunk_content, "content": chunk_content,
"source_id": source_id, "source_id": source_id,
"tokens": len(encode_string_by_tiktoken(chunk_content)), "tokens": tokens,
"chunk_order_id": 0, "chunk_order_index": chunk_order_index,
"full_doc_id": source_id, "full_doc_id": source_id,
"status": DocStatus.PROCESSED, "status": DocStatus.PROCESSED,
} }