Rename chunk_order_id to chunk_order_index and improve token calculation
This commit is contained in:
@@ -862,13 +862,15 @@ class LightRAG:
|
|||||||
for chunk_data in custom_kg.get("chunks", {}):
|
for chunk_data in custom_kg.get("chunks", {}):
|
||||||
chunk_content = chunk_data["content"].strip()
|
chunk_content = chunk_data["content"].strip()
|
||||||
source_id = chunk_data["source_id"]
|
source_id = chunk_data["source_id"]
|
||||||
|
tokens = len(encode_string_by_tiktoken(chunk_content, model_name=self.tiktoken_model_name))
|
||||||
|
chunk_order_index = 0 if "chunk_order_index" not in chunk_data.keys() else chunk_data["chunk_order_index"]
|
||||||
chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")
|
chunk_id = compute_mdhash_id(chunk_content, prefix="chunk-")
|
||||||
|
|
||||||
chunk_entry = {
|
chunk_entry = {
|
||||||
"content": chunk_content,
|
"content": chunk_content,
|
||||||
"source_id": source_id,
|
"source_id": source_id,
|
||||||
"tokens": len(encode_string_by_tiktoken(chunk_content)),
|
"tokens": tokens,
|
||||||
"chunk_order_id": 0,
|
"chunk_order_index": chunk_order_index,
|
||||||
"full_doc_id": source_id,
|
"full_doc_id": source_id,
|
||||||
"status": DocStatus.PROCESSED,
|
"status": DocStatus.PROCESSED,
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user