feat: move query-related settings to env file for better configuration

• Add env vars for token and chunk settings
• Add token count logging for prompts
• Add token count logging for context
• Move hardcoded values to env variables
• Improve logging clarity and consistency
yangdx
2025-02-16 19:26:57 +08:00
parent 8fdbcb0d3f
commit 601df31edf
4 changed files with 69 additions and 17 deletions
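
The settings this commit externalizes follow a common env-driven pattern. Here is a minimal sketch of that pattern, assuming hypothetical variable names like MAX_TOKEN_TEXT_CHUNK; the authoritative names and defaults live in the repo's .env file, which is not shown in this diff:

```python
# Sketch of env-driven query settings. The env var names and defaults here
# are illustrative assumptions; only the field names (max_token_for_*) are
# taken from the query_param usages visible in the diff below.
import os
from dataclasses import dataclass, field

@dataclass
class QueryParamDefaults:
    # Token budget for retrieved text chunks
    max_token_for_text_unit: int = field(
        default_factory=lambda: int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
    )
    # Token budget for relation descriptions (global context)
    max_token_for_global_context: int = field(
        default_factory=lambda: int(os.getenv("MAX_TOKEN_RELATION_DESC", "4000"))
    )
    # Token budget for entity descriptions (local context)
    max_token_for_local_context: int = field(
        default_factory=lambda: int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
    )
```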


@@ -642,9 +642,13 @@ async def kg_query(
history=history_context,
)
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
response = await use_model_func(
query,
system_prompt=sys_prompt,
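
The len_of_prompts counts above rely on encode_string_by_tiktoken. A minimal sketch of such a helper, assuming the tiktoken package; the project's actual implementation may cache encoders or differ in signature:

```python
# Sketch of a tiktoken-based token counter (assumes the `tiktoken` package;
# the default model name and the fallback encoding are illustrative choices).
import tiktoken

def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o-mini") -> list[int]:
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # Unknown model names fall back to a widely used encoding
        encoding = tiktoken.get_encoding("cl100k_base")
    return encoding.encode(content)

# Usage mirroring the hunk above:
# len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
```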
@@ -730,6 +734,9 @@ async def extract_keywords_only(
query=text, examples=examples, language=language, history=history_context
)
len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
# 5. Call the LLM for keyword extraction
use_model_func = global_config["llm_model_func"]
result = await use_model_func(kw_prompt, keyword_extraction=True)
@@ -893,7 +900,9 @@ async def mix_kg_vector_query(
chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
formatted_chunks.append(chunk_text)
logger.info(f"Truncate {len(chunks)} to {len(formatted_chunks)} chunks")
logger.info(
f"Truncate text chunks from {len(chunks)} to {len(formatted_chunks)}"
)
return "\n--New Chunk--\n".join(formatted_chunks)
except Exception as e:
logger.error(f"Error in get_vector_context: {e}")
@@ -926,6 +935,9 @@ async def mix_kg_vector_query(
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.info(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
# 6. Generate response
response = await use_model_func(
query,
@@ -1031,7 +1043,7 @@ async def _build_query_context(
if not entities_context.strip() and not relations_context.strip():
return None
return f"""
result = f"""
-----Entities-----
```csv
{entities_context}
@@ -1045,6 +1057,15 @@ async def _build_query_context(
{text_units_context}
```
"""
context_tokens = len(encode_string_by_tiktoken(result))
entities_tokens = len(encode_string_by_tiktoken(entities_context))
relations_tokens = len(encode_string_by_tiktoken(relations_context))
text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
logger.info(
f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
)
return result
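
With illustrative numbers, the new breakdown log reads like:

```
INFO: Context Tokens - Total: 3495, Entities: 900, Relations: 1250, Chunks: 1280
```

The total can exceed the sum of the three parts because result also wraps each section in headers and csv code fences, which count toward the total.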
async def _get_node_data(
@@ -1089,7 +1110,7 @@ async def _get_node_data(
),
)
logger.info(
f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
)
# build prompt
@@ -1222,6 +1243,10 @@ async def _find_most_related_text_unit_from_entities(
max_token_size=query_param.max_token_for_text_unit,
)
logger.info(
f"Truncate text chunks from {len(all_text_units_lookup)} to {len(all_text_units)}"
)
all_text_units = [t["data"] for t in all_text_units]
return all_text_units
@@ -1263,6 +1288,9 @@ async def _find_most_related_edges_from_entities(
key=lambda x: x["description"],
max_token_size=query_param.max_token_for_global_context,
)
logger.info(f"Truncate relations from {len(all_edges)} to {len(all_edges_data)}")
return all_edges_data
@@ -1310,11 +1338,13 @@ async def _get_edge_data(
edge_datas = sorted(
edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
)
len_edge_datas = len(edge_datas)
edge_datas = truncate_list_by_token_size(
edge_datas,
key=lambda x: x["description"],
max_token_size=query_param.max_token_for_global_context,
)
logger.info(f"Truncate relations from {len_edge_datas} to {len(edge_datas)}")
use_entities, use_text_units = await asyncio.gather(
_find_most_related_entities_from_relationships(
@@ -1325,7 +1355,7 @@ async def _get_edge_data(
),
)
logger.info(
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
)
relations_section_list = [
@@ -1414,11 +1444,13 @@ async def _find_most_related_entities_from_relationships(
for k, n, d in zip(entity_names, node_datas, node_degrees)
]
len_node_datas = len(node_datas)
node_datas = truncate_list_by_token_size(
node_datas,
key=lambda x: x["description"],
max_token_size=query_param.max_token_for_local_context,
)
logger.info(f"Truncate entities from {len_node_datas} to {len(node_datas)}")
return node_datas
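
truncate_list_by_token_size, called throughout these hunks, evidently trims a list until a token budget is met. A minimal sketch under that assumption, reusing the token counter sketched earlier; the real helper's details may differ:

```python
# Sketch of greedy token-budget truncation, inferred from the call sites in
# this diff (key= extracts the text to count; max_token_size= is the budget).
from typing import Callable, TypeVar

T = TypeVar("T")

def truncate_list_by_token_size(
    list_data: list[T],
    key: Callable[[T], str],
    max_token_size: int,
) -> list[T]:
    tokens = 0
    for i, item in enumerate(list_data):
        tokens += len(encode_string_by_tiktoken(key(item)))
        if tokens > max_token_size:
            # Drop the item that exceeds the budget and everything after it
            return list_data[:i]
    return list_data
```

Capturing len() before and after this call is what produces the "Truncate ... from X to Y" log lines added in this commit.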
@@ -1474,6 +1506,10 @@ async def _find_related_text_unit_from_relationships(
max_token_size=query_param.max_token_for_text_unit,
)
logger.info(
f"Truncate text chunks from {len(valid_text_units)} to {len(truncated_text_units)}"
)
all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
return all_text_units
@@ -1541,7 +1577,8 @@ async def naive_query(
logger.warning("No chunks left after truncation")
return PROMPTS["fail_response"]
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
logger.info(f"Truncate text chunks from {len(chunks)} to {len(maybe_trun_chunks)}")
section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
if query_param.only_need_context:
@@ -1564,6 +1601,9 @@ async def naive_query(
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.info(f"[naive_query]Prompt Tokens: {len_of_prompts}")
response = await use_model_func(
query,
system_prompt=sys_prompt,
@@ -1706,6 +1746,9 @@ async def kg_query_with_keywords(
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.info(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
response = await use_model_func(
query,
system_prompt=sys_prompt,