fix typo: rename misspelled keyword argument knwoledge_graph_inst to knowledge_graph_inst

Yazington
2024-10-26 00:11:21 -04:00
parent 6527e75e81
commit eec29d041a
2 changed files with 17 additions and 15 deletions

View File

@@ -85,7 +85,9 @@ class LightRAG:
     # LLM
     llm_model_func: callable = gpt_4o_mini_complete  # hf_model_complete#
-    llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct"  #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
+    llm_model_name: str = (
+        "meta-llama/Llama-3.2-1B-Instruct"  #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
+    )
     llm_model_max_token_size: int = 32768
     llm_model_max_async: int = 16
@@ -208,7 +210,7 @@ class LightRAG:
         logger.info("[Entity Extraction]...")
         maybe_new_kg = await extract_entities(
             inserting_chunks,
-            knwoledge_graph_inst=self.chunk_entity_relation_graph,
+            knowledge_graph_inst=self.chunk_entity_relation_graph,
             entity_vdb=self.entities_vdb,
             relationships_vdb=self.relationships_vdb,
             global_config=asdict(self),
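Note: this call-site rename and the definition renames in the file below have to land together, because Python matches keyword arguments to parameter names literally. A minimal sketch of the failure mode (toy signature and values, not the real implementation):

import asyncio

async def extract_entities(chunks, knowledge_graph_inst=None):
    # Parameter uses the corrected spelling.
    return knowledge_graph_inst

async def main():
    try:
        # A caller still using the old misspelled keyword fails at call time:
        # TypeError: ... got an unexpected keyword argument 'knwoledge_graph_inst'
        await extract_entities({}, knwoledge_graph_inst="graph")
    except TypeError as err:
        print(err)
    # The corrected keyword matches the parameter name and succeeds.
    print(await extract_entities({}, knowledge_graph_inst="graph"))

asyncio.run(main())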

View File

@@ -124,14 +124,14 @@ async def _handle_single_relationship_extraction(
 async def _merge_nodes_then_upsert(
     entity_name: str,
     nodes_data: list[dict],
-    knwoledge_graph_inst: BaseGraphStorage,
+    knowledge_graph_inst: BaseGraphStorage,
     global_config: dict,
 ):
     already_entitiy_types = []
     already_source_ids = []
     already_description = []
-    already_node = await knwoledge_graph_inst.get_node(entity_name)
+    already_node = await knowledge_graph_inst.get_node(entity_name)
     if already_node is not None:
         already_entitiy_types.append(already_node["entity_type"])
         already_source_ids.extend(
@@ -160,7 +160,7 @@ async def _merge_nodes_then_upsert(
         description=description,
         source_id=source_id,
     )
-    await knwoledge_graph_inst.upsert_node(
+    await knowledge_graph_inst.upsert_node(
         entity_name,
         node_data=node_data,
     )
@@ -172,7 +172,7 @@ async def _merge_edges_then_upsert(
     src_id: str,
     tgt_id: str,
     edges_data: list[dict],
-    knwoledge_graph_inst: BaseGraphStorage,
+    knowledge_graph_inst: BaseGraphStorage,
     global_config: dict,
 ):
     already_weights = []
@@ -180,8 +180,8 @@ async def _merge_edges_then_upsert(
     already_description = []
     already_keywords = []
-    if await knwoledge_graph_inst.has_edge(src_id, tgt_id):
-        already_edge = await knwoledge_graph_inst.get_edge(src_id, tgt_id)
+    if await knowledge_graph_inst.has_edge(src_id, tgt_id):
+        already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
         already_weights.append(already_edge["weight"])
         already_source_ids.extend(
             split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
         )
@@ -202,8 +202,8 @@ async def _merge_edges_then_upsert(
         set([dp["source_id"] for dp in edges_data] + already_source_ids)
     )
     for need_insert_id in [src_id, tgt_id]:
-        if not (await knwoledge_graph_inst.has_node(need_insert_id)):
-            await knwoledge_graph_inst.upsert_node(
+        if not (await knowledge_graph_inst.has_node(need_insert_id)):
+            await knowledge_graph_inst.upsert_node(
                 need_insert_id,
                 node_data={
                     "source_id": source_id,
@@ -214,7 +214,7 @@ async def _merge_edges_then_upsert(
     description = await _handle_entity_relation_summary(
         (src_id, tgt_id), description, global_config
     )
-    await knwoledge_graph_inst.upsert_edge(
+    await knowledge_graph_inst.upsert_edge(
         src_id,
         tgt_id,
         edge_data=dict(
@@ -237,7 +237,7 @@ async def _merge_edges_then_upsert(
 async def extract_entities(
     chunks: dict[str, TextChunkSchema],
-    knwoledge_graph_inst: BaseGraphStorage,
+    knowledge_graph_inst: BaseGraphStorage,
     entity_vdb: BaseVectorStorage,
     relationships_vdb: BaseVectorStorage,
     global_config: dict,
@@ -341,13 +341,13 @@ async def extract_entities(
         maybe_edges[tuple(sorted(k))].extend(v)
     all_entities_data = await asyncio.gather(
         *[
-            _merge_nodes_then_upsert(k, v, knwoledge_graph_inst, global_config)
+            _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
             for k, v in maybe_nodes.items()
         ]
     )
     all_relationships_data = await asyncio.gather(
         *[
-            _merge_edges_then_upsert(k[0], k[1], v, knwoledge_graph_inst, global_config)
+            _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
             for k, v in maybe_edges.items()
         ]
     )
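Note: the two gather calls in this hunk fan out one merge coroutine per entity and per edge and await them all concurrently. A self-contained sketch of that pattern with a stand-in merge function (names and data are assumptions, not the real implementation):

import asyncio

async def _merge_nodes_then_upsert(entity_name, nodes_data, graph, config):
    # Stand-in for the real merge: yield control once, then return the key.
    await asyncio.sleep(0)
    return entity_name

async def main():
    maybe_nodes = {"ALICE": [{"description": "person"}], "PARIS": [{"description": "city"}]}
    # One coroutine per entity; gather awaits them together and
    # returns results in input order.
    results = await asyncio.gather(
        *[
            _merge_nodes_then_upsert(k, v, graph=None, config={})
            for k, v in maybe_nodes.items()
        ]
    )
    print(results)  # ['ALICE', 'PARIS']

asyncio.run(main())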
@@ -384,7 +384,7 @@ async def extract_entities(
     }
     await relationships_vdb.upsert(data_for_vdb)
 
-    return knwoledge_graph_inst
+    return knowledge_graph_inst
 
 
 async def local_query(
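Note: since the misspelling appeared across two files, a repo-wide scan confirms no stale call site remains. A hedged sketch, assuming it is run from the repository root:

import pathlib

# Print every remaining occurrence of the old, misspelled identifier.
for path in pathlib.Path(".").rglob("*.py"):
    text = path.read_text(encoding="utf-8", errors="ignore")
    for lineno, line in enumerate(text.splitlines(), start=1):
        if "knwoledge_graph_inst" in line:
            print(f"{path}:{lineno}: {line.strip()}")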