Convert parallel queries to serial execution

2025-04-16 17:55:49 +08:00
parent 2c7e8a5526
commit 0afe35a9fd
2 changed files with 27 additions and 23 deletions
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -142,6 +142,9 @@ class PostgreSQLDB:
        with_age: bool = False,
        graph_name: str | None = None,
    ) -> dict[str, Any] | None | list[dict[str, Any]]:
+        # start_time = time.time()
+        # logger.info(f"PostgreSQL, Querying:\n{sql}")
+
        async with self.pool.acquire() as connection:  # type: ignore
            if with_age and graph_name:
                await self.configure_age(connection, graph_name)  # type: ignore
@@ -166,6 +169,11 @@ class PostgreSQLDB:
                        data = dict(zip(columns, rows[0]))
                    else:
                        data = None
+
+                # query_time = time.time() - start_time
+                # logger.info(f"PostgreSQL, Query result len: {len(data)}")
+                # logger.info(f"PostgreSQL, Query execution time: {query_time:.4f}s")
+
                return data
            except Exception as e:
                logger.error(f"PostgreSQL database, error:{e}")
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1255,21 +1255,19 @@ async def _build_query_context(
            query_param,
        )
    else:  # hybrid mode
-        ll_data, hl_data = await asyncio.gather(
-            _get_node_data(
-                ll_keywords,
-                knowledge_graph_inst,
-                entities_vdb,
-                text_chunks_db,
-                query_param,
-            ),
-            _get_edge_data(
-                hl_keywords,
-                knowledge_graph_inst,
-                relationships_vdb,
-                text_chunks_db,
-                query_param,
-            ),
+        ll_data = await _get_node_data(
+            ll_keywords,
+            knowledge_graph_inst,
+            entities_vdb,
+            text_chunks_db,
+            query_param,
+        )
+        hl_data = await _get_edge_data(
+            hl_keywords,
+            knowledge_graph_inst,
+            relationships_vdb,
+            text_chunks_db,
+            query_param,
        )

        (
@@ -1351,13 +1349,11 @@ async def _get_node_data(
        if n is not None
    ]  # what is this text_chunks_db doing.  dont remember it in airvx.  check the diagram.
    # get entitytext chunk
-    use_text_units, use_relations = await asyncio.gather(
-        _find_most_related_text_unit_from_entities(
-            node_datas, query_param, text_chunks_db, knowledge_graph_inst
-        ),
-        _find_most_related_edges_from_entities(
-            node_datas, query_param, knowledge_graph_inst
-        ),
+    use_text_units = await _find_most_related_text_unit_from_entities(
+        node_datas, query_param, text_chunks_db, knowledge_graph_inst
+    )
+    use_relations = await _find_most_related_edges_from_entities(
+        node_datas, query_param, knowledge_graph_inst
    )

    len_node_datas = len(node_datas)
@@ -1502,7 +1498,7 @@ async def _find_most_related_text_unit_from_entities(
                all_text_units_lookup[c_id] = index
                tasks.append((c_id, index, this_edges))

-    # Process in batches of 25 tasks at a time to avoid overwhelming resources
+    # Process tasks in batches of batch_size to avoid overwhelming resources
    batch_size = 5
    results = []