From 500d4feddaae56b823962416269164b0ed0524c9 Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Fri, 12 Apr 2024 14:37:10 +0300 Subject: [PATCH] fix(rebalance): fix start order of rebalance applications --- .../src/emqx_eviction_agent.app.src | 3 +- .../src/emqx_node_rebalance.app.src | 7 +- .../src/emqx_node_rebalance_api.erl | 108 +++++++++--------- .../test/emqx_node_rebalance_SUITE.erl | 2 +- .../test/emqx_node_rebalance_agent_SUITE.erl | 4 +- .../test/emqx_node_rebalance_api_SUITE.erl | 3 +- .../test/emqx_node_rebalance_cli_SUITE.erl | 2 +- .../emqx_node_rebalance_evacuation_SUITE.erl | 2 +- .../test/emqx_node_rebalance_purge_SUITE.erl | 1 - .../test/emqx_node_rebalance_status_SUITE.erl | 1 - changes/ee/fix-12871.en.md | 1 + mix.exs | 4 +- rebar.config.erl | 2 + 13 files changed, 73 insertions(+), 67 deletions(-) create mode 100644 changes/ee/fix-12871.en.md diff --git a/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src b/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src index 10a464f26..7e692bf9c 100644 --- a/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src +++ b/apps/emqx_eviction_agent/src/emqx_eviction_agent.app.src @@ -9,7 +9,8 @@ {applications, [ kernel, stdlib, - emqx_ctl + emqx_ctl, + emqx ]}, {mod, {emqx_eviction_agent_app, []}}, {env, []}, diff --git a/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src b/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src index beb5f2abb..e8967c556 100644 --- a/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src +++ b/apps/emqx_node_rebalance/src/emqx_node_rebalance.app.src @@ -1,6 +1,6 @@ {application, emqx_node_rebalance, [ {description, "EMQX Node Rebalance"}, - {vsn, "5.0.7"}, + {vsn, "5.0.8"}, {registered, [ emqx_node_rebalance_sup, emqx_node_rebalance, @@ -10,7 +10,10 @@ ]}, {applications, [ kernel, - stdlib + stdlib, + emqx, + emqx_ctl, + emqx_eviction_agent ]}, {mod, {emqx_node_rebalance_app, []}}, {env, []}, diff --git 
a/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl b/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl index a054cfe1f..35461ee5b 100644 --- a/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl +++ b/apps/emqx_node_rebalance/src/emqx_node_rebalance_api.erl @@ -423,7 +423,7 @@ param_node() -> fields(rebalance_start) -> [ - {"wait_health_check", + {wait_health_check, mk( emqx_schema:timeout_duration_s(), #{ @@ -431,7 +431,7 @@ fields(rebalance_start) -> required => false } )}, - {"conn_evict_rate", + {conn_evict_rate, mk( pos_integer(), #{ @@ -439,7 +439,7 @@ fields(rebalance_start) -> required => false } )}, - {"sess_evict_rate", + {sess_evict_rate, mk( pos_integer(), #{ @@ -447,7 +447,7 @@ fields(rebalance_start) -> required => false } )}, - {"abs_conn_threshold", + {abs_conn_threshold, mk( pos_integer(), #{ @@ -455,7 +455,7 @@ fields(rebalance_start) -> required => false } )}, - {"rel_conn_threshold", + {rel_conn_threshold, mk( number(), #{ @@ -464,7 +464,7 @@ fields(rebalance_start) -> validator => [fun(Value) -> Value > 1.0 end] } )}, - {"abs_sess_threshold", + {abs_sess_threshold, mk( pos_integer(), #{ @@ -472,7 +472,7 @@ fields(rebalance_start) -> required => false } )}, - {"rel_sess_threshold", + {rel_sess_threshold, mk( number(), #{ @@ -481,7 +481,7 @@ fields(rebalance_start) -> validator => [fun(Value) -> Value > 1.0 end] } )}, - {"wait_takeover", + {wait_takeover, mk( emqx_schema:timeout_duration_s(), #{ @@ -489,7 +489,7 @@ fields(rebalance_start) -> required => false } )}, - {"nodes", + {nodes, mk( list(binary()), #{ @@ -501,7 +501,7 @@ fields(rebalance_start) -> ]; fields(rebalance_evacuation_start) -> [ - {"wait_health_check", + {wait_health_check, mk( emqx_schema:timeout_duration_s(), #{ @@ -509,7 +509,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"conn_evict_rate", + {conn_evict_rate, mk( pos_integer(), #{ @@ -517,7 +517,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - 
{"sess_evict_rate", + {sess_evict_rate, mk( pos_integer(), #{ @@ -525,7 +525,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"redirect_to", + {redirect_to, mk( binary(), #{ @@ -533,7 +533,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"wait_takeover", + {wait_takeover, mk( emqx_schema:timeout_duration_s(), #{ @@ -541,7 +541,7 @@ fields(rebalance_evacuation_start) -> required => false } )}, - {"migrate_to", + {migrate_to, mk( nonempty_list(binary()), #{ @@ -552,7 +552,7 @@ fields(rebalance_evacuation_start) -> ]; fields(purge_start) -> [ - {"purge_rate", + {purge_rate, mk( pos_integer(), #{ @@ -563,7 +563,7 @@ fields(purge_start) -> ]; fields(local_status_disabled) -> [ - {"status", + {status, mk( disabled, #{ @@ -574,7 +574,7 @@ fields(local_status_disabled) -> ]; fields(local_status_enabled) -> [ - {"status", + {status, mk( enabled, #{ @@ -582,7 +582,7 @@ fields(local_status_enabled) -> required => true } )}, - {"process", + {process, mk( hoconsc:enum([rebalance, evacuation]), #{ @@ -590,7 +590,7 @@ fields(local_status_enabled) -> required => true } )}, - {"state", + {state, mk( atom(), #{ @@ -598,7 +598,7 @@ fields(local_status_enabled) -> required => true } )}, - {"coordinator_node", + {coordinator_node, mk( binary(), #{ @@ -606,7 +606,7 @@ fields(local_status_enabled) -> required => false } )}, - {"connection_eviction_rate", + {connection_eviction_rate, mk( pos_integer(), #{ @@ -614,7 +614,7 @@ fields(local_status_enabled) -> required => false } )}, - {"session_eviction_rate", + {session_eviction_rate, mk( pos_integer(), #{ @@ -622,7 +622,7 @@ fields(local_status_enabled) -> required => false } )}, - {"connection_goal", + {connection_goal, mk( non_neg_integer(), #{ @@ -630,7 +630,7 @@ fields(local_status_enabled) -> required => false } )}, - {"session_goal", + {session_goal, mk( non_neg_integer(), #{ @@ -638,7 +638,7 @@ fields(local_status_enabled) -> required => false } )}, - {"disconnected_session_goal", + 
{disconnected_session_goal, mk( non_neg_integer(), #{ @@ -646,7 +646,7 @@ fields(local_status_enabled) -> required => false } )}, - {"session_recipients", + {session_recipients, mk( list(binary()), #{ @@ -654,7 +654,7 @@ fields(local_status_enabled) -> required => false } )}, - {"recipients", + {recipients, mk( list(binary()), #{ @@ -662,7 +662,7 @@ fields(local_status_enabled) -> required => false } )}, - {"stats", + {stats, mk( ref(status_stats), #{ @@ -673,7 +673,7 @@ fields(local_status_enabled) -> ]; fields(status_stats) -> [ - {"initial_connected", + {initial_connected, mk( non_neg_integer(), #{ @@ -681,7 +681,7 @@ fields(status_stats) -> required => true } )}, - {"current_connected", + {current_connected, mk( non_neg_integer(), #{ @@ -689,7 +689,7 @@ fields(status_stats) -> required => true } )}, - {"initial_sessions", + {initial_sessions, mk( non_neg_integer(), #{ @@ -697,7 +697,7 @@ fields(status_stats) -> required => true } )}, - {"current_sessions", + {current_sessions, mk( non_neg_integer(), #{ @@ -705,7 +705,7 @@ fields(status_stats) -> required => true } )}, - {"current_disconnected_sessions", + {current_disconnected_sessions, mk( non_neg_integer(), #{ @@ -716,11 +716,11 @@ fields(status_stats) -> ]; fields(global_coordinator_status) -> without( - ["status", "process", "session_goal", "session_recipients", "stats"], + [status, process, session_goal, session_recipients, stats], fields(local_status_enabled) ) ++ [ - {"donors", + {donors, mk( list(binary()), #{ @@ -728,7 +728,7 @@ fields(global_coordinator_status) -> required => false } )}, - {"donor_conn_avg", + {donor_conn_avg, mk( non_neg_integer(), #{ @@ -736,7 +736,7 @@ fields(global_coordinator_status) -> required => false } )}, - {"donor_sess_avg", + {donor_sess_avg, mk( non_neg_integer(), #{ @@ -744,7 +744,7 @@ fields(global_coordinator_status) -> required => false } )}, - {"node", + {node, mk( binary(), #{ @@ -754,9 +754,9 @@ fields(global_coordinator_status) -> )} ]; 
fields(global_evacuation_status) -> - without(["status", "process"], fields(local_status_enabled)) ++ + without([status, process], fields(local_status_enabled)) ++ [ - {"node", + {node, mk( binary(), #{ @@ -768,19 +768,19 @@ fields(global_evacuation_status) -> fields(global_purge_status) -> without( [ - "status", - "process", - "connection_eviction_rate", - "session_eviction_rate", - "connection_goal", - "disconnected_session_goal", - "session_recipients", - "recipients" + status, + process, + connection_eviction_rate, + session_eviction_rate, + connection_goal, + disconnected_session_goal, + session_recipients, + recipients ], fields(local_status_enabled) ) ++ [ - {"purge_rate", + {purge_rate, mk( pos_integer(), #{ @@ -788,7 +788,7 @@ fields(global_purge_status) -> required => false } )}, - {"node", + {node, mk( binary(), #{ @@ -799,7 +799,7 @@ fields(global_purge_status) -> ]; fields(global_status) -> [ - {"evacuations", + {evacuations, mk( hoconsc:array(ref(global_evacuation_status)), #{ @@ -807,7 +807,7 @@ fields(global_status) -> required => false } )}, - {"purges", + {purges, mk( hoconsc:array(ref(global_purge_status)), #{ @@ -815,7 +815,7 @@ fields(global_status) -> required => false } )}, - {"rebalances", + {rebalances, mk( hoconsc:array(ref(global_coordinator_status)), #{ diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl index 4f0fbe3c4..04a74bf28 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_SUITE.erl @@ -48,7 +48,7 @@ init_per_testcase(Case, Config) -> ClusterNodes = start_cluster( Config, NodeNames, - [emqx, emqx_eviction_agent, emqx_node_rebalance] + [emqx, emqx_node_rebalance] ), ok = snabbkaffe:start_trace(), [{cluster_nodes, ClusterNodes} | Config]. 
diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl index ac5f809bf..bd15b6475 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_agent_SUITE.erl @@ -38,7 +38,7 @@ groups() -> ]. init_per_suite(Config) -> - Apps = emqx_cth_suite:start([emqx, emqx_eviction_agent, emqx_node_rebalance], #{ + Apps = emqx_cth_suite:start([emqx, emqx_node_rebalance], #{ work_dir => ?config(priv_dir, Config) }), [{apps, Apps} | Config]. @@ -60,7 +60,7 @@ init_per_testcase(Case, Config) -> ClusterNodes = emqx_cth_cluster:start( [ {case_specific_node_name(?MODULE, Case), #{ - apps => [emqx, emqx_eviction_agent, emqx_node_rebalance] + apps => [emqx, emqx_node_rebalance] }} ], #{work_dir => emqx_cth_suite:work_dir(Case, Config)} diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl index a652dea0a..06e119532 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_api_SUITE.erl @@ -29,7 +29,7 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - Apps = emqx_cth_suite:start([emqx, emqx_eviction_agent, emqx_node_rebalance], #{ + Apps = emqx_cth_suite:start([emqx, emqx_node_rebalance], #{ work_dir => ?config(priv_dir, Config) }), [{apps, Apps} | Config]. @@ -548,7 +548,6 @@ app_specs() -> #{enable => true} } }}, - emqx_eviction_agent, emqx_node_rebalance ]. 
diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl index 55542d320..3980b4a45 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_cli_SUITE.erl @@ -15,7 +15,7 @@ [emqtt_connect_many/2, stop_many/1, case_specific_node_name/3] ). --define(START_APPS, [emqx, emqx_eviction_agent, emqx_node_rebalance]). +-define(START_APPS, [emqx, emqx_node_rebalance]). all() -> emqx_common_test_helpers:all(?MODULE). diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl index 4c0d13788..d27f6d6d3 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_evacuation_SUITE.erl @@ -70,7 +70,7 @@ init_per_testcase(Case, Config) -> case_specific_node_name(?MODULE, Case, '_recipient') ] end, - ClusterNodes = start_cluster(Config, NodeNames, [emqx, emqx_eviction_agent, emqx_node_rebalance]), + ClusterNodes = start_cluster(Config, NodeNames, [emqx, emqx_node_rebalance]), ok = snabbkaffe:start_trace(), [{cluster_nodes, ClusterNodes} | Config]. diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl index 31844c5d0..0daeac106 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_purge_SUITE.erl @@ -117,7 +117,6 @@ app_specs() -> config => #{delayed => #{enable => true}} }}, - emqx_eviction_agent, emqx_node_rebalance ]. 
diff --git a/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl b/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl index 888e63beb..6a7f20c4e 100644 --- a/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl +++ b/apps/emqx_node_rebalance/test/emqx_node_rebalance_status_SUITE.erl @@ -32,7 +32,6 @@ init_per_suite(Config) -> Apps = [ emqx_conf, emqx, - emqx_eviction_agent, emqx_node_rebalance ], Cluster = [ diff --git a/changes/ee/fix-12871.en.md b/changes/ee/fix-12871.en.md new file mode 100644 index 000000000..5b7520645 --- /dev/null +++ b/changes/ee/fix-12871.en.md @@ -0,0 +1 @@ +Fix startup process of evacuated node. Previously, if a node was evacuated and stopped without stopping evacuation, it would not start back. diff --git a/mix.exs b/mix.exs index 552cc75c9..0666601fc 100644 --- a/mix.exs +++ b/mix.exs @@ -332,7 +332,9 @@ defmodule EMQXUmbrella.MixProject do :emqx_s3, :emqx_opentelemetry, :emqx_durable_storage, - :rabbit_common + :rabbit_common, + :emqx_eviction_agent, + :emqx_node_rebalance ], steps: steps, strip_beams: false diff --git a/rebar.config.erl b/rebar.config.erl index a81d162a9..35f76b187 100644 --- a/rebar.config.erl +++ b/rebar.config.erl @@ -116,6 +116,8 @@ is_community_umbrella_app("apps/emqx_gateway_ocpp") -> false; is_community_umbrella_app("apps/emqx_gateway_jt808") -> false; is_community_umbrella_app("apps/emqx_bridge_syskeeper") -> false; is_community_umbrella_app("apps/emqx_message_validation") -> false; +is_community_umbrella_app("apps/emqx_eviction_agent") -> false; +is_community_umbrella_app("apps/emqx_node_rebalance") -> false; is_community_umbrella_app(_) -> true. %% BUILD_WITHOUT_JQ