diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 60ac6343f..70cf636e7 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -28,7 +28,7 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.1"}}}, + {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.2"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.42.1"}}}, {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.3"}}}, diff --git a/apps/emqx_management/src/emqx_management.app.src b/apps/emqx_management/src/emqx_management.app.src index bd596ffd4..bc2425a55 100644 --- a/apps/emqx_management/src/emqx_management.app.src +++ b/apps/emqx_management/src/emqx_management.app.src @@ -2,7 +2,7 @@ {application, emqx_management, [ {description, "EMQX Management API and CLI"}, % strict semver, bump manually! - {vsn, "5.1.0"}, + {vsn, "5.1.1"}, {modules, []}, {registered, [emqx_management_sup]}, {applications, [ diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index ddbc60d5c..2af3a8397 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -108,6 +108,7 @@ cluster(["join", SNode]) -> emqx_ctl:print("Failed to join the cluster: ~0p~n", [Error]) end; cluster(["leave"]) -> + _ = maybe_disable_autocluster(), case mria:leave() of ok -> emqx_ctl:print("Leave the cluster successfully.~n"), @@ -139,12 +140,15 @@ cluster(["status"]) -> cluster(["status", "--json"]) -> Info = sort_map_list_fields(cluster_info()), emqx_ctl:print("~ts~n", [emqx_logger_jsonfmt:best_effort_json(Info)]); +cluster(["discovery", "enable"]) -> + enable_autocluster(); cluster(_) -> emqx_ctl:usage([ {"cluster join ", "Join the cluster"}, {"cluster leave", "Leave the cluster"}, {"cluster force-leave ", "Force the node leave from cluster"}, - {"cluster status [--json]", "Cluster status"} + {"cluster status [--json]", "Cluster status"}, + {"cluster discovery enable", "Enable and run automatic cluster discovery (if configured)"} ]). %% sort lists for deterministic output @@ -163,6 +167,25 @@ sort_map_list_field(Field, Map) -> _ -> Map end. +enable_autocluster() -> + ok = ekka:enable_autocluster(), + _ = ekka:autocluster(emqx), + emqx_ctl:print("Automatic cluster discovery enabled.~n"). + +maybe_disable_autocluster() -> + case ekka:autocluster_enabled() of + true -> + ok = ekka:disable_autocluster(), + emqx_ctl:print( + "Automatic cluster discovery is disabled on this node: ~p to avoid" + " re-joining the same cluster again, if the node is not stopped soon." + " To enable it run: 'emqx ctl cluster discovery enable' or restart the node.~n", + [node()] + ); + false -> + ok + end. + %%-------------------------------------------------------------------- %% @doc Query clients diff --git a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl index c81881c95..c6f00bff0 100644 --- a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl @@ -19,6 +19,7 @@ -compile(nowarn_export_all). -include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). all() -> emqx_common_test_helpers:all(?MODULE). @@ -31,6 +32,47 @@ init_per_suite(Config) -> end_per_suite(_) -> emqx_mgmt_api_test_util:end_suite([emqx_management, emqx_conf]). +init_per_testcase(t_autocluster_leave = TC, Config) -> + [Core1, Core2, Core3, Repl] = + Nodes = [ + t_autocluster_leave_core1, + t_autocluster_leave_core2, + t_autocluster_leave_core3, + t_autocluster_leave_replicant + ], + + NodeNames = [emqx_cth_cluster:node_name(N) || N <- Nodes], + AppSpec = [ + emqx, + {emqx_conf, #{ + config => #{ + cluster => #{ + discovery_strategy => static, + static => #{seeds => NodeNames} + } + } + }}, + emqx_management + ], + Cluster = emqx_cth_cluster:start( + [ + {Core1, #{role => core, apps => AppSpec}}, + {Core2, #{role => core, apps => AppSpec}}, + {Core3, #{role => core, apps => AppSpec}}, + {Repl, #{role => replicant, apps => AppSpec}} + ], + #{work_dir => emqx_cth_suite:work_dir(TC, Config)} + ), + [{cluster, Cluster} | Config]; +init_per_testcase(_TC, Config) -> + Config. + +end_per_testcase(_TC, Config) -> + case ?config(cluster, Config) of + undefined -> ok; + Cluster -> emqx_cth_cluster:stop(Cluster) + end. + t_status(_Config) -> emqx_ctl:run_command([]), emqx_ctl:run_command(["status"]), @@ -263,3 +305,44 @@ t_admin(_Config) -> %% admins passwd # Reset dashboard user password %% admins del # Delete dashboard user ok. + +t_autocluster_leave(Config) -> + [Core1, Core2, Core3, Repl] = Cluster = ?config(cluster, Config), + %% Mria membership updates are async, makes sense to wait a little + timer:sleep(300), + ClusterView = [lists:sort(rpc:call(N, emqx, running_nodes, [])) || N <- Cluster], + [View1, View2, View3, View4] = ClusterView, + ?assertEqual(lists:sort(Cluster), View1), + ?assertEqual(View1, View2), + ?assertEqual(View1, View3), + ?assertEqual(View1, View4), + + rpc:call(Core3, emqx_mgmt_cli, cluster, [["leave"]]), + timer:sleep(1000), + %% Replicant node may still discover and join Core3 which is now split from [Core1, Core2], + %% but it's expected to choose a bigger cluster of [Core1, Core2].. + ?assertMatch([Core3], rpc:call(Core3, emqx, running_nodes, [])), + ?assertEqual(undefined, rpc:call(Core1, erlang, whereis, [ekka_autocluster])), + ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Core1, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Core2, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Core2, Repl]), rpc:call(Repl, emqx, running_nodes, [])), + + rpc:call(Repl, emqx_mgmt_cli, cluster, [["leave"]]), + timer:sleep(1000), + ?assertEqual(lists:sort([Core1, Core2]), rpc:call(Core1, emqx, running_nodes, [])), + ?assertEqual(lists:sort([Core1, Core2]), rpc:call(Core2, emqx, running_nodes, [])), + + rpc:call(Core3, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), + rpc:call(Repl, emqx_mgmt_cli, cluster, [["discovery", "enable"]]), + %% core nodes will join and restart asyncly, may need more time to re-cluster + ?assertEqual( + ok, + emqx_common_test_helpers:wait_for( + ?FUNCTION_NAME, + ?LINE, + fun() -> + [lists:sort(rpc:call(N, emqx, running_nodes, [])) || N <- Cluster] =:= ClusterView + end, + 10_000 + ) + ). diff --git a/changes/ce/fix-12802.en.md b/changes/ce/fix-12802.en.md new file mode 100644 index 000000000..f63603a97 --- /dev/null +++ b/changes/ce/fix-12802.en.md @@ -0,0 +1,3 @@ +Improve cluster discovery behaviour when a node is manually removed from a cluster using 'emqx ctl cluster leave' command. +Previously, if the configured cluster 'discovery_strategy' was not 'manual', the left node might re-discover and re-join the same cluster shortly after it left (unless it was stopped). +After this change, 'cluster leave' command disables automatic cluster_discovery, so that the left node won't re-join the same cluster again. Cluster discovery can be re-enabled by running 'emqx ctl discovery enable` or by restarting the left node. diff --git a/mix.exs b/mix.exs index 638bbb284..20df56e90 100644 --- a/mix.exs +++ b/mix.exs @@ -55,7 +55,7 @@ defmodule EMQXUmbrella.MixProject do {:cowboy, github: "emqx/cowboy", tag: "2.9.2", override: true}, {:esockd, github: "emqx/esockd", tag: "5.11.1", override: true}, {:rocksdb, github: "emqx/erlang-rocksdb", tag: "1.8.0-emqx-2", override: true}, - {:ekka, github: "emqx/ekka", tag: "0.19.1", override: true}, + {:ekka, github: "emqx/ekka", tag: "0.19.2", override: true}, {:gen_rpc, github: "emqx/gen_rpc", tag: "3.3.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.12", override: true}, {:minirest, github: "emqx/minirest", tag: "1.4.0", override: true}, diff --git a/rebar.config b/rebar.config index 238dca515..1dd17c4e7 100644 --- a/rebar.config +++ b/rebar.config @@ -83,7 +83,7 @@ {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-2"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.1"}}}, + {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.19.2"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.12"}}}, {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.4.0"}}},