fix(emqx_machine): Fix start/stop callbacks (#5969)
* fix(emqx_machine): Fix start/stop callbacks * chore(ekka): Bump version to 0.11.1 * fix(router): Wait for the tables * fix(emqx_cluster_rpc): Stop cluster RPC when joining a cluster * fix(emqx_app): Fix a deadlock when joining the cluster * fix(emqx_telemetry): Wait for mnesia tables * test(ct_helper): Start ekka before emqx
This commit is contained in:
parent
2f2a093150
commit
6697b9fa42
|
@ -15,7 +15,7 @@
|
|||
, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}}
|
||||
, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.8.3"}}}
|
||||
, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.0"}}}
|
||||
, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.11.0"}}}
|
||||
, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.11.1"}}}
|
||||
, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.5.1"}}}
|
||||
, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.20.3"}}}
|
||||
, {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}}
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
start(_Type, _Args) ->
|
||||
ok = maybe_load_config(),
|
||||
ok = maybe_start_quicer(),
|
||||
ensure_ekka_started(),
|
||||
wait_boot_shards(),
|
||||
{ok, Sup} = emqx_sup:start_link(),
|
||||
ok = maybe_start_listeners(),
|
||||
ok = emqx_alarm_handler:load(),
|
||||
|
@ -55,8 +55,7 @@ prep_stop(_State) ->
|
|||
|
||||
stop(_State) -> ok.
|
||||
|
||||
ensure_ekka_started() ->
|
||||
ekka:start(),
|
||||
wait_boot_shards() ->
|
||||
ok = mria_rlog:wait_for_shards(?BOOT_SHARDS, infinity).
|
||||
|
||||
%% @doc Call this function to make emqx boot without loading config,
|
||||
|
|
|
@ -92,6 +92,7 @@ monitor(Node) when is_atom(Node) ->
|
|||
|
||||
init([]) ->
|
||||
ok = ekka:monitor(membership),
|
||||
_ = mria:wait_for_tables([?ROUTING_NODE]),
|
||||
{ok, _} = mnesia:subscribe({table, ?ROUTING_NODE, simple}),
|
||||
Nodes = lists:foldl(
|
||||
fun(Node, Acc) ->
|
||||
|
|
|
@ -133,6 +133,7 @@ start_apps(Apps, Handler) when is_function(Handler) ->
|
|||
%% Load all application code to beam vm first
|
||||
%% Because, minirest, ekka etc.. application will scan these modules
|
||||
lists:foreach(fun load/1, [emqx | Apps]),
|
||||
ekka:start(),
|
||||
lists:foreach(fun(App) -> start_app(App, Handler) end, [emqx | Apps]).
|
||||
|
||||
load(App) ->
|
||||
|
@ -195,7 +196,7 @@ generate_config(SchemaModule, ConfigFile) when is_atom(SchemaModule) ->
|
|||
|
||||
-spec(stop_apps(list()) -> ok).
|
||||
stop_apps(Apps) ->
|
||||
[application:stop(App) || App <- Apps ++ [emqx, mria, mnesia]],
|
||||
[application:stop(App) || App <- Apps ++ [emqx, ekka, mria, mnesia]],
|
||||
ok.
|
||||
|
||||
%% backward compatible
|
||||
|
|
|
@ -135,6 +135,7 @@ skip_failed_commit(Node) ->
|
|||
|
||||
%% @private
|
||||
init([Node, RetryMs]) ->
|
||||
_ = mria:wait_for_tables([?CLUSTER_MFA]),
|
||||
{ok, _} = mnesia:subscribe({table, ?CLUSTER_MFA, simple}),
|
||||
{ok, #{node => Node, retry_interval => RetryMs}, {continue, ?CATCH_UP}}.
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
-include_lib("emqx/include/logger.hrl").
|
||||
|
||||
-export([post_boot/0]).
|
||||
-export([stop_apps/1, ensure_apps_started/0]).
|
||||
-export([stop_apps/0, ensure_apps_started/0]).
|
||||
-export([sorted_reboot_apps/0]).
|
||||
-export([start_autocluster/0]).
|
||||
|
||||
|
@ -42,15 +42,16 @@ print_vsn() ->
|
|||
|
||||
|
||||
start_autocluster() ->
|
||||
ekka:callback(prepare, fun ?MODULE:stop_apps/1),
|
||||
ekka:callback(reboot, fun ?MODULE:ensure_apps_started/0),
|
||||
ekka:callback(stop, fun emqx_machine_boot:stop_apps/0),
|
||||
ekka:callback(start, fun emqx_machine_boot:ensure_apps_started/0),
|
||||
_ = ekka:autocluster(emqx), %% returns 'ok' or a pid or 'any()' as in spec
|
||||
ok.
|
||||
|
||||
stop_apps(Reason) ->
|
||||
?SLOG(info, #{msg => "stopping_apps", reason => Reason}),
|
||||
stop_apps() ->
|
||||
?SLOG(notice, #{msg => "stopping_emqx_apps"}),
|
||||
_ = emqx_alarm_handler:unload(),
|
||||
lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())).
|
||||
lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())),
|
||||
emqx_machine_sup:stop_cluster_rpc().
|
||||
|
||||
stop_one_app(App) ->
|
||||
?SLOG(debug, #{msg => "stopping_app", app => App}),
|
||||
|
@ -64,8 +65,11 @@ stop_one_app(App) ->
|
|||
reason => E})
|
||||
end.
|
||||
|
||||
|
||||
ensure_apps_started() ->
|
||||
?SLOG(notice, #{msg => "(re)starting_emqx_apps"}),
|
||||
%% FIXME: Hack spawning the cluster RPC asynchronously to avoid a
|
||||
%% deadlock somewhere in EMQ X startup
|
||||
spawn_link(fun() -> emqx_machine_sup:start_cluster_rpc() end),
|
||||
lists:foreach(fun start_one_app/1, sorted_reboot_apps()).
|
||||
|
||||
start_one_app(App) ->
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
-behaviour(supervisor).
|
||||
|
||||
-export([ start_link/0
|
||||
, stop_cluster_rpc/0
|
||||
, start_cluster_rpc/0
|
||||
]).
|
||||
|
||||
-export([init/1]).
|
||||
|
@ -28,6 +30,26 @@
|
|||
start_link() ->
|
||||
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
|
||||
|
||||
stop_cluster_rpc() ->
|
||||
case whereis(?MODULE) of
|
||||
undefined ->
|
||||
ok;
|
||||
_ ->
|
||||
_ = supervisor:terminate_child(?MODULE, emqx_cluster_rpc_handler),
|
||||
_ = supervisor:terminate_child(?MODULE, emqx_cluster_rpc),
|
||||
ok
|
||||
end.
|
||||
|
||||
start_cluster_rpc() ->
|
||||
case whereis(?MODULE) of
|
||||
undefined ->
|
||||
ok;
|
||||
_ ->
|
||||
ensure_running(emqx_cluster_rpc),
|
||||
ensure_running(emqx_cluster_rpc_handler),
|
||||
ok
|
||||
end.
|
||||
|
||||
init([]) ->
|
||||
GlobalGC = child_worker(emqx_global_gc, [], permanent),
|
||||
Terminator = child_worker(emqx_machine_terminator, [], transient),
|
||||
|
@ -52,3 +74,13 @@ child_worker(M, Func, Args, Restart) ->
|
|||
type => worker,
|
||||
modules => [M]
|
||||
}.
|
||||
|
||||
ensure_running(Id) ->
|
||||
%% Assuming Id == locally registered name
|
||||
case whereis(Id) of
|
||||
undefined ->
|
||||
_ = supervisor:restart_child(?MODULE, Id),
|
||||
ok;
|
||||
_ ->
|
||||
ok
|
||||
end.
|
||||
|
|
|
@ -80,7 +80,7 @@ handle_cast(_Cast, State) ->
|
|||
|
||||
handle_call(?DO_IT, _From, State) ->
|
||||
try
|
||||
emqx_machine_boot:stop_apps(normal)
|
||||
emqx_machine_boot:stop_apps()
|
||||
catch
|
||||
C : E : St ->
|
||||
Apps = [element(1, A) || A <- application:which_applications()],
|
||||
|
|
|
@ -33,9 +33,9 @@ end_per_suite(_Config) ->
|
|||
emqx_common_test_helpers:stop_apps([]).
|
||||
|
||||
t_shutdown_reboot(_Config) ->
|
||||
emqx_machine_boot:stop_apps(normal),
|
||||
emqx_machine_boot:stop_apps(),
|
||||
false = emqx:is_running(node()),
|
||||
emqx_machine_boot:ensure_apps_started(),
|
||||
true = emqx:is_running(node()),
|
||||
ok = emqx_machine_boot:stop_apps(for_test),
|
||||
ok = emqx_machine_boot:stop_apps(),
|
||||
false = emqx:is_running(node()).
|
||||
|
|
|
@ -102,6 +102,7 @@ mnesia(boot) ->
|
|||
%%--------------------------------------------------------------------
|
||||
|
||||
start_link() ->
|
||||
_ = mria:wait_for_tables([?TELEMETRY]),
|
||||
Opts = emqx:get_config([telemetry], #{}),
|
||||
gen_server:start_link({local, ?MODULE}, ?MODULE, [Opts], []).
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}}
|
||||
, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.8.3"}}}
|
||||
, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.0"}}}
|
||||
, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.11.0"}}}
|
||||
, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.11.1"}}}
|
||||
, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.5.1"}}}
|
||||
, {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.2.5"}}}
|
||||
, {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.1"}}}
|
||||
|
|
Loading…
Reference in New Issue