feat: add graceful shutdown
Prior to this change, emqx node shutdown was done by init:stop, which might stop the applications in an undesired order. In this change, emqx_machine_terminator is added to stop the apps in a defined order and then terminate the node in an infinite loop.
This commit is contained in:
parent
70e49ab629
commit
bc23ff5e47
|
@ -0,0 +1,27 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_machine).
|
||||
|
||||
-export([start/0,
|
||||
graceful_shutdown/0
|
||||
]).
|
||||
|
||||
%% @doc Start the machine-level helper: calls emqx_machine_terminator:start/0
%% and asserts (by matching) that it returned `ok'.
start() ->
|
||||
ok = emqx_machine_terminator:start().
|
||||
|
||||
%% @doc Gracefully shut down this node by delegating to
%% emqx_machine_terminator:graceful/0; returns whatever that call returns.
graceful_shutdown() ->
|
||||
emqx_machine_terminator:graceful().
|
|
@ -18,11 +18,9 @@
|
|||
|
||||
-export([ start/2
|
||||
, stop/1
|
||||
, prep_stop/1
|
||||
]).
|
||||
|
||||
%% Shutdown and reboot
|
||||
-export([ shutdown/1
|
||||
-export([ stop_apps/1
|
||||
, ensure_apps_started/0
|
||||
]).
|
||||
|
||||
|
@ -50,11 +48,9 @@ start(_Type, _Args) ->
|
|||
ok = print_vsn(),
|
||||
|
||||
ok = start_autocluster(),
|
||||
ok = emqx_machine:start(),
|
||||
{ok, RootSupPid}.
|
||||
|
||||
%% Stops the `emqx' application and returns the result of application:stop/1.
%% The state argument is ignored.
prep_stop(_State) ->
|
||||
application:stop(emqx).
|
||||
|
||||
%% Ignores its argument and returns `ok'.
%% NOTE(review): presumably the application behaviour's stop/1 callback - confirm.
stop(_State) ->
|
||||
ok.
|
||||
|
||||
|
@ -96,13 +92,13 @@ load_config_files() ->
|
|||
ok = emqx_app:set_init_config_load_done().
|
||||
|
||||
start_autocluster() ->
|
||||
ekka:callback(prepare, fun ?MODULE:shutdown/1),
|
||||
ekka:callback(prepare, fun ?MODULE:stop_apps/1),
|
||||
ekka:callback(reboot, fun ?MODULE:ensure_apps_started/0),
|
||||
_ = ekka:autocluster(emqx), %% returns 'ok' or a pid or 'any()' as in spec
|
||||
ok.
|
||||
|
||||
shutdown(Reason) ->
|
||||
?SLOG(critical, #{msg => "stopping_apps", reason => Reason}),
|
||||
stop_apps(Reason) ->
|
||||
?SLOG(info, #{msg => "stopping_apps", reason => Reason}),
|
||||
_ = emqx_alarm_handler:unload(),
|
||||
lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())).
|
||||
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(emqx_machine_terminator).
|
||||
|
||||
-export([ start/0
|
||||
, graceful/0
|
||||
, terminator_loop/0
|
||||
]).
|
||||
|
||||
-define(TERMINATOR, ?MODULE).
|
||||
|
||||
%% @doc This API is called to shut down the Erlang VM by an RPC call from a remote shell node.
|
||||
%% The shutdown of apps is delegated to a process instead of doing it in the RPC spawned
|
||||
%% process which has a remote group leader.
|
||||
%% Spawns the terminator process, linked to the caller, and returns `ok'.
%% The spawned process registers itself under ?TERMINATOR (this module's name)
%% and then enters terminator_loop/0.
%% NOTE(review): registration happens inside the spawned process, so the name
%% may not be registered yet when this function returns - confirm that no
%% caller invokes graceful/0 immediately after start/0.
start() ->
|
||||
_ = spawn_link(
|
||||
fun() ->
|
||||
%% register/2 raises badarg if the name is already taken, which would
|
||||
%% crash this (linked) process.
register(?TERMINATOR, self()),
|
||||
terminator_loop()
|
||||
end),
|
||||
ok.
|
||||
|
||||
%% internal use
|
||||
%% Waits for the `graceful_shutdown' message. On receipt, stops the apps via
%% emqx_machine_app:stop_apps(normal) (asserting `ok') and enters exit_loop/0.
%% Messages other than `graceful_shutdown' are not matched by this receive and
%% therefore remain in the mailbox.
terminator_loop() ->
|
||||
receive
|
||||
graceful_shutdown ->
|
||||
ok = emqx_machine_app:stop_apps(normal),
|
||||
exit_loop()
|
||||
after
|
||||
1000 ->
|
||||
%% keep looping for beam reload
|
||||
%% The fully-qualified call re-enters the loop through the exported
|
||||
%% function once per second, so a newly loaded version of this module
|
||||
%% is picked up.
?MODULE:terminator_loop()
|
||||
end.
|
||||
|
||||
%% @doc Shutdown the Erlang VM.
|
||||
%% Sends `graceful_shutdown' to the registered terminator process and blocks
%% until that process goes down, then returns `ok'.
%% Exits with `emqx_machine_not_started' when no process is registered
%% under ?TERMINATOR.
%% The final receive has no `after' clause, so this call itself never times out.
graceful() ->
|
||||
case whereis(?TERMINATOR) of
|
||||
undefined ->
|
||||
exit(emqx_machine_not_started);
|
||||
Pid ->
|
||||
Pid ! graceful_shutdown,
|
||||
%% Monitoring after the send is safe: if Pid is already dead, monitor/2
|
||||
%% still delivers a 'DOWN' message for Ref.
Ref = monitor(process, Pid),
|
||||
%% NOTE: not exactly sure, but maybe there is a chance that
|
||||
%% Erlang VM goes down before this receive.
|
||||
%% In which case, the remote caller will get {badrpc, nodedown}
|
||||
receive {'DOWN', Ref, process, Pid, _} -> ok end
|
||||
end.
|
||||
|
||||
%% Loop until Erlang VM exits
|
||||
%% Never returns: requests a VM stop, sleeps 100 ms, and repeats.
exit_loop() ->
|
||||
%% NOTE(review): init:stop/0 presumably only initiates the shutdown and
|
||||
%% returns right away, which is why the request is re-issued in a loop
|
||||
%% until the VM is gone - confirm against the OTP docs.
init:stop(),
|
||||
timer:sleep(100),
|
||||
exit_loop().
|
|
@ -33,9 +33,9 @@ end_per_suite(_Config) ->
|
|||
emqx_ct_helpers:stop_apps([]).
|
||||
|
||||
t_shutdown_reboot(_Config) ->
|
||||
emqx_machine_app:shutdown(normal),
|
||||
emqx_machine_app:stop_apps(normal),
|
||||
false = emqx:is_running(node()),
|
||||
emqx_machine_app:ensure_apps_started(),
|
||||
true = emqx:is_running(node()),
|
||||
ok = emqx_machine_app:shutdown(for_test),
|
||||
ok = emqx_machine_app:stop_apps(for_test),
|
||||
false = emqx:is_running(node()).
|
||||
|
|
2
bin/emqx
2
bin/emqx
|
@ -99,7 +99,7 @@ relx_usage() {
|
|||
echo " don't make it permanent"
|
||||
;;
|
||||
*)
|
||||
echo "Usage: $REL_NAME {start|start_boot <file>|ertspath|foreground|stop|restart|reboot|pid|ping|console|console_clean|console_boot <file>|attach|remote_console|upgrade|downgrade|install|uninstall|versions|escript|ctl|rpc|rpcterms|eval|root_dir}"
|
||||
echo "Usage: $REL_NAME {start|start_boot <file>|ertspath|foreground|stop|pid|ping|console|console_clean|console_boot <file>|attach|remote_console|upgrade|downgrade|install|uninstall|versions|escript|ctl|rpc|rpcterms|eval|root_dir}"
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
|
14
bin/nodetool
14
bin/nodetool
|
@ -72,9 +72,15 @@ do(Args) ->
|
|||
%% a "pong"
|
||||
io:format("pong\n");
|
||||
["stop"] ->
|
||||
io:format("~p\n", [rpc:call(TargetNode, init, stop, [], 60000)]);
|
||||
["restart", "-config", ConfigFile | _RestArgs1] ->
|
||||
io:format("~p\n", [rpc:call(TargetNode, emqx, restart, [ConfigFile], 60000)]);
|
||||
case rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], 60000) of
|
||||
ok ->
|
||||
ok;
|
||||
{badrpc, nodedown} ->
|
||||
%% nodetool commands are always executed after a ping
|
||||
%% which if the code gets here, it's because the target node
|
||||
%% has shutdown before RPC returns.
|
||||
ok
|
||||
end;
|
||||
["rpc", Module, Function | RpcArgs] ->
|
||||
case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function),
|
||||
[RpcArgs], 60000) of
|
||||
|
@ -141,7 +147,7 @@ do(Args) ->
|
|||
end;
|
||||
Other ->
|
||||
io:format("Other: ~p\n", [Other]),
|
||||
io:format("Usage: nodetool {genconfig, chkconfig|getpid|ping|stop|restart|reboot|rpc|rpc_infinity|rpcterms|eval [Terms]} [RPC]\n")
|
||||
io:format("Usage: nodetool {genconfig, chkconfig|getpid|ping|stop|rpc|rpc_infinity|rpcterms|eval [Terms]} [RPC]\n")
|
||||
end,
|
||||
net_kernel:stop().
|
||||
|
||||
|
|
Loading…
Reference in New Issue