feat: add graceful shutdown
Prior to this change, emqx node shutdown was done by init:stop, which might stop the applications in an undesired order. In this change, emqx_machine_terminator is added to stop the apps in a defined order and then call init:stop in a loop until the node terminates.
This commit is contained in:
parent
70e49ab629
commit
bc23ff5e47
|
@ -138,7 +138,7 @@ EOF
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
IDLE_TIME=0
|
IDLE_TIME=0
|
||||||
while ! curl http://localhost:8081/api/v5/status >/dev/null 2>&1; do
|
while ! curl http://localhost:8081/api/v5/status >/dev/null 2>&1; do
|
||||||
if [ $IDLE_TIME -gt 10 ]
|
if [ $IDLE_TIME -gt 10 ]
|
||||||
then
|
then
|
||||||
echo "emqx running error"
|
echo "emqx running error"
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_machine).
|
||||||
|
|
||||||
|
-export([start/0,
|
||||||
|
graceful_shutdown/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
%% @doc Start the machine-level helpers.
%% Currently this only starts the terminator process; anything other than
%% `ok' from it is a badmatch crash.
start() ->
    ok = emqx_machine_terminator:start(),
    ok.
|
||||||
|
|
||||||
|
%% @doc Ask the terminator process to stop the apps and shut the VM down.
%% The result of emqx_machine_terminator:graceful/0 is returned unchanged.
graceful_shutdown() ->
    Result = emqx_machine_terminator:graceful(),
    Result.
|
|
@ -18,11 +18,9 @@
|
||||||
|
|
||||||
-export([ start/2
|
-export([ start/2
|
||||||
, stop/1
|
, stop/1
|
||||||
, prep_stop/1
|
|
||||||
]).
|
]).
|
||||||
|
|
||||||
%% Shutdown and reboot
|
-export([ stop_apps/1
|
||||||
-export([ shutdown/1
|
|
||||||
, ensure_apps_started/0
|
, ensure_apps_started/0
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
@ -50,11 +48,9 @@ start(_Type, _Args) ->
|
||||||
ok = print_vsn(),
|
ok = print_vsn(),
|
||||||
|
|
||||||
ok = start_autocluster(),
|
ok = start_autocluster(),
|
||||||
|
ok = emqx_machine:start(),
|
||||||
{ok, RootSupPid}.
|
{ok, RootSupPid}.
|
||||||
|
|
||||||
prep_stop(_State) ->
|
|
||||||
application:stop(emqx).
|
|
||||||
|
|
||||||
stop(_State) ->
|
stop(_State) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
|
@ -96,13 +92,13 @@ load_config_files() ->
|
||||||
ok = emqx_app:set_init_config_load_done().
|
ok = emqx_app:set_init_config_load_done().
|
||||||
|
|
||||||
start_autocluster() ->
|
start_autocluster() ->
|
||||||
ekka:callback(prepare, fun ?MODULE:shutdown/1),
|
ekka:callback(prepare, fun ?MODULE:stop_apps/1),
|
||||||
ekka:callback(reboot, fun ?MODULE:ensure_apps_started/0),
|
ekka:callback(reboot, fun ?MODULE:ensure_apps_started/0),
|
||||||
_ = ekka:autocluster(emqx), %% returns 'ok' or a pid or 'any()' as in spec
|
_ = ekka:autocluster(emqx), %% returns 'ok' or a pid or 'any()' as in spec
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
shutdown(Reason) ->
|
stop_apps(Reason) ->
|
||||||
?SLOG(critical, #{msg => "stopping_apps", reason => Reason}),
|
?SLOG(info, #{msg => "stopping_apps", reason => Reason}),
|
||||||
_ = emqx_alarm_handler:unload(),
|
_ = emqx_alarm_handler:unload(),
|
||||||
lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())).
|
lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())).
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
-module(emqx_machine_terminator).
|
||||||
|
|
||||||
|
-export([ start/0
|
||||||
|
, graceful/0
|
||||||
|
, terminator_loop/0
|
||||||
|
]).
|
||||||
|
|
||||||
|
-define(TERMINATOR, ?MODULE).
|
||||||
|
|
||||||
|
%% @doc This API is called to shutdown the Erlang VM by RPC call from remote shell node.
|
||||||
|
%% The shutdown of apps is delegated to a dedicated process instead of doing it in the RPC spawned
|
||||||
|
%% process which has a remote group leader.
|
||||||
|
%% @doc Spawn the terminator process, linked to the caller, and register it
%% under the ?TERMINATOR name.
%% The name is registered from the calling process, before this function
%% returns, so a graceful/0 call issued right after start/0 can never
%% observe an unregistered terminator.
%% Returns `ok'; crashes with badarg if the name is already taken.
start() ->
    Pid = spawn_link(fun terminator_loop/0),
    true = register(?TERMINATOR, Pid),
    ok.
|
||||||
|
|
||||||
|
%% internal use
|
||||||
|
%% Main loop of the terminator process (exported for internal use only).
%% Waits for the `graceful_shutdown' request; on receipt it stops the apps
%% and then enters exit_loop/0, which never returns.
terminator_loop() ->
    receive
        graceful_shutdown ->
            ok = emqx_machine_app:stop_apps(normal),
            exit_loop();
        _Unexpected ->
            %% Drop stray messages so they cannot pile up in the mailbox
            %% of this long-lived registered process.
            ?MODULE:terminator_loop()
    after
        1000 ->
            %% Fully qualified call: keep looping so a reloaded beam
            %% is picked up.
            ?MODULE:terminator_loop()
    end.
|
||||||
|
|
||||||
|
%% @doc Shutdown the Erlang VM.
|
||||||
|
%% @doc Shutdown the Erlang VM.
%% Sends `graceful_shutdown' to the registered terminator and blocks until
%% that process is down. Exits with `emqx_machine_not_started' when no
%% terminator is registered.
graceful() ->
    request_shutdown(whereis(?TERMINATOR)).

%% Dispatch on the whereis/1 result: no terminator vs. a live pid.
request_shutdown(undefined) ->
    exit(emqx_machine_not_started);
request_shutdown(Terminator) ->
    Terminator ! graceful_shutdown,
    MRef = erlang:monitor(process, Terminator),
    %% NOTE: the VM may possibly go down before this receive completes,
    %% in which case a remote caller gets {badrpc, nodedown} instead.
    receive
        {'DOWN', MRef, process, Terminator, _Reason} ->
            ok
    end.
|
||||||
|
|
||||||
|
%% Loop until Erlang VM exits
|
||||||
|
%% Request VM shutdown over and over, pausing 100 ms between attempts.
%% This function never returns; it ends only when the VM exits.
exit_loop() ->
    ok = init:stop(),
    ok = timer:sleep(100),
    exit_loop().
|
|
@ -33,9 +33,9 @@ end_per_suite(_Config) ->
|
||||||
emqx_ct_helpers:stop_apps([]).
|
emqx_ct_helpers:stop_apps([]).
|
||||||
|
|
||||||
t_shutdown_reboot(_Config) ->
|
t_shutdown_reboot(_Config) ->
|
||||||
emqx_machine_app:shutdown(normal),
|
emqx_machine_app:stop_apps(normal),
|
||||||
false = emqx:is_running(node()),
|
false = emqx:is_running(node()),
|
||||||
emqx_machine_app:ensure_apps_started(),
|
emqx_machine_app:ensure_apps_started(),
|
||||||
true = emqx:is_running(node()),
|
true = emqx:is_running(node()),
|
||||||
ok = emqx_machine_app:shutdown(for_test),
|
ok = emqx_machine_app:stop_apps(for_test),
|
||||||
false = emqx:is_running(node()).
|
false = emqx:is_running(node()).
|
||||||
|
|
2
bin/emqx
2
bin/emqx
|
@ -99,7 +99,7 @@ relx_usage() {
|
||||||
echo " don't make it permanent"
|
echo " don't make it permanent"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Usage: $REL_NAME {start|start_boot <file>|ertspath|foreground|stop|restart|reboot|pid|ping|console|console_clean|console_boot <file>|attach|remote_console|upgrade|downgrade|install|uninstall|versions|escript|ctl|rpc|rpcterms|eval|root_dir}"
|
echo "Usage: $REL_NAME {start|start_boot <file>|ertspath|foreground|stop|pid|ping|console|console_clean|console_boot <file>|attach|remote_console|upgrade|downgrade|install|uninstall|versions|escript|ctl|rpc|rpcterms|eval|root_dir}"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
14
bin/nodetool
14
bin/nodetool
|
@ -72,9 +72,15 @@ do(Args) ->
|
||||||
%% a "pong"
|
%% a "pong"
|
||||||
io:format("pong\n");
|
io:format("pong\n");
|
||||||
["stop"] ->
|
["stop"] ->
|
||||||
io:format("~p\n", [rpc:call(TargetNode, init, stop, [], 60000)]);
|
case rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], 60000) of
|
||||||
["restart", "-config", ConfigFile | _RestArgs1] ->
|
ok ->
|
||||||
io:format("~p\n", [rpc:call(TargetNode, emqx, restart, [ConfigFile], 60000)]);
|
ok;
|
||||||
|
{badrpc, nodedown} ->
|
||||||
|
%% nodetool commands are always executed after a ping
|
||||||
|
%% which if the code gets here, it's because the target node
|
||||||
|
%% has shutdown before RPC returns.
|
||||||
|
ok
|
||||||
|
end;
|
||||||
["rpc", Module, Function | RpcArgs] ->
|
["rpc", Module, Function | RpcArgs] ->
|
||||||
case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function),
|
case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function),
|
||||||
[RpcArgs], 60000) of
|
[RpcArgs], 60000) of
|
||||||
|
@ -141,7 +147,7 @@ do(Args) ->
|
||||||
end;
|
end;
|
||||||
Other ->
|
Other ->
|
||||||
io:format("Other: ~p\n", [Other]),
|
io:format("Other: ~p\n", [Other]),
|
||||||
io:format("Usage: nodetool {genconfig, chkconfig|getpid|ping|stop|restart|reboot|rpc|rpc_infinity|rpcterms|eval [Terms]} [RPC]\n")
|
io:format("Usage: nodetool {genconfig, chkconfig|getpid|ping|stop|rpc|rpc_infinity|rpcterms|eval [Terms]} [RPC]\n")
|
||||||
end,
|
end,
|
||||||
net_kernel:stop().
|
net_kernel:stop().
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue