Merge pull request #11567 from SergeTupchiy/EMQX-10835-increase-graceful-stop-timeout
fix(nodetool): increase graceful stop timeout
This commit is contained in:
commit
cf334d5542
4
bin/emqx
4
bin/emqx
|
@@ -812,7 +812,7 @@ is_down() {
|
||||||
if ps -p "$PID" >/dev/null; then
|
if ps -p "$PID" >/dev/null; then
|
||||||
# still around
|
# still around
|
||||||
# shellcheck disable=SC2009 # this grep pattern is not a part of the program names
|
# shellcheck disable=SC2009 # this grep pattern is not a part of the program names
|
||||||
if ps -efp "$PID" | $GREP -q 'defunct'; then
|
if ps -fp "$PID" | $GREP -q 'defunct'; then
|
||||||
# zombie state, print parent pid
|
# zombie state, print parent pid
|
||||||
parent="$(ps -o ppid= -p "$PID" | tr -d ' ')"
|
parent="$(ps -o ppid= -p "$PID" | tr -d ' ')"
|
||||||
logwarn "$PID is marked <defunct>, parent: $(ps -p "$parent")"
|
logwarn "$PID is marked <defunct>, parent: $(ps -p "$parent")"
|
||||||
|
@@ -831,7 +831,7 @@ wait_for() {
|
||||||
shift
|
shift
|
||||||
CMD="$*"
|
CMD="$*"
|
||||||
while true; do
|
while true; do
|
||||||
if $CMD >/dev/null 2>&1; then
|
if $CMD; then
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
if [ "$WAIT_TIME" -le 0 ]; then
|
if [ "$WAIT_TIME" -le 0 ]; then
|
||||||
|
|
24
bin/nodetool
24
bin/nodetool
|
@@ -8,6 +8,8 @@
|
||||||
%% -------------------------------------------------------------------
|
%% -------------------------------------------------------------------
|
||||||
-mode(compile).
|
-mode(compile).
|
||||||
|
|
||||||
|
-define(SHUTDOWN_TIMEOUT_MS, 120_000).
|
||||||
|
|
||||||
main(Args) ->
|
main(Args) ->
|
||||||
case os:type() of
|
case os:type() of
|
||||||
{win32, nt} -> ok;
|
{win32, nt} -> ok;
|
||||||
|
@@ -85,9 +87,17 @@ do(Args) ->
|
||||||
%% a "pong"
|
%% a "pong"
|
||||||
io:format("pong\n");
|
io:format("pong\n");
|
||||||
["stop"] ->
|
["stop"] ->
|
||||||
case rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], 60000) of
|
Pid = start_shutdown_status(),
|
||||||
|
Res = rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], ?SHUTDOWN_TIMEOUT_MS),
|
||||||
|
true = stop_shutdown_status(Pid),
|
||||||
|
case Res of
|
||||||
ok ->
|
ok ->
|
||||||
ok;
|
ok;
|
||||||
|
{badrpc, timeout} ->
|
||||||
|
io:format("EMQX is still shutting down, it failed to stop gracefully "
|
||||||
|
"within the configured timeout of: ~ps\n",
|
||||||
|
[erlang:convert_time_unit(?SHUTDOWN_TIMEOUT_MS, millisecond, second)]),
|
||||||
|
halt(1);
|
||||||
{badrpc, nodedown} ->
|
{badrpc, nodedown} ->
|
||||||
%% nodetool commands are always executed after a ping
|
%% nodetool commands are always executed after a ping
|
||||||
%% which if the code gets here, it's because the target node
|
%% which if the code gets here, it's because the target node
|
||||||
|
@@ -145,6 +155,18 @@ do(Args) ->
|
||||||
end,
|
end,
|
||||||
net_kernel:stop().
|
net_kernel:stop().
|
||||||
|
|
||||||
|
start_shutdown_status() ->
|
||||||
|
spawn_link(fun shutdown_status_loop/0).
|
||||||
|
|
||||||
|
stop_shutdown_status(Pid) ->
|
||||||
|
true = unlink(Pid),
|
||||||
|
true = exit(Pid, stop).
|
||||||
|
|
||||||
|
shutdown_status_loop() ->
|
||||||
|
timer:sleep(10_000),
|
||||||
|
io:format("EMQX is shutting down, please wait...\n", []),
|
||||||
|
shutdown_status_loop().
|
||||||
|
|
||||||
parse_eval_args(Args) ->
|
parse_eval_args(Args) ->
|
||||||
% shells may process args into more than one, and end up stripping
|
% shells may process args into more than one, and end up stripping
|
||||||
% spaces, so this converts all of that to a single string to parse
|
% spaces, so this converts all of that to a single string to parse
|
||||||
|
|
|
@@ -0,0 +1,4 @@
|
||||||
|
Improve EMQX graceful shutdown (`emqx stop` command):
|
||||||
|
- increase timeout from 1 to 2 minutes
|
||||||
|
- print an error message if EMQX can't stop gracefully within the configured timeout
|
||||||
|
- print periodic status messages while EMQX is shutting down
|
Loading…
Reference in New Issue