Merge pull request #11567 from SergeTupchiy/EMQX-10835-increase-graceful-stop-timeout
fix(nodetool): increase graceful stop timeout
This commit is contained in:
commit
cf334d5542
4
bin/emqx
4
bin/emqx
|
@ -812,7 +812,7 @@ is_down() {
|
|||
if ps -p "$PID" >/dev/null; then
|
||||
# still around
|
||||
# shellcheck disable=SC2009 # this grep pattern is not a part of the program names
|
||||
if ps -efp "$PID" | $GREP -q 'defunct'; then
|
||||
if ps -fp "$PID" | $GREP -q 'defunct'; then
|
||||
# zombie state, print parent pid
|
||||
parent="$(ps -o ppid= -p "$PID" | tr -d ' ')"
|
||||
logwarn "$PID is marked <defunct>, parent: $(ps -p "$parent")"
|
||||
|
@ -831,7 +831,7 @@ wait_for() {
|
|||
shift
|
||||
CMD="$*"
|
||||
while true; do
|
||||
if $CMD >/dev/null 2>&1; then
|
||||
if $CMD; then
|
||||
return 0
|
||||
fi
|
||||
if [ "$WAIT_TIME" -le 0 ]; then
|
||||
|
|
24
bin/nodetool
24
bin/nodetool
|
@ -8,6 +8,8 @@
|
|||
%% -------------------------------------------------------------------
|
||||
-mode(compile).
|
||||
|
||||
-define(SHUTDOWN_TIMEOUT_MS, 120_000).
|
||||
|
||||
main(Args) ->
|
||||
case os:type() of
|
||||
{win32, nt} -> ok;
|
||||
|
@ -85,9 +87,17 @@ do(Args) ->
|
|||
%% a "pong"
|
||||
io:format("pong\n");
|
||||
["stop"] ->
|
||||
case rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], 60000) of
|
||||
Pid = start_shutdown_status(),
|
||||
Res = rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], ?SHUTDOWN_TIMEOUT_MS),
|
||||
true = stop_shutdown_status(Pid),
|
||||
case Res of
|
||||
ok ->
|
||||
ok;
|
||||
{badrpc, timeout} ->
|
||||
io:format("EMQX is still shutting down, it failed to stop gracefully "
|
||||
"within the configured timeout of: ~ps\n",
|
||||
[erlang:convert_time_unit(?SHUTDOWN_TIMEOUT_MS, millisecond, second)]),
|
||||
halt(1);
|
||||
{badrpc, nodedown} ->
|
||||
%% nodetool commands are always executed after a ping
|
||||
%% so if the code gets here, it's because the target node
|
||||
|
@ -145,6 +155,18 @@ do(Args) ->
|
|||
end,
|
||||
net_kernel:stop().
|
||||
|
||||
%% Start the background reporter that prints periodic "still shutting down"
%% messages. The reporter is linked to the caller; its pid is returned so it
%% can later be passed to stop_shutdown_status/1.
start_shutdown_status() ->
    erlang:spawn_link(fun() -> shutdown_status_loop() end).
|
||||
|
||||
%% Stop the background reporter identified by StatusPid.
%% The link is removed first, then the reporter is sent an exit signal
%% with reason `stop`. Returns `true`.
stop_shutdown_status(StatusPid) ->
    true = erlang:unlink(StatusPid),
    true = erlang:exit(StatusPid, stop).
|
||||
|
||||
%% Body of the background reporter: every 10 seconds print a notice that
%% the shutdown is still in progress. Never returns on its own; it runs
%% until the process is terminated from outside.
shutdown_status_loop() ->
    receive
    after 10_000 ->
        ok
    end,
    io:format("EMQX is shutting down, please wait...\n"),
    shutdown_status_loop().
|
||||
|
||||
parse_eval_args(Args) ->
|
||||
% shells may process args into more than one, and end up stripping
|
||||
% spaces, so this converts all of that to a single string to parse
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
Improve EMQX graceful shutdown (`emqx stop` command):
|
||||
- increase timeout from 1 to 2 minutes
|
||||
- print an error message if EMQX can't stop gracefully within the configured timeout
|
||||
- print periodic status messages while EMQX is shutting down
|
Loading…
Reference in New Issue