Merge pull request #11567 from SergeTupchiy/EMQX-10835-increase-graceful-stop-timeout

fix(nodetool): increase graceful stop timeout
This commit is contained in:
SergeTupchiy 2023-09-13 13:22:39 +03:00 committed by GitHub
commit cf334d5542
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 3 deletions

View File

@ -812,7 +812,7 @@ is_down() {
if ps -p "$PID" >/dev/null; then
# still around
# shellcheck disable=SC2009 # this grep pattern is not a part of the program names
if ps -efp "$PID" | $GREP -q 'defunct'; then
if ps -fp "$PID" | $GREP -q 'defunct'; then
# zombie state, print parent pid
parent="$(ps -o ppid= -p "$PID" | tr -d ' ')"
logwarn "$PID is marked <defunct>, parent: $(ps -p "$parent")"
@ -831,7 +831,7 @@ wait_for() {
shift
CMD="$*"
while true; do
if $CMD >/dev/null 2>&1; then
if $CMD; then
return 0
fi
if [ "$WAIT_TIME" -le 0 ]; then

View File

@ -8,6 +8,8 @@
%% -------------------------------------------------------------------
-mode(compile).
-define(SHUTDOWN_TIMEOUT_MS, 120_000).
main(Args) ->
case os:type() of
{win32, nt} -> ok;
@ -85,9 +87,17 @@ do(Args) ->
%% a "pong"
io:format("pong\n");
["stop"] ->
case rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], 60000) of
Pid = start_shutdown_status(),
Res = rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], ?SHUTDOWN_TIMEOUT_MS),
true = stop_shutdown_status(Pid),
case Res of
ok ->
ok;
{badrpc, timeout} ->
io:format("EMQX is still shutting down, it failed to stop gracefully "
"within the configured timeout of: ~ps\n",
[erlang:convert_time_unit(?SHUTDOWN_TIMEOUT_MS, millisecond, second)]),
halt(1);
{badrpc, nodedown} ->
%% nodetool commands are always executed after a ping
%% which if the code gets here, it's because the target node
@ -145,6 +155,18 @@ do(Args) ->
end,
net_kernel:stop().
%% Start the background progress reporter in a process linked to the caller.
%% Returns the pid of the new process, which runs shutdown_status_loop/0.
start_shutdown_status() ->
    erlang:spawn_link(fun() -> shutdown_status_loop() end).
%% Stop the progress reporter identified by StatusPid.
%% The link is removed first, then an exit signal with reason `stop` is sent
%% to the reporter. Returns `true`.
stop_shutdown_status(StatusPid) ->
    true = erlang:unlink(StatusPid),
    %% exit/2 itself evaluates to `true`, which becomes the return value.
    erlang:exit(StatusPid, stop).
%% Body of the progress reporter: wait 10 seconds, print a status line to
%% standard output, repeat. The loop never returns on its own, so the process
%% running it has to be terminated from outside.
shutdown_status_loop() ->
    %% Plain timed wait: no message patterns, so only the timeout can fire.
    receive
    after 10_000 -> ok
    end,
    io:format("EMQX is shutting down, please wait...\n"),
    shutdown_status_loop().
parse_eval_args(Args) ->
% shells may process args into more than one, and end up stripping
% spaces, so this converts all of that to a single string to parse

View File

@ -0,0 +1,4 @@
Improve EMQX graceful shutdown (`emqx stop` command):
- increase timeout from 1 to 2 minutes
- print an error message if EMQX can't stop gracefully within the configured timeout
- print periodic status messages while EMQX is shutting down