fix: improve node stop wait loop
This commit is contained in:
parent
9f3063a823
commit
668ecbe97b
|
@ -41,7 +41,7 @@ start_link() ->
|
||||||
|
|
||||||
is_running() -> is_pid(whereis(?TERMINATOR)).
|
is_running() -> is_pid(whereis(?TERMINATOR)).
|
||||||
|
|
||||||
%% @doc Send a signal to activate the terminator.
|
%% @doc Call terminator to stop applications, then issue init:stop() to terminate the VM.
|
||||||
graceful() ->
|
graceful() ->
|
||||||
try
|
try
|
||||||
_ = gen_server:call(?TERMINATOR, ?DO_IT, infinity)
|
_ = gen_server:call(?TERMINATOR, ?DO_IT, infinity)
|
||||||
|
@ -52,28 +52,19 @@ graceful() ->
|
||||||
%% should issue a shutdown to be sure
|
%% should issue a shutdown to be sure
|
||||||
%% NOTE: not exit_loop here because we do not want to
|
%% NOTE: not exit_loop here because we do not want to
|
||||||
%% block erl_signal_server
|
%% block erl_signal_server
|
||||||
|
?ELOG("Shutdown before node is ready?~n", []),
|
||||||
init:stop()
|
init:stop()
|
||||||
end,
|
end,
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
%% @doc Shutdown the Erlang VM and wait until the terminator dies or the VM dies.
|
%% @doc Shut down the Erlang VM and wait indefinitely.
|
||||||
graceful_wait() ->
|
graceful_wait() ->
|
||||||
case whereis(?TERMINATOR) of
|
|
||||||
undefined ->
|
|
||||||
?SLOG(warning, #{msg => "shutdown_before_boot_is_complete"}),
|
|
||||||
exit_loop();
|
|
||||||
Pid ->
|
|
||||||
ok = graceful(),
|
ok = graceful(),
|
||||||
Ref = monitor(process, Pid),
|
exit_loop().
|
||||||
%% NOTE: not exactly sure, but maybe there is a chance that
|
|
||||||
%% Erlang VM goes down before this receive.
|
|
||||||
%% In which case, the remote caller will get {badrpc, nodedown}
|
|
||||||
receive {'DOWN', Ref, process, Pid, _} -> ok end
|
|
||||||
end.
|
|
||||||
|
|
||||||
exit_loop() ->
|
exit_loop() ->
|
||||||
init:stop(),
|
|
||||||
timer:sleep(100),
|
timer:sleep(100),
|
||||||
|
init:stop(),
|
||||||
exit_loop().
|
exit_loop().
|
||||||
|
|
||||||
init(_) ->
|
init(_) ->
|
||||||
|
|
49
bin/emqx
49
bin/emqx
|
@ -299,6 +299,33 @@ bootstrapd() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# check if a PID is down
|
||||||
|
is_down() {
|
||||||
|
PID="$1"
|
||||||
|
if kill -s 0 "$PID" 2>/dev/null; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for() {
|
||||||
|
local WAIT_TIME
|
||||||
|
local CMD
|
||||||
|
WAIT_TIME="$1"
|
||||||
|
shift
|
||||||
|
CMD="$*"
|
||||||
|
while true; do
|
||||||
|
if $CMD >/dev/null 2>&1; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if [ "$WAIT_TIME" -le 0 ]; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
WAIT_TIME=$((WAIT_TIME - 1))
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
# Use $CWD/etc/sys.config if exists
|
# Use $CWD/etc/sys.config if exists
|
||||||
if [ -z "$RELX_CONFIG_PATH" ]; then
|
if [ -z "$RELX_CONFIG_PATH" ]; then
|
||||||
if [ -f "$RUNNER_ETC_DIR/sys.config" ]; then
|
if [ -f "$RUNNER_ETC_DIR/sys.config" ]; then
|
||||||
|
@ -437,22 +464,16 @@ case "$1" in
|
||||||
"$(relx_start_command)"
|
"$(relx_start_command)"
|
||||||
|
|
||||||
WAIT_TIME=${WAIT_FOR_ERLANG:-15}
|
WAIT_TIME=${WAIT_FOR_ERLANG:-15}
|
||||||
while [ "$WAIT_TIME" -gt 0 ]; do
|
if wait_for "$WAIT_TIME" 'relx_nodetool' 'ping'; then
|
||||||
if ! relx_nodetool "ping" >/dev/null 2>&1; then
|
|
||||||
WAIT_TIME=$((WAIT_TIME - 1))
|
|
||||||
sleep 1
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
if relx_nodetool "ping" >/dev/null 2>&1; then
|
|
||||||
echo "$EMQX_DESCRIPTION $REL_VSN is started successfully!"
|
echo "$EMQX_DESCRIPTION $REL_VSN is started successfully!"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
else
|
||||||
done && echo "$EMQX_DESCRIPTION $REL_VSN failed to start within ${WAIT_FOR_ERLANG:-15} seconds,"
|
echo "$EMQX_DESCRIPTION $REL_VSN failed to start within ${WAIT_TIME} seconds,"
|
||||||
echo "see the output of '$0 console' for more information."
|
echo "see the output of '$0 console' for more information."
|
||||||
echo "If you want to wait longer, set the environment variable"
|
echo "If you want to wait longer, set the environment variable"
|
||||||
echo "WAIT_FOR_ERLANG to the number of seconds to wait."
|
echo "WAIT_FOR_ERLANG to the number of seconds to wait."
|
||||||
exit 1
|
exit 1
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
stop)
|
stop)
|
||||||
|
@ -462,11 +483,11 @@ case "$1" in
|
||||||
echoerr "emqx_graceful_shutdown_failed PID=[$PID]"
|
echoerr "emqx_graceful_shutdown_failed PID=[$PID]"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
while kill -s 0 "$PID" 2>/dev/null; do
|
WAIT_TIME=30
|
||||||
sleep 1
|
if ! wait_for "$WAIT_TIME" is_down "$PID"; then
|
||||||
done
|
echoerr "emqx_pid_dangling_after ${WAIT_TIME} seconds PID=[$PID]"
|
||||||
echoerr "emqx_pid_dangling_after ${max_wait} seconds PID=[$PID]"
|
|
||||||
exit 1
|
exit 1
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
|
|
||||||
restart|reboot)
|
restart|reboot)
|
||||||
|
|
Loading…
Reference in New Issue