fix(code_port): improve node stop wait loop
This commit is contained in:
parent
baf8d7d91c
commit
66e848b771
58
bin/emqx
58
bin/emqx
|
@ -4,6 +4,11 @@
|
|||
|
||||
set -e
|
||||
|
||||
DEBUG="${DEBUG:-0}"
|
||||
if [ "$DEBUG" -eq 1 ]; then
|
||||
set -x
|
||||
fi
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)"
|
||||
# shellcheck disable=SC1090
|
||||
. "$ROOT_DIR"/releases/emqx_vars
|
||||
|
@ -299,6 +304,43 @@ generate_config() {
|
|||
fi
|
||||
}
|
||||
|
||||
# check if a PID is down
|
||||
is_down() {
|
||||
PID="$1"
|
||||
if ps -p "$PID" >/dev/null; then
|
||||
# still around
|
||||
# shellcheck disable=SC2009 # this grep pattern is not a part of the progra names
|
||||
if ps -p "$PID" | grep -q 'defunct'; then
|
||||
# zombie state, print parent pid
|
||||
parent="$(ps -o ppid= -p "$PID" | tr -d ' ')"
|
||||
echo "WARN: $PID is marked <defunct>, parent:"
|
||||
ps -p "$parent"
|
||||
return 0
|
||||
fi
|
||||
return 1
|
||||
fi
|
||||
# it's gone
|
||||
return 0
|
||||
}
|
||||
|
||||
wait_for() {
|
||||
local WAIT_TIME
|
||||
local CMD
|
||||
WAIT_TIME="$1"
|
||||
shift
|
||||
CMD="$*"
|
||||
while true; do
|
||||
if $CMD >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
if [ "$WAIT_TIME" -le 0 ]; then
|
||||
return 1
|
||||
fi
|
||||
WAIT_TIME=$((WAIT_TIME - 1))
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
# Call bootstrapd for daemon commands like start/stop/console
|
||||
bootstrapd() {
|
||||
if [ -e "$RUNNER_DATA_DIR/.erlang.cookie" ]; then
|
||||
|
@ -485,11 +527,21 @@ case "$1" in
|
|||
# Wait for the node to completely stop...
|
||||
PID="$(relx_get_pid)"
|
||||
if ! relx_nodetool "stop"; then
|
||||
echoerr "Graceful shutdown failed PID=[$PID]"
|
||||
exit 1
|
||||
fi
|
||||
while kill -s 0 "$PID" 2>/dev/null; do
|
||||
sleep 1
|
||||
done
|
||||
WAIT_TIME="${WAIT_FOR_ERLANG_STOP:-60}"
|
||||
if ! wait_for "$WAIT_TIME" 'is_down' "$PID"; then
|
||||
msg="dangling after ${WAIT_TIME} seconds"
|
||||
# also log to syslog
|
||||
logger -t "${REL_NAME}[${PID}]" "STOP: $msg"
|
||||
# log to user console
|
||||
echoerr "stop failed, $msg"
|
||||
echo "ERROR: $PID is still around"
|
||||
ps -p "$PID"
|
||||
exit 1
|
||||
fi
|
||||
logger -t "${REL_NAME}[${PID}]" "STOP: OK"
|
||||
;;
|
||||
|
||||
restart|reboot)
|
||||
|
|
Loading…
Reference in New Issue