fix(code_port): improve node stop wait loop
This commit is contained in:
parent
baf8d7d91c
commit
66e848b771
58
bin/emqx
58
bin/emqx
|
@ -4,6 +4,11 @@
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
DEBUG="${DEBUG:-0}"
|
||||||
|
if [ "$DEBUG" -eq 1 ]; then
|
||||||
|
set -x
|
||||||
|
fi
|
||||||
|
|
||||||
ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)"
|
ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)"
|
||||||
# shellcheck disable=SC1090
|
# shellcheck disable=SC1090
|
||||||
. "$ROOT_DIR"/releases/emqx_vars
|
. "$ROOT_DIR"/releases/emqx_vars
|
||||||
|
@ -299,6 +304,43 @@ generate_config() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Check if a PID is down.
# Arguments:
#   $1 - process ID to inspect
# Outputs:
#   when `ps` lists the process as <defunct>, a warning line followed by the
#   `ps` listing of its parent process, both on stdout
# Returns:
#   0 if `ps -p` no longer finds the process, or finds it marked <defunct>
#   1 if the process is still listed and is not marked <defunct>
is_down() {
    # keep working variables local so the caller's own PID/parent variables
    # are never overwritten by this check
    local pid
    local parent
    pid="$1"
    if ps -p "$pid" >/dev/null; then
        # still around
        # shellcheck disable=SC2009 # this grep pattern is not a part of the program names
        if ps -p "$pid" | grep -q 'defunct'; then
            # zombie state, print parent pid
            parent="$(ps -o ppid= -p "$pid" | tr -d ' ')"
            echo "WARN: $pid is marked <defunct>, parent:"
            ps -p "$parent"
            return 0
        fi
        return 1
    fi
    # it's gone
    return 0
}
|
||||||
|
|
||||||
|
# Poll a command once per second until it succeeds or the time budget is used up.
# Arguments:
#   $1   - maximum number of retries, one second apart; the command is always
#          attempted at least once, even when this is 0 or negative
#   $2.. - the command to run, followed by its arguments
# Outputs:
#   nothing; stdout and stderr of the polled command are discarded
# Returns:
#   0 as soon as the command exits successfully
#   1 if the command still fails once the budget is exhausted
wait_for() {
    local WAIT_TIME
    WAIT_TIME="$1"
    shift
    while true; do
        # run "$@" directly (instead of flattening into a string and
        # re-splitting it) so every argument keeps its exact word boundaries
        if "$@" >/dev/null 2>&1; then
            return 0
        fi
        if [ "$WAIT_TIME" -le 0 ]; then
            return 1
        fi
        WAIT_TIME=$((WAIT_TIME - 1))
        sleep 1
    done
}
|
||||||
|
|
||||||
# Call bootstrapd for daemon commands like start/stop/console
|
# Call bootstrapd for daemon commands like start/stop/console
|
||||||
bootstrapd() {
|
bootstrapd() {
|
||||||
if [ -e "$RUNNER_DATA_DIR/.erlang.cookie" ]; then
|
if [ -e "$RUNNER_DATA_DIR/.erlang.cookie" ]; then
|
||||||
|
@ -485,11 +527,21 @@ case "$1" in
|
||||||
# Wait for the node to completely stop...
|
# Wait for the node to completely stop...
|
||||||
PID="$(relx_get_pid)"
|
PID="$(relx_get_pid)"
|
||||||
if ! relx_nodetool "stop"; then
|
if ! relx_nodetool "stop"; then
|
||||||
|
echoerr "Graceful shutdown failed PID=[$PID]"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
while kill -s 0 "$PID" 2>/dev/null; do
|
WAIT_TIME="${WAIT_FOR_ERLANG_STOP:-60}"
|
||||||
sleep 1
|
if ! wait_for "$WAIT_TIME" 'is_down' "$PID"; then
|
||||||
done
|
msg="dangling after ${WAIT_TIME} seconds"
|
||||||
|
# also log to syslog
|
||||||
|
logger -t "${REL_NAME}[${PID}]" "STOP: $msg"
|
||||||
|
# log to user console
|
||||||
|
echoerr "stop failed, $msg"
|
||||||
|
echo "ERROR: $PID is still around"
|
||||||
|
ps -p "$PID"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
logger -t "${REL_NAME}[${PID}]" "STOP: OK"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
restart|reboot)
|
restart|reboot)
|
||||||
|
|
Loading…
Reference in New Issue