fix(bin/emqx): allow starting two nodes from the same installation

If more than one node is booted from the same root directory,
try to find the node by the node name set in the EMQX_NODE_NAME
or EMQX_NODE__NAME environment variable.
This commit is contained in:
Zaiming (Stone) Shi 2023-02-16 09:59:20 +01:00
parent 41c7e8a1c3
commit 6b81d9965f
1 changed files with 42 additions and 22 deletions

View File

@ -76,6 +76,12 @@ logwarn() {
fi
}
## Print a debug message to stdout, prefixed with "DEBUG: ".
## Emits nothing unless the global DEBUG equals 1.
## Arguments: the message words; they are joined into a single line.
logdebug() {
    # Guard clause: stay silent (and succeed) when debugging is off.
    [ "$DEBUG" -eq 1 ] || return 0
    echo "DEBUG: $*"
}
die() {
set +x
logerr "$1"
@ -453,24 +459,37 @@ if [ "$IS_ENTERPRISE" = 'yes' ]; then
CONF_KEYS+=( 'license.key' )
fi
## Find the running node from 'ps -ef'
## The primary grep pattern is $RUNNER_ROOT_DIR because one can start multiple nodes at the same time
# shellcheck disable=SC2009
PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
if [ -n "${PS_LINE}" ]; then
RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | wc -l)"
else
RUNNING_NODES_COUNT=0
## To be backward compatible, read and then unset EMQX_NODE_NAME
if [ -n "${EMQX_NODE_NAME:-}" ]; then
export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
unset EMQX_NODE_NAME
fi
# Turn off debug as the ps output can be quite noisy
set +x
## Find the running node from 'ps -ef'
## * The grep pattern is '[e]mqx' rather than 'emqx' to avoid matching the grep command itself
## * The running 'remsh' and 'nodetool' processes must be excluded
if [ -n "${EMQX_NODE__NAME:-}" ]; then
# if node name is provided, filter by node name
# shellcheck disable=SC2009
PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -E "\s\-s?name\s${EMQX_NODE__NAME}" | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
else
# shellcheck disable=SC2009
PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
fi
logdebug "PS_LINE=$PS_LINE"
RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | sed '/^\s*$/d' | wc -l)"
[ "$RUNNING_NODES_COUNT" -gt 1 ] && logdebug "More than one running node found: count=$RUNNING_NODES_COUNT"
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
if [ "$RUNNING_NODES_COUNT" -gt 0 ] && [ "$COMMAND" != 'check_config' ]; then
tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
echo "Node ${tmp_nodename} is already running!"
exit 1
running_node_name=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
if [ -n "$running_node_name" ] && [ "$running_node_name" = "${EMQX_NODE__NAME:-}" ]; then
echo "Node ${running_node_name} is already running!"
exit 1
fi
fi
[ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1
maybe_use_portable_dynlibs
@ -502,8 +521,6 @@ else
# then update the config in the file to 'node.name = "emqx@local.net"', after this change,
# there would be no way to stop the running node 'emqx@127.0.0.1', because 'emqx stop' command
# would try to stop the new node instead.
# * The grep pattern is '[e]mqx' rather than 'emqx' to avoid matching the grep command itself
# * The running 'remsh' and 'nodetool' processes must be excluded
if [ "$RUNNING_NODES_COUNT" -eq 1 ]; then
## only one emqx node is running, get running args from 'ps -ef' output
tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
@ -520,14 +537,22 @@ else
## Make the format like what call_hocon multi_get prints out, but only need 4 args
EMQX_BOOT_CONFIGS="node.name=${tmp_nodename}\nnode.cookie=${tmp_cookie}\ncluster.proto_dist=${tmp_proto}\nnode.dist_net_ticktime=$tmp_ticktime\nnode.data_dir=${tmp_datadir}"
else
## None or more than one node is running, resolve from boot config
## we have no choice but to read the bootstrap config (with environment overrides available in the current shell)
## More than one running node matched this root dir: without EMQX_NODE__NAME
## there is no way to tell which node the command is meant for, so list the
## running node names and exit with an error.
if [ "$RUNNING_NODES_COUNT" -gt 1 ]; then
    if [ -z "${EMQX_NODE__NAME:-}" ]; then
        ## Take the word following -name / -sname from every matched 'ps' line and
        ## join the names with spaces.
        ## '|| true' matches the other node name lookups on PS_LINE in this script:
        ## a failing pipeline stage (e.g. grep finding no match) must not abort the
        ## script (should errexit/pipefail be in effect) before the errors below are logged.
        tmp_nodenames=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' | tr '\n' ' ' || true)
        logerr "More than one EMQX node found running (root dir: ${RUNNER_ROOT_DIR})"
        logerr "Running nodes: $tmp_nodenames"
        logerr "Make sure environment variable EMQX_NODE__NAME is set to indicate for which node this command is intended."
        exit 1
    fi
fi
## We have no choice but to read the bootstrap config (with environment overrides available in the current shell)
[ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1
maybe_use_portable_dynlibs
EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
fi
fi
[ "$DEBUG" -eq 1 ] && echo "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS"
logdebug "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS"
[ "$DEBUG" -eq 1 ] && set -x
get_boot_config() {
@ -877,11 +902,6 @@ maybe_log_to_console() {
fi
}
## To be backward compatible, read and then unset EMQX_NODE_NAME
if [ -n "${EMQX_NODE_NAME:-}" ]; then
export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
unset EMQX_NODE_NAME
fi
## Possible ways to configure emqx node name:
## 1. configure node.name in emqx.conf
## 2. override with environment variable EMQX_NODE__NAME