From 6b81d9965f715c890868dc4d62289e0ffc471163 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 16 Feb 2023 09:59:20 +0100 Subject: [PATCH 1/3] fix(bin/emqx): allow starting two nodes from the same installation If more than one node is booted from the same root directory, try to find the node by the node name set in the EMQX_NODE_NAME or EMQX_NODE__NAME environment variable --- bin/emqx | 64 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/bin/emqx b/bin/emqx index 6f953b777..9211bd338 100755 --- a/bin/emqx +++ b/bin/emqx @@ -76,6 +76,12 @@ logwarn() { fi } +logdebug() { + if [ "$DEBUG" -eq 1 ]; then + echo "DEBUG: $*" + fi +} + die() { set +x logerr "$1" @@ -453,24 +459,37 @@ if [ "$IS_ENTERPRISE" = 'yes' ]; then CONF_KEYS+=( 'license.key' ) fi - -## Find the running node from 'ps -ef' -## The primary grep pattern is $RUNNER_ROOT_DIR because one can start multiple nodes at the same time -# shellcheck disable=SC2009 -PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)" -if [ -n "${PS_LINE}" ]; then - RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | wc -l)" -else - RUNNING_NODES_COUNT=0 +## To be backward compatible, read and then unset EMQX_NODE_NAME +if [ -n "${EMQX_NODE_NAME:-}" ]; then + export EMQX_NODE__NAME="${EMQX_NODE_NAME}" + unset EMQX_NODE_NAME fi # Turn off debug as the ps output can be quite noisy set +x + +## Find the running node from 'ps -ef' +## * The grep args like '[e]mqx' but not 'emqx' is to avoid grepping the grep command itself +## * The running 'remsh' and 'nodetool' processes must be excluded +if [ -n "${EMQX_NODE__NAME:-}" ]; then + # if node name is provided, filter by node name + # shellcheck disable=SC2009 + PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -E "\s\-s?name\s${EMQX_NODE__NAME}" | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)" +else + # shellcheck 
disable=SC2009 + PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)" +fi +logdebug "PS_LINE=$PS_LINE" +RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | sed '/^\s*$/d' | wc -l)" +[ "$RUNNING_NODES_COUNT" -gt 1 ] && logdebug "More than one running node found: count=$RUNNING_NODES_COUNT" + if [ "$IS_BOOT_COMMAND" = 'yes' ]; then if [ "$RUNNING_NODES_COUNT" -gt 0 ] && [ "$COMMAND" != 'check_config' ]; then - tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true) - echo "Node ${tmp_nodename} is already running!" - exit 1 + running_node_name=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true) + if [ -n "$running_node_name" ] && [ "$running_node_name" = "${EMQX_NODE__NAME:-}" ]; then + echo "Node ${running_node_name} is already running!" + exit 1 + fi fi [ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1 maybe_use_portable_dynlibs @@ -502,8 +521,6 @@ else # then update the config in the file to 'node.name = "emqx@local.net"', after this change, # there would be no way stop the running node 'emqx@127.0.0.1', because 'emqx stop' command # would try to stop the new node instead. 
- # * The grep args like '[e]mqx' but not 'emqx' is to avoid greping the grep command itself - # * The running 'remsh' and 'nodetool' processes must be excluded if [ "$RUNNING_NODES_COUNT" -eq 1 ]; then ## only one emqx node is running, get running args from 'ps -ef' output tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true) @@ -520,14 +537,22 @@ else ## Make the format like what call_hocon multi_get prints out, but only need 4 args EMQX_BOOT_CONFIGS="node.name=${tmp_nodename}\nnode.cookie=${tmp_cookie}\ncluster.proto_dist=${tmp_proto}\nnode.dist_net_ticktime=$tmp_ticktime\nnode.data_dir=${tmp_datadir}" else - ## None or more than one node is running, resolve from boot config - ## we have no choiece but to read the bootstrap config (with environment overrides available in the current shell) + if [ "$RUNNING_NODES_COUNT" -gt 1 ]; then + if [ -z "${EMQX_NODE__NAME:-}" ]; then + tmp_nodenames=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' | tr '\n' ' ') + logerr "More than one EMQX node found running (root dir: ${RUNNER_ROOT_DIR})" + logerr "Running nodes: $tmp_nodenames" + logerr "Make sure environment variable EMQX_NODE__NAME is set to indicate for which node this command is intended." 
+ exit 1 + fi + fi + ## We have no choice but to read the bootstrap config (with environment overrides available in the current shell) [ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1 maybe_use_portable_dynlibs EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")" fi fi -[ "$DEBUG" -eq 1 ] && echo "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS" +logdebug "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS" [ "$DEBUG" -eq 1 ] && set -x get_boot_config() { @@ -877,11 +902,6 @@ maybe_log_to_console() { fi } -## To be backward compatible, read and then unset EMQX_NODE_NAME -if [ -n "${EMQX_NODE_NAME:-}" ]; then - export EMQX_NODE__NAME="${EMQX_NODE_NAME}" - unset EMQX_NODE_NAME -fi ## Possible ways to configure emqx node name: ## 1. configure node.name in emqx.conf ## 2. override with environment variable EMQX_NODE__NAME From 8fcce53a0d4d8ac11188e6b38af00daaea5c5e80 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 16 Feb 2023 11:17:04 +0100 Subject: [PATCH 2/3] test: add script to run two nodes in host from the same build --- .../{ => test}/start-two-nodes-in-docker.sh | 0 scripts/test/start-two-nodes-in-host.sh | 63 +++++++++++++++++++ 2 files changed, 63 insertions(+) rename scripts/{ => test}/start-two-nodes-in-docker.sh (100%) create mode 100755 scripts/test/start-two-nodes-in-host.sh diff --git a/scripts/start-two-nodes-in-docker.sh b/scripts/test/start-two-nodes-in-docker.sh similarity index 100% rename from scripts/start-two-nodes-in-docker.sh rename to scripts/test/start-two-nodes-in-docker.sh diff --git a/scripts/test/start-two-nodes-in-host.sh b/scripts/test/start-two-nodes-in-host.sh new file mode 100755 index 000000000..7dbd6009e --- /dev/null +++ b/scripts/test/start-two-nodes-in-host.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +set -euo pipefail + +## This starts two nodes on the same host (not in docker). +## The listener ports are shifted with an offset to avoid clashing. 
+## The data and log directories are configured to use ./tmp/ + +## By default, the boot script is ./_build/emqx/rel/emqx/bin/emqx +## it can be overridden with arg1 and arg2 for the two nodes respectively + +# ensure dir +cd -P -- "$(dirname -- "$0")/../../" + +DEFAULT_BOOT='./_build/emqx/rel/emqx/bin/emqx' +BOOT1="${1:-$DEFAULT_BOOT}" +BOOT2="${2:-$BOOT1}" + +DATA1="$(pwd)/tmp/emqx1/data" +LOG1="$(pwd)/tmp/emqx1/log" +DATA2="$(pwd)/tmp/emqx2/data" +LOG2="$(pwd)/tmp/emqx2/log" + +mkdir -p "$DATA1" "$DATA2" "$LOG1" "$LOG2" + +echo "Stopping emqx1" +env EMQX_NODE_NAME='emqx1@127.0.0.1' \ + ./_build/emqx/rel/emqx/bin/emqx stop || true + +echo "Stopping emqx2" +env EMQX_NODE_NAME='emqx2@127.0.0.1' \ + ./_build/emqx/rel/emqx/bin/emqx stop || true + +## Fork-start node1, otherwise it'll keep waiting for node2 because we are using static cluster +env DEBUG="${DEBUG:-0}" \ + EMQX_CLUSTER__STATIC__SEEDS='["emqx1@127.0.0.1","emqx2@127.0.0.1"]' \ + EMQX_CLUSTER__DISCOVERY_STRATEGY=static \ + EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL="${EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL:-debug}" \ + EMQX_LOG__FILE_HANDLERS__DEFAULT__FILE="$LOG1/emqx.log" \ + EMQX_NODE_NAME='emqx1@127.0.0.1' \ + EMQX_LOG_DIR="$LOG1" \ + EMQX_NODE__DATA_DIR="$DATA1" \ + EMQX_LISTENERS__TCP__DEFAULT__BIND='0.0.0.0:1881' \ + EMQX_LISTENERS__SSL__DEFAULT__BIND='0.0.0.0:8881' \ + EMQX_LISTENERS__WS__DEFAULT__BIND='0.0.0.0:8081' \ + EMQX_LISTENERS__WSS__DEFAULT__BIND='0.0.0.0:8084' \ + EMQX_DASHBOARD__LISTENERS__HTTP__BIND='0.0.0.0:18081' \ + "$BOOT1" start & + +env DEBUG="${DEBUG:-0}" \ + EMQX_CLUSTER__STATIC__SEEDS='["emqx1@127.0.0.1","emqx2@127.0.0.1"]' \ + EMQX_CLUSTER__DISCOVERY_STRATEGY=static \ + EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL="${EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL:-debug}" \ + EMQX_LOG__FILE_HANDLERS__DEFAULT__FILE="$LOG2/emqx.log" \ + EMQX_NODE_NAME='emqx2@127.0.0.1' \ + EMQX_LOG_DIR="$LOG2" \ + EMQX_NODE__DATA_DIR="$DATA2" \ + EMQX_LISTENERS__TCP__DEFAULT__BIND='0.0.0.0:1882' \ + 
EMQX_LISTENERS__SSL__DEFAULT__BIND='0.0.0.0:8882' \ + EMQX_LISTENERS__WS__DEFAULT__BIND='0.0.0.0:8082' \ + EMQX_LISTENERS__WSS__DEFAULT__BIND='0.0.0.0:8085' \ + EMQX_DASHBOARD__LISTENERS__HTTP__BIND='0.0.0.0:18082' \ + "$BOOT2" start From fea310cf932cd25364f7b161e749db67652d8143 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 17 Feb 2023 13:15:22 +0100 Subject: [PATCH 3/3] test: use different loopback addresses in script which starts two nodes --- scripts/test/start-two-nodes-in-host.sh | 86 +++++++++++++------------ 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/scripts/test/start-two-nodes-in-host.sh b/scripts/test/start-two-nodes-in-host.sh index 7dbd6009e..417df54e5 100755 --- a/scripts/test/start-two-nodes-in-host.sh +++ b/scripts/test/start-two-nodes-in-host.sh @@ -13,51 +13,57 @@ set -euo pipefail cd -P -- "$(dirname -- "$0")/../../" DEFAULT_BOOT='./_build/emqx/rel/emqx/bin/emqx' + BOOT1="${1:-$DEFAULT_BOOT}" BOOT2="${2:-$BOOT1}" -DATA1="$(pwd)/tmp/emqx1/data" -LOG1="$(pwd)/tmp/emqx1/log" -DATA2="$(pwd)/tmp/emqx2/data" -LOG2="$(pwd)/tmp/emqx2/log" +export IP1='127.0.0.1' +export IP2='127.0.0.2' -mkdir -p "$DATA1" "$DATA2" "$LOG1" "$LOG2" +# cannot use the same node name even if the IPs are different because Erlang distribution listens on 0.0.0.0 +NODE1="emqx1@$IP1" +NODE2="emqx2@$IP2" -echo "Stopping emqx1" -env EMQX_NODE_NAME='emqx1@127.0.0.1' \ - ./_build/emqx/rel/emqx/bin/emqx stop || true +start_cmd() { + local index="$1" + local nodehome + nodehome="$(pwd)/tmp/emqx${index}" + [ "$index" -eq 1 ] && BOOT_SCRIPT="$BOOT1" + [ "$index" -eq 2 ] && BOOT_SCRIPT="$BOOT2" + mkdir -p "${nodehome}/data" "${nodehome}/log" + cat <<-EOF +env DEBUG="${DEBUG:-0}" \ +EMQX_CLUSTER__STATIC__SEEDS="[\"$NODE1\",\"$NODE2\"]" \ +EMQX_CLUSTER__DISCOVERY_STRATEGY=static \ +EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL="${EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL:-debug}" \ +EMQX_LOG__FILE_HANDLERS__DEFAULT__FILE="${nodehome}/log/emqx.log" \ 
+EMQX_NODE_NAME="emqx${index}@\$IP${index}" \ +EMQX_NODE__COOKIE="${EMQX_NODE__COOKIE:-cookie1}" \ +EMQX_LOG_DIR="${nodehome}/log" \ +EMQX_NODE__DATA_DIR="${nodehome}/data" \ +EMQX_LISTENERS__TCP__DEFAULT__BIND="\$IP${index}:1883" \ +EMQX_LISTENERS__SSL__DEFAULT__BIND="\$IP${index}:8883" \ +EMQX_LISTENERS__WS__DEFAULT__BIND="\$IP${index}:8083" \ +EMQX_LISTENERS__WSS__DEFAULT__BIND="\$IP${index}:8084" \ +EMQX_DASHBOARD__LISTENERS__HTTP__BIND="\$IP${index}:18083" \ +$BOOT_SCRIPT start +EOF +} -echo "Stopping emqx2" -env EMQX_NODE_NAME='emqx2@127.0.0.1' \ - ./_build/emqx/rel/emqx/bin/emqx stop || true +echo "Stopping $NODE1" +env EMQX_NODE_NAME="$NODE1" ./_build/emqx/rel/emqx/bin/emqx stop || true + +echo "Stopping $NODE2" +env EMQX_NODE_NAME="$NODE2" ./_build/emqx/rel/emqx/bin/emqx stop || true + +start_one_node() { + local index="$1" + local cmd + cmd="$(start_cmd "$index" | envsubst)" + echo "$cmd" + eval "$cmd" +} ## Fork-start node1, otherwise it'll keep waiting for node2 because we are using static cluster -env DEBUG="${DEBUG:-0}" \ - EMQX_CLUSTER__STATIC__SEEDS='["emqx1@127.0.0.1","emqx2@127.0.0.1"]' \ - EMQX_CLUSTER__DISCOVERY_STRATEGY=static \ - EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL="${EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL:-debug}" \ - EMQX_LOG__FILE_HANDLERS__DEFAULT__FILE="$LOG1/emqx.log" \ - EMQX_NODE_NAME='emqx1@127.0.0.1' \ - EMQX_LOG_DIR="$LOG1" \ - EMQX_NODE__DATA_DIR="$DATA1" \ - EMQX_LISTENERS__TCP__DEFAULT__BIND='0.0.0.0:1881' \ - EMQX_LISTENERS__SSL__DEFAULT__BIND='0.0.0.0:8881' \ - EMQX_LISTENERS__WS__DEFAULT__BIND='0.0.0.0:8081' \ - EMQX_LISTENERS__WSS__DEFAULT__BIND='0.0.0.0:8084' \ - EMQX_DASHBOARD__LISTENERS__HTTP__BIND='0.0.0.0:18081' \ - "$BOOT1" start & - -env DEBUG="${DEBUG:-0}" \ - EMQX_CLUSTER__STATIC__SEEDS='["emqx1@127.0.0.1","emqx2@127.0.0.1"]' \ - EMQX_CLUSTER__DISCOVERY_STRATEGY=static \ - EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL="${EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL:-debug}" \ - 
EMQX_LOG__FILE_HANDLERS__DEFAULT__FILE="$LOG2/emqx.log" \ - EMQX_NODE_NAME='emqx2@127.0.0.1' \ - EMQX_LOG_DIR="$LOG2" \ - EMQX_NODE__DATA_DIR="$DATA2" \ - EMQX_LISTENERS__TCP__DEFAULT__BIND='0.0.0.0:1882' \ - EMQX_LISTENERS__SSL__DEFAULT__BIND='0.0.0.0:8882' \ - EMQX_LISTENERS__WS__DEFAULT__BIND='0.0.0.0:8082' \ - EMQX_LISTENERS__WSS__DEFAULT__BIND='0.0.0.0:8085' \ - EMQX_DASHBOARD__LISTENERS__HTTP__BIND='0.0.0.0:18082' \ - "$BOOT2" start +start_one_node 1 & +start_one_node 2