diff --git a/bin/emqx b/bin/emqx
index 6f953b777..9211bd338 100755
--- a/bin/emqx
+++ b/bin/emqx
@@ -76,6 +76,13 @@ logwarn() {
     fi
 }
 
+## Print a debug message to stdout, only when DEBUG=1
+logdebug() {
+    if [ "$DEBUG" -eq 1 ]; then
+        echo "DEBUG: $*"
+    fi
+}
+
 die() {
     set +x
     logerr "$1"
@@ -453,24 +460,40 @@ if [ "$IS_ENTERPRISE" = 'yes' ]; then
     CONF_KEYS+=( 'license.key' )
 fi
 
-
-## Find the running node from 'ps -ef'
-## The primary grep pattern is $RUNNER_ROOT_DIR because one can start multiple nodes at the same time
-# shellcheck disable=SC2009
-PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
-if [ -n "${PS_LINE}" ]; then
-    RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | wc -l)"
-else
-    RUNNING_NODES_COUNT=0
+## To be backward compatible, read and then unset EMQX_NODE_NAME
+if [ -n "${EMQX_NODE_NAME:-}" ]; then
+    export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
+    unset EMQX_NODE_NAME
 fi
 
 # Turn off debug as the ps output can be quite noisy
 set +x
+
+## Find the running node from 'ps -ef'
+## * The grep args like '[e]mqx' but not 'emqx' is to avoid grepping the grep command itself
+## * The running 'remsh' and 'nodetool' processes must be excluded
+if [ -n "${EMQX_NODE__NAME:-}" ]; then
+    # if node name is provided, filter by node name
+    # * dots in the name are escaped so that they match literally in the regex
+    # * the trailing '(\s|$)' stops 'emqx@host1' from also matching 'emqx@host10'
+    # shellcheck disable=SC2009
+    PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -E "\s\-s?name\s${EMQX_NODE__NAME//./\\.}(\s|\$)" | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
+else
+    # shellcheck disable=SC2009
+    PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
+fi
+logdebug "PS_LINE=$PS_LINE"
+RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | sed '/^\s*$/d' | wc -l)"
+[ "$RUNNING_NODES_COUNT" -gt 1 ] && logdebug "More than one running node found: count=$RUNNING_NODES_COUNT"
+
 if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
     if [ "$RUNNING_NODES_COUNT" -gt 0 ] && [ "$COMMAND" != 'check_config' ]; then
-        tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
-        echo "Node ${tmp_nodename} is already running!"
-        exit 1
+        running_node_name=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
+        # Refuse to boot only when the running node has the very name given in EMQX_NODE__NAME
+        if [ -n "$running_node_name" ] && [ "$running_node_name" = "${EMQX_NODE__NAME:-}" ]; then
+            echo "Node ${running_node_name} is already running!"
+            exit 1
+        fi
     fi
     [ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1
     maybe_use_portable_dynlibs
@@ -502,8 +525,6 @@ else
     # then update the config in the file to 'node.name = "emqx@local.net"', after this change,
     # there would be no way stop the running node 'emqx@127.0.0.1', because 'emqx stop' command
     # would try to stop the new node instead.
-    # * The grep args like '[e]mqx' but not 'emqx' is to avoid greping the grep command itself
-    # * The running 'remsh' and 'nodetool' processes must be excluded
     if [ "$RUNNING_NODES_COUNT" -eq 1 ]; then
         ## only one emqx node is running, get running args from 'ps -ef' output
         tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
@@ -520,14 +541,22 @@ else
         ## Make the format like what call_hocon multi_get prints out, but only need 4 args
         EMQX_BOOT_CONFIGS="node.name=${tmp_nodename}\nnode.cookie=${tmp_cookie}\ncluster.proto_dist=${tmp_proto}\nnode.dist_net_ticktime=$tmp_ticktime\nnode.data_dir=${tmp_datadir}"
     else
-        ## None or more than one node is running, resolve from boot config
-        ## we have no choiece but to read the bootstrap config (with environment overrides available in the current shell)
+        if [ "$RUNNING_NODES_COUNT" -gt 1 ]; then
+            if [ -z "${EMQX_NODE__NAME:-}" ]; then
+                tmp_nodenames=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' | tr '\n' ' ')
+                logerr "More than one EMQX node found running (root dir: ${RUNNER_ROOT_DIR})"
+                logerr "Running nodes: $tmp_nodenames"
+                logerr "Make sure environment variable EMQX_NODE__NAME is set to indicate for which node this command is intended."
+                exit 1
+            fi
+        fi
+        ## We have no choice but to read the bootstrap config (with environment overrides available in the current shell)
         [ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1
         maybe_use_portable_dynlibs
         EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
     fi
 fi
-[ "$DEBUG" -eq 1 ] && echo "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS"
+logdebug "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS"
 [ "$DEBUG" -eq 1 ] && set -x
 
 get_boot_config() {
@@ -877,11 +906,6 @@ maybe_log_to_console() {
     fi
 }
 
-## To be backward compatible, read and then unset EMQX_NODE_NAME
-if [ -n "${EMQX_NODE_NAME:-}" ]; then
-    export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
-    unset EMQX_NODE_NAME
-fi
 ## Possible ways to configure emqx node name:
 ## 1. configure node.name in emqx.conf
 ## 2. override with environment variable EMQX_NODE__NAME
diff --git a/scripts/start-two-nodes-in-docker.sh b/scripts/test/start-two-nodes-in-docker.sh
similarity index 100%
rename from scripts/start-two-nodes-in-docker.sh
rename to scripts/test/start-two-nodes-in-docker.sh
diff --git a/scripts/test/start-two-nodes-in-host.sh b/scripts/test/start-two-nodes-in-host.sh
new file mode 100755
index 000000000..417df54e5
--- /dev/null
+++ b/scripts/test/start-two-nodes-in-host.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+## This starts two nodes on the same host (not in docker).
+## The listeners of each node are bound to a distinct loopback IP to avoid clashing.
+## The data and log directories are configured to use ./tmp/
+
+## By default, the boot script is ./_build/emqx/rel/emqx/bin/emqx
+## it can be overridden with arg1 and arg2 for the two nodes respectively
+
+# ensure dir
+cd -P -- "$(dirname -- "$0")/../../"
+
+DEFAULT_BOOT='./_build/emqx/rel/emqx/bin/emqx'
+
+BOOT1="${1:-$DEFAULT_BOOT}"
+BOOT2="${2:-$BOOT1}"
+
+export IP1='127.0.0.1'
+export IP2='127.0.0.2'
+
+# cannot use the same node name even IPs are different because Erlang distribution listens on 0.0.0.0
+NODE1="emqx1@$IP1"
+NODE2="emqx2@$IP2"
+
+## Print (do not run) the 'env ... <boot script> start' command line for node $1 (1 or 2).
+## '\$IP<n>' is left unexpanded here on purpose; the caller pipes the output through envsubst.
+start_cmd() {
+    local index="$1"
+    local nodehome
+    nodehome="$(pwd)/tmp/emqx${index}"
+    [ "$index" -eq 1 ] && BOOT_SCRIPT="$BOOT1"
+    [ "$index" -eq 2 ] && BOOT_SCRIPT="$BOOT2"
+    mkdir -p "${nodehome}/data" "${nodehome}/log"
+    cat <<-EOF
+env DEBUG="${DEBUG:-0}" \
+EMQX_CLUSTER__STATIC__SEEDS="[\"$NODE1\",\"$NODE2\"]" \
+EMQX_CLUSTER__DISCOVERY_STRATEGY=static \
+EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL="${EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL:-debug}" \
+EMQX_LOG__FILE_HANDLERS__DEFAULT__FILE="${nodehome}/log/emqx.log" \
+EMQX_NODE_NAME="emqx${index}@\$IP${index}" \
+EMQX_NODE__COOKIE="${EMQX_NODE__COOKIE:-cookie1}" \
+EMQX_LOG_DIR="${nodehome}/log" \
+EMQX_NODE__DATA_DIR="${nodehome}/data" \
+EMQX_LISTENERS__TCP__DEFAULT__BIND="\$IP${index}:1883" \
+EMQX_LISTENERS__SSL__DEFAULT__BIND="\$IP${index}:8883" \
+EMQX_LISTENERS__WS__DEFAULT__BIND="\$IP${index}:8083" \
+EMQX_LISTENERS__WSS__DEFAULT__BIND="\$IP${index}:8084" \
+EMQX_DASHBOARD__LISTENERS__HTTP__BIND="\$IP${index}:18083" \
+$BOOT_SCRIPT start
+EOF
+}
+
+## Stop each node with the same boot script that is used to start it
+echo "Stopping $NODE1"
+env EMQX_NODE_NAME="$NODE1" "$BOOT1" stop || true
+
+echo "Stopping $NODE2"
+env EMQX_NODE_NAME="$NODE2" "$BOOT2" stop || true
+
+## Build the start command for node $1, echo it, then run it
+start_one_node() {
+    local index="$1"
+    local cmd
+    cmd="$(start_cmd "$index" | envsubst)"
+    echo "$cmd"
+    eval "$cmd"
+}
+
+## Fork-start node1, otherwise it'll keep waiting for node2 because we are using static cluster
+start_one_node 1 &
+start_one_node 2