diff --git a/bin/emqx b/bin/emqx index 0a967a73c..0f6a18437 100755 --- a/bin/emqx +++ b/bin/emqx @@ -85,7 +85,7 @@ die() { assert_node_alive() { if ! relx_nodetool "ping" > /dev/null; then - die "node_is_not_running!" 1 + exit 1 fi } @@ -230,6 +230,7 @@ usage() { } COMMAND="${1:-}" +GREP='grep --color=never' if [ -z "$COMMAND" ]; then usage 'help' @@ -299,6 +300,8 @@ fi # Make sure log directory exists mkdir -p "$RUNNER_LOG_DIR" +# turn off debug as this is static +set +x COMPATIBILITY_CHECK=' io:format("BEAM_OK~n", []), try @@ -321,50 +324,47 @@ COMPATIBILITY_CHECK=' end, halt(0). ' +[ "$DEBUG" -eq 1 ] && set -x compatiblity_info() { # RELEASE_LIB is used by Elixir # set crash-dump bytes to zero to ensure no crash dump is generated when erl crashes env ERL_CRASH_DUMP_BYTES=0 "$BINDIR/$PROGNAME" \ -noshell \ - -boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \ -boot "$REL_DIR/start_clean" \ + -boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \ -eval "$COMPATIBILITY_CHECK" } # Collect Erlang/OTP runtime sanity and compatibility in one go -if [ "$IS_BOOT_COMMAND" = 'yes' ]; then +maybe_use_portable_dynlibs() { # Read BUILD_INFO early as the next commands may mess up the shell BUILD_INFO="$(cat "${REL_DIR}/BUILD_INFO")" COMPATIBILITY_INFO="$(compatiblity_info 2>/dev/null || true)" - if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then + if ! (echo -e "$COMPATIBILITY_INFO" | $GREP -q 'CRYPTO_OK'); then ## failed to start, might be due to missing libs, try to be portable export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-$DYNLIBS_DIR}" if [ "$LD_LIBRARY_PATH" != "$DYNLIBS_DIR" ]; then export LD_LIBRARY_PATH="$DYNLIBS_DIR:$LD_LIBRARY_PATH" fi ## Turn off debug, because COMPATIBILITY_INFO needs to capture stderr - set +x COMPATIBILITY_INFO="$(compatiblity_info 2>&1 || true)" - if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'BEAM_OK'); then + if ! 
(echo -e "$COMPATIBILITY_INFO" | $GREP -q 'BEAM_OK'); then ## not able to start beam.smp - set +x logerr "$COMPATIBILITY_INFO" logerr "Please ensure it is running on the correct platform:" logerr "$BUILD_INFO" logerr "Version=$REL_VSN" logerr "Required dependencies: openssl-1.1.1 (libcrypto), libncurses and libatomic1" exit 1 - elif ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then + elif ! (echo -e "$COMPATIBILITY_INFO" | $GREP -q 'CRYPTO_OK'); then ## not able to start crypto app - set +x logerr "$COMPATIBILITY_INFO" exit 2 fi logwarn "Using libs from '${DYNLIBS_DIR}' due to missing from the OS." fi - [ "$DEBUG" -eq 1 ] && set -x -fi +} # Warn the user if ulimit -n is less than 1024 ULIMIT_F=$(ulimit -n) @@ -397,8 +397,6 @@ remsh() { # Generate a unique id used to allow multiple remsh to the same node # transparently id="remsh$(relx_gen_id)-${NAME}" - # Get the node's ticktime so that we use the same thing. - TICKTIME="$(relx_nodetool rpcterms net_kernel get_net_ticktime)" # shellcheck disable=SC2086 # Setup remote shell command to control node @@ -450,13 +448,16 @@ call_hocon() { ## Resolve boot configs in a batch ## This is because starting the Erlang beam with all modules loaded ## and parsing HOCON config + environment variables is a non-trivial task -CONF_KEYS=( 'node.data_dir' 'node.name' 'node.cookie' 'node.db_backend' 'cluster.proto_dist' ) +CONF_KEYS=( 'node.data_dir' 'node.name' 'node.cookie' 'node.db_backend' 'cluster.proto_dist' 'node.dist_net_ticktime' ) if [ "$IS_ENTERPRISE" = 'yes' ]; then CONF_KEYS+=( 'license.key' ) fi +# Turn off debug as the ps output can be quite noisy +set +x if [ "$IS_BOOT_COMMAND" = 'yes' ]; then [ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1 + maybe_use_portable_dynlibs if [ "${EMQX_BOOT_CONFIGS:-}" = '' ]; then EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")" ## export here so the 'console' command recursively 
called from @@ -464,37 +465,68 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then export EMQX_BOOT_CONFIGS fi else - # For non-boot commands, we try to get data_dir and ssl_dist_optfile from 'ps -ef' output + # For non-boot commands, we need the runtime facts below to connect to the running node: + # 1. The running node name; + # 2. The Erlang cookie in use by the running node; + # 3. SSL options if the node is using TLS for Erlang distribution; + # 4. Erlang kernel application's net_ticktime config. + # + # There are 3 sources of truth for this runtime information. + # Listed in the order of preference: + # 1. The boot command (which can be inspected from 'ps -ef' command output) + # 2. The generated vm.