emqx/bin/emqx

#!/usr/bin/env bash
# -*- tab-width:4;indent-tabs-mode:nil -*-
# ex: ts=4 sw=4 et

# Strict mode: stop on failing commands, unset variables and failing pipeline stages
set -o errexit -o nounset -o pipefail

# DEBUG=1 in the environment turns on shell tracing
DEBUG="${DEBUG:-0}"
if [ "$DEBUG" -eq 1 ]; then
    set -x
fi

# We need to find real directory with emqx files on all platforms
# even when bin/emqx is symlinked on several levels
# - readlink -f works perfectly, but `-f` flag has completely different meaning in BSD version,
#   so we can't use it universally.
# - `stat -f%R` on MacOS does exactly what `readlink -f` does on Linux, but we can't use it
#   as a universal solution either because GNU stat has different syntax and this argument is invalid.
#   Also, version of stat which supports this syntax is only available since MacOS 12
case "$(uname -s)" in
    Darwin)
        product_version="$(sw_vers -productVersion | cut -d '.' -f 1)"
        if [ "$product_version" -ge 12 ]; then
            # Homebrew coreutils may put GNU stat first in PATH,
            # so call the MacOS default by absolute path
            RUNNER_ROOT_DIR="$(cd "$(dirname "$(/usr/bin/stat -f%R "$0" || echo "$0")")"/..; pwd -P)"
        else
            # MacOS <= 11: best effort, resolves a single level of symlink only
            RUNNER_ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)"
        fi
        ;;
    *)
        # everywhere else realpath resolves the whole symlink chain
        RUNNER_ROOT_DIR="$(cd "$(dirname "$(realpath "$0" || echo "$0")")"/..; pwd -P)"
        ;;
esac

# Load the release variables generated at build time
# shellcheck disable=SC1090,SC1091
source "${RUNNER_ROOT_DIR}/releases/emqx_vars"

# RUNNER_ROOT_DIR was computed above; the other three are expected from emqx_vars
export RUNNER_ROOT_DIR EMQX_ETC_DIR REL_VSN SCHEMA_MOD

RUNNER_SCRIPT="${RUNNER_BIN_DIR}/${REL_NAME}"
CODE_LOADING_MODE="${CODE_LOADING_MODE:-embedded}"
REL_DIR="${RUNNER_ROOT_DIR}/releases/${REL_VSN}"

WHOAMI="$(whoami)"

# hocon reads overrides from environment variables starting with "EMQX_"
export HOCON_ENV_OVERRIDE_PREFIX='EMQX_'

# Locations and names of the bundled Erlang runtime
ERTS_DIR="${RUNNER_ROOT_DIR}/erts-${ERTS_VSN}"
BINDIR="${ERTS_DIR}/bin"
EMU="beam"
PROGNAME="erl"
ERTS_LIB_DIR="${RUNNER_ROOT_DIR}/lib"
export ERTS_DIR BINDIR EMU PROGNAME ERTS_LIB_DIR
DYNLIBS_DIR="${RUNNER_ROOT_DIR}/dynlibs"

# Print an error message to stderr (backslash escapes are interpreted);
# coloured red unless TERM is unset or 'dumb'.
logerr() {
    case "${TERM:-dumb}" in
        dumb)
            echo -e "ERROR: $*" >&2
            ;;
        *)
            echo -e "$(tput setaf 1)ERROR: $*$(tput sgr0)" >&2
            ;;
    esac
}

# Print a warning message to stdout; coloured yellow unless TERM is unset or 'dumb'.
logwarn() {
    case "${TERM:-dumb}" in
        dumb)
            echo "WARNING: $*"
            ;;
        *)
            echo "$(tput setaf 3)WARNING: $*$(tput sgr0)"
            ;;
    esac
}

# Log an error and terminate the script.
#   $1 - message passed to logerr
#   $2 - exit code (optional, defaults to 1)
die() {
    # stop tracing so the message is not buried in debug output
    set +x
    errno="${2:-1}"
    logerr "$1"
    exit "$errno"
}

# Abort with 'node_is_not_running!' unless the node answers a nodetool ping.
assert_node_alive() {
    relx_nodetool "ping" > /dev/null || die "node_is_not_running!" 1
}

# Print the help text for one command.
#   $1 - command name; anything not listed below prints the general overview
usage() {
    local command="$1"

    case "$command" in
    start)
        printf '%s\n' "Start EMQX service in daemon mode"
        ;;
    stop)
        printf '%s\n' "Stop the running EMQX program"
        ;;
    console)
        printf '%s\n' \
            "Boot up EMQX service in an interactive Erlang or Elixir shell" \
            "This command needs a tty"
        ;;
    console_clean)
        printf '%s\n' \
            "This command does NOT boot up the EMQX service" \
            "It only starts an interactive Erlang or Elixir console with all the" \
            "EMQX code available"
        ;;
    foreground)
        printf '%s\n' "Start EMQX in foreground mode without an interactive shell"
        ;;
    pid)
        printf '%s\n' "Print out EMQX process identifier"
        ;;
    ping)
        printf '%s\n' \
            "Check if the EMQX node is up and running" \
            "This command exit with 0 silently if node is running"
        ;;
    escript)
        printf '%s\n' \
            "Execute a escript using the Erlang runtime from EMQX package installation" \
            "For example $REL_NAME escript /path/to/my/escript my_arg1 my_arg2"
        ;;
    attach)
        printf '%s\n' \
            "This command is applicable when EMQX is started in daemon mode." \
            "It attaches the current shell to EMQX's control console" \
            "through a named pipe."
        logwarn "try to use the safer alternative, remote_console command."
        ;;
    remote_console)
        printf '%s\n' \
            "Start an interactive shell running an Erlang or Elixir node which " \
            "hidden-connects to the running EMQX node". \
            "This command is mostly used for troubleshooting."
        ;;
    ertspath)
        printf '%s\n' "Print path to Erlang runtime bin dir"
        ;;
    rpc)
        printf '%s\n' \
            "Usage: $REL_NAME rpc MODULE FUNCTION [ARGS, ...]" \
            "Connect to the EMQX node and make an Erlang RPC" \
            "This command blocks for at most 60 seconds." \
            "It exits with non-zero code in case of any RPC failure" \
            "including connection error and runtime exception"
        ;;
    rpcterms)
        printf '%s\n' \
            "Usage: $REL_NAME rpcterms MODULE FUNCTION [ARGS, ...]" \
            "Connect to the EMQX node and make an Erlang RPC" \
            "The result of the RPC call is pretty-printed as an " \
            "Erlang term"
        ;;
    root_dir)
        printf '%s\n' "Print EMQX installation root dir"
        ;;
    eval)
        printf '%s\n' "Evaluate an Erlang or Elixir expression in the EMQX node"
        ;;
    eval-erl)
        printf '%s\n' "Evaluate an Erlang expression in the EMQX node, even on Elixir node"
        ;;
    versions)
        printf '%s\n' "List installed EMQX versions and their status"
        ;;
    unpack)
        printf '%s\n' \
            "Usage: $REL_NAME unpack [VERSION]" \
            "Unpacks a release package VERSION, it assumes that this" \
            "release package tarball has already been deployed at one" \
            "of the following locations:" \
            "      releases/<relname>-<version>.tar.gz"
        ;;
    install)
        printf '%s\n' \
            "Usage: $REL_NAME install [VERSION]" \
            "Installs a release package VERSION, it assumes that this" \
            "release package tarball has already been deployed at one" \
            "of the following locations:" \
            "      releases/<relname>-<version>.tar.gz" \
            "" \
            "     --no-permanent   Install release package VERSION but" \
            "                      don't make it permanent"
        ;;
    uninstall)
        printf '%s\n' \
            "Usage: $REL_NAME uninstall [VERSION]" \
            "Uninstalls a release VERSION, it will only accept" \
            "versions that are not currently in use"
        ;;
    upgrade)
        printf '%s\n' \
            "Usage: $REL_NAME upgrade [VERSION]" \
            "Upgrades the currently running release to VERSION, it assumes" \
            "that a release package tarball has already been deployed at one" \
            "of the following locations:" \
            "      releases/<relname>-<version>.tar.gz" \
            "" \
            "     --no-permanent   Install release package VERSION but" \
            "                      don't make it permanent"
        ;;
    downgrade)
        printf '%s\n' \
            "Usage: $REL_NAME downgrade [VERSION]" \
            "Downgrades the currently running release to VERSION, it assumes" \
            "that a release package tarball has already been deployed at one" \
            "of the following locations:" \
            "      releases/<relname>-<version>.tar.gz" \
            "" \
            "     --no-permanent   Install release package VERSION but" \
            "                      don't make it permanent"
        ;;
    check_config)
        printf '%s\n' "Checks the EMQX config without generating any files"
        ;;
    *)
        # general overview for 'help' and for unknown commands
        printf '%s\n' \
            "Usage: $REL_NAME COMMAND [help]" \
            '' \
            "Commonly used COMMANDs:" \
            "  start:      Start EMQX in daemon mode" \
            "  console:    Start EMQX in an interactive Erlang or Elixir shell" \
            "  foreground: Start EMQX in foreground mode without an interactive shell" \
            "  stop:       Stop the running EMQX node" \
            "  ctl:        Administration commands, execute '$REL_NAME ctl help' for more details" \
            '' \
            "More:" \
            "  Shell attach:  remote_console | attach" \
            "  Up/Down-grade: upgrade | downgrade | install | uninstall" \
            "  Install info:  ertspath | root_dir" \
            "  Runtime info:  pid | ping | versions" \
            "  Validate Config:  check_config" \
            "  Advanced:      console_clean | escript | rpc | rpcterms | eval | eval-erl" \
            '' \
            "Execute '$REL_NAME COMMAND help' for more information"
    ;;
    esac
}

COMMAND="${1:-}"

# No command at all is an error; an explicit 'help' is not
case "$COMMAND" in
    '')
        usage 'help'
        exit 1
        ;;
    help)
        usage 'help'
        exit 0
        ;;
esac

## '<command> help' prints the per-command usage,
## except for 'ctl' which has its own usage info
if [ "${2:-}" = 'help' ] && [ "$COMMAND" != 'ctl' ]; then
    usage "$COMMAND"
    exit 0
fi

## IS_BOOT_COMMAND is set for later to inspect node name and cookie from hocon config (or env variable)
## The two path-printing commands are answered right here.
case "${COMMAND}" in
    ertspath)
        echo "$ERTS_DIR"
        exit 0
        ;;
    root_dir)
        echo "$RUNNER_ROOT_DIR"
        exit 0
        ;;
    start|console|console_clean|foreground|check_config)
        IS_BOOT_COMMAND='yes'
        ;;
    *)
        IS_BOOT_COMMAND='no'
        ;;
esac

## backward compatible
## LD_LIBRARY_PATH may be unset in the caller's environment; expanding it bare
## would abort the script under 'set -u'. The ':+' form also avoids leaving a
## trailing ':' (which the dynamic linker treats as the current directory).
if [ -d "$ERTS_DIR/lib" ]; then
    export LD_LIBRARY_PATH="$ERTS_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
fi

# Fail early unless the script runs as RUNNER_USER.
# When invoked by root, re-execute the same command line as RUNNER_USER.
# Arguments: the original command line ("$@" of the script).
check_user() {
    # Nothing to enforce when no runner user is configured,
    # or when we already are that user
    if [ -z "$RUNNER_USER" ] || [ "x$WHOAMI" = "x$RUNNER_USER" ]; then
        return 0
    fi
    # Only root is able to switch to the runner user
    if [ "x$WHOAMI" != "xroot" ]; then
        echo "You need to be root or use sudo to run this command"
        exit 1
    fi
    # Rebuild the command line, wrapping every argument in double quotes
    CMD="DEBUG=$DEBUG \"$RUNNER_SCRIPT\" "
    for ARG; do
        CMD="${CMD} \"$ARG\""
    done
    # Drop privileges: exec replaces the current shell with a login shell
    # of the runner user running the rebuilt command
    exec su - "$RUNNER_USER" -c "$CMD"
}

# Make sure the user running this script is the owner and/or su to that user
check_user "$@"
ES=$?
[ "$ES" -eq 0 ] || exit "$ES"

# Make sure log directory exists
mkdir -p "${RUNNER_LOG_DIR}"

# Erlang expressions passed to 'erl -eval' by compatiblity_info().
# One marker line is printed per successful probe: BEAM_OK once the VM
# evaluates code, CRYPTO_OK when crypto:info_lib() returns a non-empty list,
# MNESIA_OK when the mnesia_hook module is available.
# The VM halts with 1 when the crypto probe fails, 2 when mnesia_hook is
# missing, and 0 otherwise.
COMPATIBILITY_CHECK='
    io:format("BEAM_OK~n", []),
    try
        [_|_] = L = crypto:info_lib(),
        io:format("CRYPTO_OK ~0p~n", [L])
    catch
        _ : _ ->
            %% so logger has the chance to log something
            timer:sleep(100),
            halt(1)

    end,
    try
        mnesia_hook:module_info(),
        io:format("MNESIA_OK~n", [])
    catch
        _ : _ ->
            io:format("WARNING: Mnesia app has no post-coommit hook support~n", []),
            halt(2)
    end,
    halt(0).
'

# Run COMPATIBILITY_CHECK in a shell-less VM booted from start_clean;
# the probe markers are written to the VM's stdout.
compatiblity_info() {
  # RELEASE_LIB is used by Elixir
  local -a probe_args=(
    -noshell
    -boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib"
    -boot "$REL_DIR/start_clean"
    -eval "$COMPATIBILITY_CHECK"
  )
  # set crash-dump bytes to zero to ensure no crash dump is generated when erl crashes
  env ERL_CRASH_DUMP_BYTES=0 "$BINDIR/$PROGNAME" "${probe_args[@]}"
}

# Collect Erlang/OTP runtime sanity and compatibility in one go
# Only boot commands run the probe. The first attempt uses the system
# libraries; if crypto does not load, the probe is retried with DYNLIBS_DIR
# put in front of LD_LIBRARY_PATH.
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
    # Read BUILD_INFO early as the next commands may mess up the shell
    BUILD_INFO="$(cat "${REL_DIR}/BUILD_INFO")"
    # First attempt: stderr is discarded and a failing VM is tolerated,
    # only the markers on stdout matter
    COMPATIBILITY_INFO="$(compatiblity_info 2>/dev/null || true)"
    if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then
        ## failed to start, might be due to missing libs, try to be portable
        export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-$DYNLIBS_DIR}"
        if [ "$LD_LIBRARY_PATH" != "$DYNLIBS_DIR" ]; then
            export LD_LIBRARY_PATH="$DYNLIBS_DIR:$LD_LIBRARY_PATH"
        fi
        ## Turn off debug, because COMPATIBILITY_INFO needs to capture stderr
        set +x
        # Second attempt: stderr is captured too so it can be shown on failure
        COMPATIBILITY_INFO="$(compatiblity_info 2>&1 || true)"
        if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'BEAM_OK'); then
            ## not able to start beam.smp
            set +x
            logerr "$COMPATIBILITY_INFO"
            logerr "Please ensure it is running on the correct platform:"
            logerr "$BUILD_INFO"
            logerr "Version=$REL_VSN"
            logerr "Required dependencies: openssl-1.1.1 (libcrypto), libncurses and libatomic1"
            exit 1
        elif ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then
            ## not able to start crypto app
            set +x
            logerr "$COMPATIBILITY_INFO"
            exit 2
        fi
        # The retry succeeded: the bundled libraries are in use from here on
        logerr "Using libs from '${DYNLIBS_DIR}' due to missing from the OS."
    fi
    # Restore tracing that was switched off for the second attempt
    [ "$DEBUG" -eq 1 ] && set -x
fi

# Warn the user if ulimit -n is less than 1024
ULIMIT_F="$(ulimit -n)"
if [ "$ULIMIT_F" -lt 1024 ]; then
    logwarn "ulimit -n is ${ULIMIT_F}; 1024 is the recommended minimum."
fi

# In-place editing: GNU and BusyBox sed take a bare '-i',
# any other sed is given an explicit empty backup suffix
case "$(sed --help 2>&1)" in
    *GNU*|*BusyBox*)
        SED_REPLACE="sed -i "
        ;;
    *)
        SED_REPLACE="sed -i '' "
        ;;
esac

# Print the OS pid of the running node.
# On success the double quotes around the RPC result are stripped;
# on failure the raw nodetool output is printed and 1 is returned.
relx_get_pid() {
    if ! output="$(relx_nodetool rpcterms os getpid)"; then
        echo "$output"
        return 1
    fi
    # shellcheck disable=SC2001 # Escaped quote taken as closing quote in editor
    echo "$output" | sed -e 's/"//g'
    return 0
}

# Replace the current process with an interactive shell node that is
# hidden-connected to the running node (erl, or iex on Elixir releases).
remsh() {
    # A random id lets several remote shells attach to the same node
    id="remsh$(relx_gen_id)-${NAME}"
    # Use the same net_ticktime as the node we connect to
    TICKTIME="$(relx_nodetool rpcterms net_kernel get_net_ticktime)"

    local -a shell_cmd
    # EPMD_ARGS is deliberately left unquoted in the erl branch: it holds several words
    # shellcheck disable=SC2086,SC2206
    if [ "$IS_ELIXIR" != no ] && [ "${EMQX_CONSOLE_FLAVOR:-}" != 'erl' ]; then
        shell_cmd=(
            "$REL_DIR/iex"
            --remsh "$NAME"
            --boot-var RELEASE_LIB "$ERTS_LIB_DIR"
            --cookie "$COOKIE"
            --hidden
            --erl "-kernel net_ticktime $TICKTIME"
            --erl "$EPMD_ARGS"
            --erl "$NAME_TYPE $id"
            --boot "$REL_DIR/start_clean"
        )
    else
        shell_cmd=(
            "$BINDIR/erl" "$NAME_TYPE" "$id"
            -remsh "$NAME" -boot "$REL_DIR/start_clean"
            -boot_var ERTS_LIB_DIR "$ERTS_LIB_DIR"
            -boot_var RELEASE_LIB "$ERTS_LIB_DIR"
            -setcookie "$COOKIE"
            -hidden
            -kernel net_ticktime "$TICKTIME"
            $EPMD_ARGS
        )
    fi
    exec "${shell_cmd[@]}"
}

# Print a random integer in the range 0..999, derived from 4 bytes of /dev/urandom
relx_gen_id() {
    od -t u -N 4 /dev/urandom | awk 'NR == 1 { print $2 % 1000 }'
}

# Run bin/nodetool with the bundled escript, forwarding all arguments unchanged
call_nodetool() {
    local escript_bin="$ERTS_DIR/bin/escript"
    local nodetool_script="$RUNNER_ROOT_DIR/bin/nodetool"
    "$escript_bin" "$nodetool_script" "$@"
}

# Run a nodetool command against the node identified by NAME_TYPE/NAME/COOKIE.
#   $1   - nodetool command
#   $2.. - arguments of that command
# EPMD_ARGS is appended to ERL_FLAGS for the duration of the call only.
relx_nodetool() {
    command="$1"
    shift
    ERL_FLAGS="${ERL_FLAGS:-} $EPMD_ARGS" call_nodetool \
        "$NAME_TYPE" "$NAME" -setcookie "$COOKIE" "$command" "$@"
}

# Run 'nodetool hocon' with the given arguments;
# on failure, die with nodetool's exit code.
call_hocon() {
    local rc=0
    call_nodetool hocon "$@" || rc=$?
    if [ "$rc" -ne 0 ]; then
        die "call_hocon_failed: $*" "$rc"
    fi
}

## Resolve boot configs in a batch
## This is because starting the Erlang beam with all modules loaded
## and parsing HOCON config + environment variables is a non-trivial task
## The result, EMQX_BOOT_CONFIGS, holds 'key=value' entries that are
## read back one key at a time by get_boot_config.
CONF_KEYS=( 'node.data_dir' 'node.name' 'node.cookie' 'node.db_backend' 'cluster.proto_dist' )
if [ "$IS_ENTERPRISE" = 'yes' ]; then
    CONF_KEYS+=( 'license.key' )
fi

if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
    # An already exported EMQX_BOOT_CONFIGS is reused as is
    if [ "${EMQX_BOOT_CONFIGS:-}" = '' ]; then
        EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
        ## export here so the 'console' command recursively called from
        ## 'start' command does not have to parse the configs again
        export EMQX_BOOT_CONFIGS
    fi
else
    # For non-boot commands, we try to get data_dir and ssl_dist_optfile from 'ps -ef' output
    # The '[r]' in the pattern keeps this grep process itself out of the matches
    # shellcheck disable=SC2009
    PS_LINE="$(ps -ef | grep "\-[r]oot $RUNNER_ROOT_DIR" || true)"
    # NOTE(review): with an empty PS_LINE 'echo -e' still emits one newline, so
    # the count below is also 1 when no node is running; that case then ends in
    # the 'node_is_not_running!' die further down - confirm this is intended.
    if [ "$(echo -e "$PS_LINE" | wc -l)" -eq 1 ]; then
        ## only one emqx node is running
        ## strip 'emqx_data_dir ' and ' --' because the dir in between may contain spaces
        DATA_DIR="$(echo -e "$PS_LINE" | grep -oE "\-emqx_data_dir.*" | sed -E 's#.+emqx_data_dir[[:blank:]]##g' | sed -E 's#[[:blank:]]--$##g' || true)"
        if [ "$DATA_DIR" = '' ]; then
            ## this should not happen unless -emqx_data_dir is not set
            die "node_is_not_running!" 1
        fi
        # get ssl_dist_optfile option
        SSL_DIST_OPTFILE="$(echo -e "$PS_LINE" | grep -oE '\-ssl_dist_optfile\s.+\s' | awk '{print $2}' || true)"
        # The '\n' below is a literal backslash-n; get_boot_config expands it with 'echo -e'
        if [ -z "$SSL_DIST_OPTFILE" ]; then
            EMQX_BOOT_CONFIGS="node.data_dir=${DATA_DIR}\ncluster.proto_dist=inet_tcp"
        else
            EMQX_BOOT_CONFIGS="node.data_dir=${DATA_DIR}\ncluster.proto_dist=inet_tls"
        fi
    else
        ## None or more than one node is running, resolve from boot config
        EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
    fi
fi

# Print the value stored under a key in EMQX_BOOT_CONFIGS, double quotes removed.
#   $1 - config path, e.g. 'node.name'
# The path is used as a regular expression by both grep and sed.
get_boot_config() {
    path_to_value="$1"
    local entry_prefix="${path_to_value}="
    echo -e "$EMQX_BOOT_CONFIGS" \
        | grep "$entry_prefix" \
        | sed -e "s/${entry_prefix}//g" \
        | tr -d '"'
}

# Distribution flags shared by every VM and nodetool invocation
EPMD_ARGS="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka"
PROTO_DIST="$(get_boot_config 'cluster.proto_dist' || true)"
# this environment variable is required by ekka_dist module
# because proto_dist is overridden to ekka, and there is a lack of ekka_tls module
export EKKA_PROTO_DIST_MOD="${PROTO_DIST:-inet_tcp}"
if [ "$EKKA_PROTO_DIST_MOD" = 'inet_tls' ]; then
    if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
        SSL_DIST_OPTFILE=${EMQX_SSL_DIST_OPTFILE:-"$EMQX_ETC_DIR/ssl_dist.conf"}
        case "$SSL_DIST_OPTFILE" in
            *\ *)
                # there is unfortunately no way to support space for this option because we'd need to grep
                # from 'ps -ef' result to get this option for non-boot commands (nodetool) to run
                set +x
                logerr "Got space in: $SSL_DIST_OPTFILE"
                logerr "No space is allowed for Erlang distribution over SSL option file path."
                logerr "Configure it from environment variable EMQX_SSL_DIST_OPTFILE."
                logerr "Or make sure emqx root path '$RUNNER_ROOT_DIR' has no space"
                exit 1
                ;;
            *)
                true
                ;;
        esac
    else
        # Non-boot commands only have SSL_DIST_OPTFILE when it was discovered
        # from the 'ps -ef' output; when the boot configs were resolved through
        # hocon instead, it was never assigned and expanding it would abort the
        # script under 'set -u'. Fall back to the same default as boot commands.
        SSL_DIST_OPTFILE="${SSL_DIST_OPTFILE:-${EMQX_SSL_DIST_OPTFILE:-$EMQX_ETC_DIR/ssl_dist.conf}}"
    fi
    EPMD_ARGS="${EPMD_ARGS} -ssl_dist_optfile $SSL_DIST_OPTFILE"
fi

DATA_DIR="$(get_boot_config 'node.data_dir')"
# ensure no trailing /
DATA_DIR="${DATA_DIR%/}"
case "$DATA_DIR" in
    /*)
        # already absolute
        ;;
    *)
        # relative paths are anchored at the installation root
        DATA_DIR="${RUNNER_ROOT_DIR}/${DATA_DIR}"
        ;;
esac
CONFIGS_DIR="${DATA_DIR}/configs"
mkdir -p "${CONFIGS_DIR}"

# Validate the license key of an enterprise release.
# Returns 0 right away for non-enterprise releases, 1 when no key is configured,
# otherwise the status of 'nodetool check_license_key'.
check_license() {
    [ "$IS_ENTERPRISE" != "no" ] || return 0

    # the environment variable takes precedence over the boot config
    key_license="${EMQX_LICENSE__KEY:-$(get_boot_config 'license.key')}"

    if [ -z "$key_license" ] || [ "$key_license" = "undefined" ]; then
      set +x
      logerr "License not found."
      logerr "Please specify one via the EMQX_LICENSE__KEY variable"
      logerr "or via license.key in emqx-enterprise.conf."
      return 1
    fi

    call_nodetool check_license_key "$key_license"
}

# Run an escript with the bundled Erlang runtime.
#   $1   - the command word, dropped
#   $2   - script path, resolved relative to RUNNER_ROOT_DIR
#   $3.. - arguments handed to the script
relx_escript() {
    shift
    scriptpath="$1"
    shift
    local escript_bin="$ERTS_DIR/bin/escript"
    "$escript_bin" "$RUNNER_ROOT_DIR/$scriptpath" "$@"
}

# Print (without trailing newline) the shell command that run_erl executes:
# this script re-invoked with START_OPTION
relx_start_command() {
    printf 'exec "%s" "%s"' "$RUNNER_SCRIPT" "$START_OPTION"
}

# Validate emqx.conf against the schema; no file is generated
check_config() {
    local base_conf="$EMQX_ETC_DIR/emqx.conf"
    call_hocon -v -s "$SCHEMA_MOD" -c "$base_conf" check_schema
}

# Function to generate app.config and vm.args
# sets two environment variables CONF_FILE and ARGS_FILE
#   $1 - node name type flag ('-name' or '-sname')
#   $2 - node name
# The resulting vm.<time>.args is etc/vm.args overlaid with the hocon
# generated arguments, followed by the node name and the mnesia dir.
generate_config() {
    local name_type="$1"
    local node_name="$2"
    ## Delete the *.siz files first or it can't start after
    ## changing the config 'log.rotation.size'
    rm -rf "${RUNNER_LOG_DIR}"/*.siz

    ## timestamp for each generation
    local NOW_TIME
    NOW_TIME="$(date +'%Y.%m.%d.%H.%M.%S')"

    ## this command populates two files: app.<time>.config and vm.<time>.args
    ## NOTE: the generate command merges environment variables to the base config (emqx.conf),
    ## but does not include the cluster-override.conf and local-override.conf
    ## meaning, certain overrides will not be mapped to app.<time>.config file
    call_hocon -v -t "$NOW_TIME" -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf -d "$DATA_DIR"/configs generate

    ## filenames are per-hocon convention
    CONF_FILE="$CONFIGS_DIR/app.$NOW_TIME.config"
    ARGS_FILE="$CONFIGS_DIR/vm.$NOW_TIME.args"

    ## Merge hocon generated *.args into the vm.args
    TMP_ARG_FILE="$CONFIGS_DIR/vm.args.tmp"
    cp "$EMQX_ETC_DIR/vm.args" "$TMP_ARG_FILE"
    echo "" >> "$TMP_ARG_FILE"
    echo "-pa \"${REL_DIR}/consolidated\"" >> "$TMP_ARG_FILE"
    ## read lines from generated vm.<time>.args file
    ## drop comment lines, and empty lines using sed
    ## pipe the lines to a while loop
    sed '/^#/d' "$ARGS_FILE" | sed '/^$/d' | while IFS='' read -r ARG_LINE || [ -n "$ARG_LINE" ]; do
        ## in the loop, split the 'key[:space:]value' pair
        ARG_KEY=$(echo "$ARG_LINE" | awk '{$NF="";print}')
        ARG_VALUE=$(echo "$ARG_LINE" | awk '{print $NF}')
        ## use the key to look up in vm.args file for the value
        ## The braces are required: '|' binds tighter than '||', so without
        ## them awk would only be fed by 'true' and the whole matching line
        ## (not its last field) would be taken as the old value.
        TMP_ARG_VALUE=$( { grep "^$ARG_KEY" "$TMP_ARG_FILE" || true; } | awk '{print $NF}')
        ## compare generated (to override) value to original (to be overridden) value
        if [ "$ARG_VALUE" != "$TMP_ARG_VALUE" ] ; then
            ## if they are different
            if [ -n "$TMP_ARG_VALUE" ]; then
                ## if the old value is present, replace it with generated value
                sh -c "$SED_REPLACE 's|^$ARG_KEY.*$|$ARG_LINE|' \"$TMP_ARG_FILE\""
            else
                ## otherwise append generated value to the end
                echo "$ARG_LINE" >> "$TMP_ARG_FILE"
            fi
        fi
    done
    echo "$name_type $node_name" >> "$TMP_ARG_FILE"
    echo "-mnesia dir '\"$DATA_DIR/mnesia/$NAME\"'" >> "$TMP_ARG_FILE"
    ## rename the generated vm.<time>.args file
    mv -f "$TMP_ARG_FILE" "$ARGS_FILE"
}

# Tell whether a process is gone.
#   $1 - process id
# Returns 0 when the process no longer exists or is a zombie, 1 while it is alive.
is_down() {
    PID="$1"
    if ! ps -p "$PID" >/dev/null; then
        # it's gone
        return 0
    fi
    # still around
    # shellcheck disable=SC2009 # this grep pattern is not a part of the program names
    if ps -p "$PID" | grep -q 'defunct'; then
        # zombie state, print parent pid
        parent="$(ps -o ppid= -p "$PID" | tr -d ' ')"
        logwarn "$PID is marked <defunct>, parent: $(ps -p "$parent")"
        return 0
    fi
    return 1
}

# Retry a command once per second until it succeeds.
#   $1   - maximum number of seconds to keep retrying
#   $2.. - command and its arguments
# Returns 0 as soon as the command succeeds, 1 once the time is used up.
# The command is run from "$@" so that arguments containing spaces or glob
# characters reach it intact instead of being re-split from a flat string.
wait_for() {
    local WAIT_TIME
    WAIT_TIME="$1"
    shift
    while true; do
        if "$@" >/dev/null 2>&1; then
            return 0
        fi
        if [ "$WAIT_TIME" -le 0 ]; then
            return 1
        fi
        WAIT_TIME=$((WAIT_TIME - 1))
        sleep 1
    done
}

# Retry a command once per second until its stdout equals an expected value.
#   $1   - expected output
#   $2   - maximum number of seconds to keep retrying
#   $3.. - command and its arguments
# Returns 0 on a match, 1 once the time is used up.
# The command is run from "$@" so that arguments containing spaces or glob
# characters reach it intact instead of being re-split from a flat string.
wait_until_return_val() {
    local RESULT
    local WAIT_TIME
    RESULT="$1"
    WAIT_TIME="$2"
    shift 2
    while true; do
        if [ "$("$@" 2>/dev/null)" = "$RESULT" ]; then
            return 0
        fi
        if [ "$WAIT_TIME" -le 0 ]; then
            return 1
        fi
        WAIT_TIME=$((WAIT_TIME - 1))
        sleep 1
    done
}

# Print the path of the newest generated vm.<time>.args file in CONFIGS_DIR
# (the last one in sort order).
#   $1 - name of the environment variable to suggest when no file is found
# Exits with 1 when no such file exists.
latest_vm_args() {
    local hint_var_name="$1"
    local vm_args_file
    vm_args_file="$(find "$CONFIGS_DIR" -type f -name "vm.*.args" | sort | tail -1)"
    if [ ! -f "$vm_args_file" ]; then
        set +x
        logerr "Node not initialized?"
        logerr "Generated config file vm.*.args is not found for command '$COMMAND'"
        logerr "in config dir: $CONFIGS_DIR"
        logerr "In case the file has been deleted while the node is running,"
        logerr "set environment variable '$hint_var_name' to continue"
        exit 1
    fi
    echo "$vm_args_file"
}

# backward compatible with 4.x
# Translate the legacy EMQX_LOG__TO variable (console|file|both) into the
# two handler switches; EMQX_LOG__TO itself is always unset.
tr_log_to_env() {
    local log_to=${EMQX_LOG__TO:-undefined}
    # unset because it's unknown to 5.0
    unset EMQX_LOG__TO

    local console_enabled file_enabled
    case "${log_to}" in
        console)
            console_enabled='true'
            file_enabled='false'
            ;;
        file)
            console_enabled='false'
            file_enabled='true'
            ;;
        both)
            console_enabled='true'
            file_enabled='true'
            ;;
        default|undefined)
            # config file defaults wanted, or value not set: do nothing
            return 0
            ;;
        *)
            logerr "Unknown environment value for EMQX_LOG__TO=${log_to} discarded"
            return
            ;;
    esac
    export EMQX_LOG__CONSOLE_HANDLER__ENABLE="$console_enabled"
    export EMQX_LOG__FILE_HANDLERS__DEFAULT__ENABLE="$file_enabled"
}

# Default to console-only logging unless EMQX_LOG__TO is 'default',
# in which case the config file settings are left alone.
maybe_log_to_console() {
    if [ "${EMQX_LOG__TO:-}" != 'default' ]; then
        tr_log_to_env
        # ensure defaults
        export EMQX_LOG__CONSOLE_HANDLER__ENABLE="${EMQX_LOG__CONSOLE_HANDLER__ENABLE:-true}"
        export EMQX_LOG__FILE_HANDLERS__DEFAULT__ENABLE="${EMQX_LOG__FILE_HANDLERS__DEFAULT__ENABLE:-false}"
    else
        # want to use config file defaults, do nothing
        unset EMQX_LOG__TO
    fi
}

## To be backward compatible, accept EMQX_NODE_NAME as an alias of EMQX_NODE__NAME
if [ -n "${EMQX_NODE_NAME:-}" ]; then
    export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
    unset EMQX_NODE_NAME
fi
## Possible ways to configure emqx node name:
## 1. configure node.name in emqx.conf
## 2. override with environment variable EMQX_NODE__NAME
## Node name is either short-name (without '@'), e.g. 'emqx'
## or long name (with '@') e.g. 'emqx@example.net' or 'emqx@127.0.0.1'
NAME="${EMQX_NODE__NAME:-}"
if [ -z "$NAME" ]; then
    if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
        # for boot commands, inspect emqx.conf for node name
        NAME="$(get_boot_config 'node.name')"
    else
        # for other commands, read it back from the latest generated vm.args
        vm_args_file="$(latest_vm_args 'EMQX_NODE__NAME')"
        NAME="$(grep -E '^-s?name' "${vm_args_file}" | awk '{print $2}')"
    fi
fi

# force to use 'emqx' short name
[ -z "$NAME" ] && NAME='emqx'

case "$NAME" in
    *@*)
        NAME_TYPE='-name'
       ;;
    *)
        NAME_TYPE='-sname'
esac
SHORT_NAME="$(echo "$NAME" | awk -F'@' '{print $1}')"
# Validate with the very expression quoted in the error message below.
# An extended regex is used because inside a bracket expression a backslash
# is an ordinary character: the former '[0-9A-Za-z_\-]' also accepted '\',
# and its '\+' repetition is a GNU extension of basic regular expressions.
if ! (echo "$SHORT_NAME" | grep -qE '^[0-9A-Za-z_-]+$'); then
    echo "Invalid node name, should be of format '^[0-9A-Za-z_-]+$'."
    exit 1
fi
# This also changes the program name from 'beam.smp' to node name
# e.g. the 'ps' command output
export ESCRIPT_NAME="$SHORT_NAME"
# Directory of the run_erl/to_erl named pipes, overridable from the environment
PIPE_DIR="${PIPE_DIR:-/$DATA_DIR/${WHOAMI}_erl_pipes/$NAME/}"

## Resolve Erlang cookie.
if [ -n "${EMQX_NODE_COOKIE:-}" ]; then
    ## To be backward compatible, read EMQX_NODE_COOKIE
    export EMQX_NODE__COOKIE="${EMQX_NODE_COOKIE}"
    unset EMQX_NODE_COOKIE
fi
COOKIE="${EMQX_NODE__COOKIE:-}"
if [ -z "$COOKIE" ]; then
    if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
        COOKIE="$(get_boot_config 'node.cookie')"
    else
        # for other commands, read it back from the latest generated vm.args
        vm_args_file="$(latest_vm_args 'EMQX_NODE__COOKIE')"
        COOKIE="$(grep -E '^-setcookie' "${vm_args_file}" | awk '{print $2}')"
    fi
fi
[ -z "$COOKIE" ] && COOKIE="$EMQX_DEFAULT_ERLANG_COOKIE"
# IS_BOOT_COMMAND is quoted here like everywhere else in this script
if [ "$IS_BOOT_COMMAND" = 'yes' ] && [ "$COOKIE" = "$EMQX_DEFAULT_ERLANG_COOKIE" ]; then
    logwarn "Default (insecure) Erlang cookie is in use."
    logwarn "Configure node.cookie in $EMQX_ETC_DIR/emqx.conf or override from environment variable EMQX_NODE__COOKIE"
    logwarn "Use the same config value for all nodes in the cluster."
fi

## The RLOG DB backend needs the mnesia_hook feature of the OTP build;
## when the compatibility probe did not report it, fall back to
## using Mnesia DB backend.
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
    if [ "$(get_boot_config 'node.db_backend')" = "rlog" ]; then
        if ! echo -e "$COMPATIBILITY_INFO" | grep -q 'MNESIA_OK'; then
          logerr "DB Backend is RLOG, but an incompatible OTP version has been detected. Falling back to using Mnesia DB backend."
          export EMQX_NODE__DB_BACKEND=mnesia
          export EMQX_NODE__DB_ROLE=core
        fi
    fi
fi

cd "${RUNNER_ROOT_DIR}"

# Dispatch on the command given as first argument. Several arms end in
# 'exec' and never return; the others fall through to the final 'exit 0'.
case "${COMMAND}" in
    start)
        # Make sure a node IS not running
        if relx_nodetool "ping" >/dev/null 2>&1; then
            die "node_is_already_running!"
        fi

        # this flag passes down to console mode
        # so we know it's intended to be run in daemon mode
        export _EMQX_START_DAEMON_MODE=1

        case "$COMMAND" in
            start)
                shift
                START_OPTION="console"
                HEART_OPTION="start"
                ;;
        esac
        RUN_PARAM="$*"

        # Set arguments for the heart command
        set -- "$RUNNER_SCRIPT" "$HEART_OPTION"
        [ "$RUN_PARAM" ] && set -- "$@" "$RUN_PARAM"

        # Export the HEART_COMMAND
        HEART_COMMAND="$RUNNER_SCRIPT $COMMAND"
        export HEART_COMMAND

        ## See: http://erlang.org/doc/man/run_erl.html
        # Export the RUN_ERL_LOG_GENERATIONS
        export RUN_ERL_LOG_GENERATIONS=${RUN_ERL_LOG_GENERATIONS:-"5"}

        # Export the RUN_ERL_LOG_MAXSIZE
        export RUN_ERL_LOG_MAXSIZE=${RUN_ERL_LOG_MAXSIZE:-"10485760"}

        mkdir -p "$PIPE_DIR"

        # run_erl daemonizes and runs this script again with START_OPTION
        "$BINDIR/run_erl" -daemon "$PIPE_DIR" "$RUNNER_LOG_DIR" \
                          "$(relx_start_command)"

        # Poll once per second until emqx:is_running() evaluates to true
        WAIT_TIME=${EMQX_WAIT_FOR_START:-120}
        if wait_until_return_val "true" "$WAIT_TIME" 'relx_nodetool' \
                'eval' 'emqx:is_running()'; then
            echo "$EMQX_DESCRIPTION $REL_VSN is started successfully!"
            exit 0
        else
            echo "$EMQX_DESCRIPTION $REL_VSN failed to start in ${WAIT_TIME} seconds."
            echo "Please find more information in erlang.log.N"
            echo "Or run 'env DEBUG=1 $0 console' to have logs printed to console."
            exit 1
        fi
        ;;

    stop)
        # Wait for the node to completely stop...
        # The pid is looked up first, while the node still answers RPCs
        PID="$(relx_get_pid)"
        if ! relx_nodetool "stop"; then
            die "Graceful shutdown failed PID=[$PID]"
        fi
        WAIT_TIME="${EMQX_WAIT_FOR_STOP:-120}"
        if ! wait_for "$WAIT_TIME" 'is_down' "$PID"; then
            msg="dangling after ${WAIT_TIME} seconds"
            # also log to syslog
            logger -t "${REL_NAME}[${PID}]" "STOP: $msg"
            # log to user console
            set +x
            logerr "Stop failed, $msg"
            echo "ERROR: $PID is still around"
            ps -p "$PID"
            exit 1
        fi
        echo "ok"
        logger -t "${REL_NAME}[${PID}]" "STOP: OK"
        ;;

    pid)
        ## Get the VM's pid
        if ! relx_get_pid; then
            exit 1
        fi
        ;;

    ping)
        assert_node_alive
        echo pong
        ;;

    escript)
        ## Run an escript under the node's environment
        if ! relx_escript "$@"; then
            exit 1
        fi
        ;;

    attach)
        assert_node_alive

        shift
        exec "$BINDIR/to_erl" "$PIPE_DIR"
        ;;

    remote_console)
        assert_node_alive

        shift
        remsh
        ;;

    upgrade|downgrade|install|unpack|uninstall)
        if [ -z "${2:-}" ]; then
            echo "Missing version argument"
            echo "Usage: $REL_NAME $COMMAND {version}"
            exit 1
        fi

        shift

        assert_node_alive

        # The second argument is an Erlang tuple describing the target node
        ERL_FLAGS="${ERL_FLAGS:-} $EPMD_ARGS" \
        exec "$BINDIR/escript" "$RUNNER_ROOT_DIR/bin/install_upgrade.escript" \
             "$COMMAND" "{'$REL_NAME', \"$NAME_TYPE\", '$NAME', '$COOKIE'}" "$@"
        ;;

    versions)
        assert_node_alive

        shift

        ERL_FLAGS="${ERL_FLAGS:-} $EPMD_ARGS" \
        exec "$BINDIR/escript" "$RUNNER_ROOT_DIR/bin/install_upgrade.escript" \
             "versions" "{'$REL_NAME', \"$NAME_TYPE\", '$NAME', '$COOKIE'}" "$@"
        ;;

    console|console_clean|foreground)
        # .boot file typically just $REL_NAME (ie, the app name)
        # however, for debugging, sometimes start_clean.boot is useful.
        # For e.g. 'setup', one may even want to name another boot script.
        case "$COMMAND" in
            console|foreground)
                if [ -f "$REL_DIR/$REL_NAME.boot" ]; then
                  BOOTFILE="$REL_DIR/$REL_NAME"
                else
                  BOOTFILE="$REL_DIR/start"
                fi
                ;;
            console_clean)
                BOOTFILE="$REL_DIR/start_clean"
                ;;
        esac
        case "$COMMAND" in
            foreground)
                FOREGROUNDOPTIONS="-noshell -noinput +Bd"
                ;;
            *)
                FOREGROUNDOPTIONS=''
                ;;
        esac

        # set before generate_config
        # In daemon mode (flag exported by the 'start' arm) only the legacy
        # EMQX_LOG__TO value is translated; otherwise console logging is the default
        if [ "${_EMQX_START_DAEMON_MODE:-}" = 1 ]; then
            tr_log_to_env
        else
            maybe_log_to_console
        fi

        #generate app.config and vm.args
        generate_config "$NAME_TYPE" "$NAME"

        check_license

        # Setup beam-required vars
        EMU="beam"
        PROGNAME="${0}"

        export EMU
        export PROGNAME

        # Store passed arguments since they will be erased by `set`
        # add emqx_data_dir to boot command so it is visible from 'ps -ef'
        ARGS="$*"

        # shellcheck disable=SC2086
        # Build an array of arguments to pass to exec later on
        # Build it here because this command will be used for logging.
        if [ "$IS_ELIXIR" = no ] || [ "${EMQX_CONSOLE_FLAVOR:-}" = 'erl' ] ; then
            # pass down RELEASE_LIB so we can switch to IS_ELIXIR=no
            # to boot an Erlang node from the elixir release
            set -- "$BINDIR/erlexec" \
                $FOREGROUNDOPTIONS \
                -boot "$BOOTFILE" \
                -boot_var RELEASE_LIB "$ERTS_LIB_DIR" \
                -boot_var ERTS_LIB_DIR "$ERTS_LIB_DIR" \
                -mode "$CODE_LOADING_MODE" \
                -config "$CONF_FILE" \
                -args_file "$ARGS_FILE" \
                $EPMD_ARGS
        else
            set -- "$REL_DIR/iex" \
                --boot "$BOOTFILE" \
                --boot-var RELEASE_LIB "${ERTS_LIB_DIR}" \
                --erl-config "${CONF_FILE}" \
                --vm-args "${ARGS_FILE}" \
                --erl "$FOREGROUNDOPTIONS" \
                --erl "-mode $CODE_LOADING_MODE" \
                --erl "$EPMD_ARGS" \
                --werl
        fi

        # Log the startup
        logger -t "${REL_NAME}[$$]" "EXEC: $* -- ${1+$ARGS} -emqx_data_dir ${DATA_DIR}"

        # Start the VM
        # ARGS is expanded unquoted on purpose so the saved arguments split back into words
        exec "$@" -- ${1+$ARGS} -emqx_data_dir "${DATA_DIR}"
        ;;

    ctl)
        assert_node_alive

        shift

        relx_nodetool rpc_infinity emqx_ctl run_command "$@"
        ;;
    rpc)
        assert_node_alive

        shift

        relx_nodetool rpc "$@"
        ;;
    rpcterms)
        assert_node_alive

        shift

        relx_nodetool rpcterms "$@"
        ;;
    eval)
        assert_node_alive

        shift
        # Elixir releases evaluate through a temporary hidden node,
        # Erlang releases go through nodetool
        if [ "$IS_ELIXIR" = "yes" ]
        then
          "$REL_DIR/elixir" \
              --hidden \
              --name "rand-$(relx_gen_id)-$NAME" \
              --cookie "$COOKIE" \
              --boot "$REL_DIR/start_clean" \
              --boot-var RELEASE_LIB "$ERTS_LIB_DIR" \
              --vm-args "$REL_DIR/remote.vm.args" \
              --erl "-start_epmd false -epmd_module ekka_epmd" \
              --rpc-eval "$NAME" "$@"
        else
          relx_nodetool "eval" "$@"
        fi
        ;;
    eval-erl)
        assert_node_alive

        shift
        relx_nodetool "eval" "$@"
        ;;

    check_config)
        check_config
        ;;
    *)
        # unknown command: print the general usage and fail
        usage "$COMMAND"
        exit 1
        ;;
esac

exit 0