emqx/bin/emqx

707 lines
21 KiB
Bash
Executable File

#!/bin/bash
# -*- tab-width:4;indent-tabs-mode:nil -*-
# ex: ts=4 sw=4 et
set -e
set -o pipefail
DEBUG="${DEBUG:-0}"
if [ "$DEBUG" -eq 1 ]; then
set -x
fi
ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)"
# shellcheck disable=SC1090
. "$ROOT_DIR"/releases/emqx_vars
# defined in emqx_vars
export RUNNER_ROOT_DIR
export RUNNER_ETC_DIR
export REL_VSN
RUNNER_SCRIPT="$RUNNER_BIN_DIR/$REL_NAME"
CODE_LOADING_MODE="${CODE_LOADING_MODE:-embedded}"
REL_DIR="$RUNNER_ROOT_DIR/releases/$REL_VSN"
SCHEMA_MOD=emqx_machine_schema
WHOAMI=$(whoami)
# Make sure log directory exists
mkdir -p "$RUNNER_LOG_DIR"
# Make sure data directory exists
mkdir -p "$RUNNER_DATA_DIR"
# Make sure data/configs exists
CONFIGS_DIR="$RUNNER_DATA_DIR/configs"
mkdir -p "$CONFIGS_DIR"
# hocon try to read environment variables starting with "EMQX_"
export HOCON_ENV_OVERRIDE_PREFIX='EMQX_'
export ROOTDIR="$RUNNER_ROOT_DIR"
export ERTS_DIR="$ROOTDIR/erts-$ERTS_VSN"
export BINDIR="$ERTS_DIR/bin"
export EMU="beam"
export PROGNAME="erl"
export LD_LIBRARY_PATH="$ERTS_DIR/lib:$LD_LIBRARY_PATH"
export ERTS_LIB_DIR="$ERTS_DIR/../lib"
MNESIA_DATA_DIR="$RUNNER_DATA_DIR/mnesia/$NAME"
# Echo to stderr on errors
echoerr() { echo "ERROR: $*" 1>&2; }
die() {
echoerr "ERROR: $1"
errno=${2:-1}
exit "$errno"
}
assert_node_alive() {
if ! relx_nodetool "ping" > /dev/null; then
die "node_is_not_running!" 1
fi
}
relx_usage() {
command="$1"
case "$command" in
unpack)
echo "Usage: $REL_NAME unpack [VERSION]"
echo "Unpacks a release package VERSION, it assumes that this"
echo "release package tarball has already been deployed at one"
echo "of the following locations:"
echo " releases/<relname>-<version>.tar.gz"
echo " releases/<relname>-<version>.zip"
;;
install)
echo "Usage: $REL_NAME install [VERSION]"
echo "Installs a release package VERSION, it assumes that this"
echo "release package tarball has already been deployed at one"
echo "of the following locations:"
echo " releases/<relname>-<version>.tar.gz"
echo " releases/<relname>-<version>.zip"
echo ""
echo " --no-permanent Install release package VERSION but"
echo " don't make it permanent"
;;
uninstall)
echo "Usage: $REL_NAME uninstall [VERSION]"
echo "Uninstalls a release VERSION, it will only accept"
echo "versions that are not currently in use"
;;
upgrade)
echo "Usage: $REL_NAME upgrade [VERSION]"
echo "Upgrades the currently running release to VERSION, it assumes"
echo "that a release package tarball has already been deployed at one"
echo "of the following locations:"
echo " releases/<relname>-<version>.tar.gz"
echo " releases/<relname>-<version>.zip"
echo ""
echo " --no-permanent Install release package VERSION but"
echo " don't make it permanent"
;;
downgrade)
echo "Usage: $REL_NAME downgrade [VERSION]"
echo "Downgrades the currently running release to VERSION, it assumes"
echo "that a release package tarball has already been deployed at one"
echo "of the following locations:"
echo " releases/<relname>-<version>.tar.gz"
echo " releases/<relname>-<version>.zip"
echo ""
echo " --no-permanent Install release package VERSION but"
echo " don't make it permanent"
;;
*)
echo "Usage: $REL_NAME {start|start_boot <file>|ertspath|foreground|stop|pid|ping|console|console_clean|console_boot <file>|attach|remote_console|upgrade|downgrade|install|uninstall|versions|escript|ctl|rpc|rpcterms|eval|root_dir}"
;;
esac
}
# Simple way to check the correct user and fail early
check_user() {
# Validate that the user running the script is the owner of the
# RUN_DIR.
if [ "$RUNNER_USER" ] && [ "x$WHOAMI" != "x$RUNNER_USER" ]; then
if [ "x$WHOAMI" != "xroot" ]; then
echo "You need to be root or use sudo to run this command"
exit 1
fi
CMD="DEBUG=$DEBUG \"$RUNNER_SCRIPT\" "
for ARG in "$@"; do
CMD="${CMD} \"$ARG\""
done
# This will drop priviledges into the runner user
# It exec's in a new shell and the current shell will exit
exec su - "$RUNNER_USER" -c "$CMD"
fi
}
# Make sure the user running this script is the owner and/or su to that user
check_user "$@"
ES=$?
if [ "$ES" -ne 0 ]; then
exit $ES
fi
if [ -z "$WITH_EPMD" ]; then
EPMD_ARG="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka"
else
EPMD_ARG="-start_epmd true $PROTO_DIST_ARG"
fi
# Warn the user if ulimit -n is less than 1024
ULIMIT_F=$(ulimit -n)
if [ "$ULIMIT_F" -lt 1024 ]; then
echo "!!!!"
echo "!!!! WARNING: ulimit -n is ${ULIMIT_F}; 1024 is the recommended minimum."
echo "!!!!"
fi
SED_REPLACE="sed -i "
case $(sed --help 2>&1) in
*GNU*) SED_REPLACE="sed -i ";;
*BusyBox*) SED_REPLACE="sed -i ";;
*) SED_REPLACE="sed -i '' ";;
esac
# Get node pid
relx_get_pid() {
if output="$(relx_nodetool rpcterms os getpid)"
then
# shellcheck disable=SC2001 # Escaped quote taken as closing quote in editor
echo "$output" | sed -e 's/"//g'
return 0
else
echo "$output"
return 1
fi
}
# Connect to a remote node
relx_rem_sh() {
# Generate a unique id used to allow multiple remsh to the same node
# transparently
id="remsh$(relx_gen_id)-${NAME}"
# Get the node's ticktime so that we use the same thing.
TICKTIME="$(relx_nodetool rpcterms net_kernel get_net_ticktime)"
# shellcheck disable=SC2086 # $EPMD_ARG is supposed to be split by whitespace
# Setup remote shell command to control node
exec "$BINDIR/erl" "$NAME_TYPE" "$id" -remsh "$NAME" -boot "$REL_DIR/start_clean" \
-boot_var ERTS_LIB_DIR "$ERTS_LIB_DIR" \
-setcookie "$COOKIE" -hidden -kernel net_ticktime "$TICKTIME" $EPMD_ARG
}
# Generate a random id
relx_gen_id() {
od -t x -N 4 /dev/urandom | head -n1 | awk '{print $2}'
}
# Control a node
relx_nodetool() {
command="$1"; shift
ERL_FLAGS="$ERL_FLAGS $EPMD_ARG" \
"$ERTS_DIR/bin/escript" "$ROOTDIR/bin/nodetool" "$NAME_TYPE" "$NAME" \
-setcookie "$COOKIE" "$command" "$@"
}
call_hocon() {
"$ERTS_DIR/bin/escript" "$ROOTDIR/bin/nodetool" hocon "$@" \
|| die "call_hocon_failed: $*" $?
}
# Run an escript in the node's environment
relx_escript() {
shift; scriptpath="$1"; shift
"$ERTS_DIR/bin/escript" "$ROOTDIR/$scriptpath" "$@"
}
# Output a start command for the last argument of run_erl
relx_start_command() {
printf "exec \"%s\" \"%s\"" "$RUNNER_SCRIPT" \
"$START_OPTION"
}
# Function to generate app.config and vm.args
generate_config() {
local name_type="$1"
local node_name="$2"
## Delete the *.siz files first or it cann't start after
## changing the config 'log.rotation.size'
rm -rf "${RUNNER_LOG_DIR}"/*.siz
EMQX_LICENSE_CONF_OPTION=""
if [ "${EMQX_LICENSE_CONF:-}" != "" ]; then
EMQX_LICENSE_CONF_OPTION="-c ${EMQX_LICENSE_CONF}"
fi
## timestamp for each generation
local NOW_TIME
NOW_TIME="$(call_hocon now_time)"
## ths command populates two files: app.<time>.config and vm.<time>.args
## disable SC2086 to allow EMQX_LICENSE_CONF_OPTION to split
# shellcheck disable=SC2086
call_hocon -v -t "$NOW_TIME" -s $SCHEMA_MOD -c "$RUNNER_ETC_DIR"/emqx.conf $EMQX_LICENSE_CONF_OPTION -d "$RUNNER_DATA_DIR"/configs generate
## filenames are per-hocon convention
local CONF_FILE="$CONFIGS_DIR/app.$NOW_TIME.config"
local HOCON_GEN_ARG_FILE="$CONFIGS_DIR/vm.$NOW_TIME.args"
CONFIG_ARGS="-config $CONF_FILE -args_file $HOCON_GEN_ARG_FILE"
## Merge hocon generated *.args into the vm.args
TMP_ARG_FILE="$CONFIGS_DIR/vm.args.tmp"
cp "$RUNNER_ETC_DIR/vm.args" "$TMP_ARG_FILE"
echo "" >> "$TMP_ARG_FILE"
echo "-pa ${REL_DIR}/consolidated" >> "$TMP_ARG_FILE"
## read lines from generated vm.<time>.args file
## drop comment lines, and empty lines using sed
## pipe the lines to a while loop
sed '/^#/d' "$HOCON_GEN_ARG_FILE" | sed '/^$/d' | while IFS='' read -r ARG_LINE || [ -n "$ARG_LINE" ]; do
## in the loop, split the 'key[:space:]value' pair
ARG_KEY=$(echo "$ARG_LINE" | awk '{$NF="";print}')
ARG_VALUE=$(echo "$ARG_LINE" | awk '{print $NF}')
## use the key to look up in vm.args file for the value
TMP_ARG_VALUE=$(grep "^$ARG_KEY" "$TMP_ARG_FILE" || true | awk '{print $NF}')
## compare generated (to override) value to original (to be overriden) value
if [ "$ARG_VALUE" != "$TMP_ARG_VALUE" ] ; then
## if they are different
if [ -n "$TMP_ARG_VALUE" ]; then
## if the old value is present, replace it with generated value
sh -c "$SED_REPLACE 's|^$ARG_KEY.*$|$ARG_LINE|' $TMP_ARG_FILE"
else
## otherwise append generated value to the end
echo "$ARG_LINE" >> "$TMP_ARG_FILE"
fi
fi
done
echo "$name_type $node_name" >> "$TMP_ARG_FILE"
## rename the generated vm.<time>.args file
mv -f "$TMP_ARG_FILE" "$HOCON_GEN_ARG_FILE"
# shellcheck disable=SC2086
if ! relx_nodetool chkconfig $CONFIG_ARGS; then
die "failed_to_check_config $CONFIG_ARGS"
fi
}
# Call bootstrapd for daemon commands like start/stop/console
bootstrapd() {
if [ -e "$RUNNER_DATA_DIR/.erlang.cookie" ]; then
chown "$RUNNER_USER" "$RUNNER_DATA_DIR"/.erlang.cookie
fi
}
# check if a PID is down
is_down() {
PID="$1"
if ps -p "$PID" >/dev/null; then
# still around
# shellcheck disable=SC2009 # this grep pattern is not a part of the progra names
if ps -p "$PID" | grep -q 'defunct'; then
# zombie state, print parent pid
parent="$(ps -o ppid= -p "$PID" | tr -d ' ')"
echo "WARN: $PID is marked <defunct>, parent:"
ps -p "$parent"
return 0
fi
return 1
fi
# it's gone
return 0
}
wait_for() {
local WAIT_TIME
local CMD
WAIT_TIME="$1"
shift
CMD="$*"
while true; do
if $CMD >/dev/null 2>&1; then
return 0
fi
if [ "$WAIT_TIME" -le 0 ]; then
return 1
fi
WAIT_TIME=$((WAIT_TIME - 1))
sleep 1
done
}
# Use $CWD/etc/sys.config if exists
if [ -z "$RELX_CONFIG_PATH" ]; then
if [ -f "$RUNNER_ETC_DIR/sys.config" ]; then
RELX_CONFIG_PATH="-config $RUNNER_ETC_DIR/sys.config"
else
RELX_CONFIG_PATH=""
fi
fi
IS_BOOT_COMMAND='no'
case "$1" in
start|start_boot)
IS_BOOT_COMMAND='yes'
;;
console|console_clean|console_boot)
IS_BOOT_COMMAND='yes'
;;
foreground)
IS_BOOT_COMMAND='yes'
;;
esac
## Possible ways to configure emqx node name:
## 1. configure node.name in emqx.conf
## 2. override with environment variable EMQX_NODE_NAME
## Node name is either short-name (without '@'), e.g. 'emqx'
## or long name (with '@') e.g. 'emqx@example.net' or 'emqx@127.0.0.1'
NAME="${EMQX_NODE_NAME:-}"
if [ -z "$NAME" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
# for boot commands, inspect emqx.conf for node name
NAME="$(call_hocon -s $SCHEMA_MOD -c "$RUNNER_ETC_DIR"/emqx.conf get node.name | tr -d \")"
else
# for non-boot commands, inspect vm.<time>.args for node name
# shellcheck disable=SC2012,SC2086
LATEST_VM_ARGS="$(ls -t $CONFIGS_DIR/vm.*.args | head -1)"
NAME="$(grep -E '^-s?name' "$LATEST_VM_ARGS" | awk '{print $2}')"
fi
fi
# force to use 'emqx' short name
[ -z "$NAME" ] && NAME='emqx'
case "$NAME" in
*@*)
NAME_TYPE='-name'
;;
*)
NAME_TYPE='-sname'
esac
SHORT_NAME="$(echo "$NAME" | awk -F'@' '{print $1}')"
export ESCRIPT_NAME="$SHORT_NAME"
PIPE_DIR="${PIPE_DIR:-/$RUNNER_DATA_DIR/${WHOAMI}_erl_pipes/$NAME/}"
COOKIE="${EMQX_NODE_COOKIE:-}"
if [ -z "$COOKIE" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
COOKIE="$(call_hocon -s $SCHEMA_MOD -c "$RUNNER_ETC_DIR"/emqx.conf get node.cookie | tr -d \")"
else
# shellcheck disable=SC2012,SC2086
LATEST_VM_ARGS="$(ls -t $CONFIGS_DIR/vm.*.args | head -1)"
COOKIE="$(grep -E '^-setcookie' "$LATEST_VM_ARGS" | awk '{print $2}')"
fi
fi
if [ -z "$COOKIE" ]; then
die "Please set node.cookie in $RUNNER_ETC_DIR/emqx.conf or override from environment variable EMQX_NODE_COOKIE"
fi
# Support for IPv6 Dist. See: https://github.com/emqtt/emqttd/issues/1460
PROTO_DIST="$(call_hocon -s $SCHEMA_MOD -c "$RUNNER_ETC_DIR"/emqx.conf get cluster.proto_dist | tr -d \")"
if [ -z "$PROTO_DIST" ]; then
PROTO_DIST_ARG=""
else
PROTO_DIST_ARG="-proto_dist $PROTO_DIST"
fi
cd "$ROOTDIR"
case "$1" in
start|start_boot)
# Make sure a node IS not running
if relx_nodetool "ping" >/dev/null 2>&1; then
die "node_is_already_running!"
fi
# Bootstrap daemon command (check perms & drop to $RUNNER_USER)
bootstrapd
# this flag passes down to console mode
# so we know it's intended to be run in daemon mode
export _EMQX_START_MODE="$1"
# Save this for later.
CMD=$1
case "$1" in
start)
shift
START_OPTION="console"
HEART_OPTION="start"
;;
start_boot)
shift
START_OPTION="console_boot"
HEART_OPTION="start_boot"
;;
esac
RUN_PARAM="$*"
# Set arguments for the heart command
set -- "$RUNNER_SCRIPT" "$HEART_OPTION"
[ "$RUN_PARAM" ] && set -- "$@" "$RUN_PARAM"
# Export the HEART_COMMAND
HEART_COMMAND="$RUNNER_SCRIPT $CMD"
export HEART_COMMAND
## See: http://erlang.org/doc/man/run_erl.html
# Export the RUN_ERL_LOG_GENERATIONS
export RUN_ERL_LOG_GENERATIONS=${RUN_ERL_LOG_GENERATIONS:-"5"}
# Export the RUN_ERL_LOG_MAXSIZE
export RUN_ERL_LOG_MAXSIZE=${RUN_ERL_LOG_MAXSIZE:-"10485760"}
mkdir -p "$PIPE_DIR"
"$BINDIR/run_erl" -daemon "$PIPE_DIR" "$RUNNER_LOG_DIR" \
"$(relx_start_command)"
WAIT_TIME=${WAIT_FOR_ERLANG:-15}
if wait_for "$WAIT_TIME" 'relx_nodetool' 'ping'; then
echo "$EMQX_DESCRIPTION $REL_VSN is started successfully!"
exit 0
else
echo "$EMQX_DESCRIPTION $REL_VSN failed to start within ${WAIT_TIME} seconds,"
echo "see the output of '$0 console' for more information."
echo "If you want to wait longer, set the environment variable"
echo "WAIT_FOR_ERLANG to the number of seconds to wait."
exit 1
fi
;;
stop)
# Wait for the node to completely stop...
PID="$(relx_get_pid)"
if ! relx_nodetool "stop"; then
echoerr "Graceful shutdown failed PID=[$PID]"
exit 1
fi
WAIT_TIME="${WAIT_FOR_ERLANG_STOP:-60}"
if ! wait_for "$WAIT_TIME" 'is_down' "$PID"; then
msg="dangling after ${WAIT_TIME} seconds"
# also log to syslog
logger -t "${REL_NAME}[${PID}]" "STOP: $msg"
# log to user console
echoerr "stop failed, $msg"
exit 1
fi
logger -t "${REL_NAME}[${PID}]" "STOP: OK"
;;
restart|reboot)
echo "$EMQX_DESCRIPTION $REL_VSN is stopped: $("$RUNNER_BIN_DIR"/emqx stop)"
"$RUNNER_BIN_DIR"/emqx start
;;
pid)
## Get the VM's pid
if ! relx_get_pid; then
exit 1
fi
;;
ping)
assert_node_alive
;;
escript)
## Run an escript under the node's environment
if ! relx_escript "$@"; then
exit 1
fi
;;
attach)
assert_node_alive
# Bootstrap daemon command (check perms & drop to $RUNNER_USER)
bootstrapd
shift
exec "$BINDIR/to_erl" "$PIPE_DIR"
;;
remote_console)
assert_node_alive
# Bootstrap daemon command (check perms & drop to $RUNNER_USER)
bootstrapd
shift
relx_rem_sh
;;
upgrade|downgrade|install|unpack|uninstall)
if [ -z "$2" ]; then
echo "Missing version argument"
echo "Usage: $REL_NAME $1 {version}"
exit 1
fi
COMMAND="$1"; shift
assert_node_alive
ERL_FLAGS="$ERL_FLAGS $EPMD_ARG" \
exec "$BINDIR/escript" "$ROOTDIR/bin/install_upgrade.escript" \
"$COMMAND" "{'$REL_NAME', \"$NAME_TYPE\", '$NAME', '$COOKIE'}" "$@"
;;
versions)
assert_node_alive
COMMAND="$1"; shift
ERL_FLAGS="$ERL_FLAGS $EPMD_ARG" \
exec "$BINDIR/escript" "$ROOTDIR/bin/install_upgrade.escript" \
"versions" "{'$REL_NAME', \"$NAME_TYPE\", '$NAME', '$COOKIE'}" "$@"
;;
console|console_clean|console_boot)
# Bootstrap daemon command (check perms & drop to $RUNNER_USER)
bootstrapd
# .boot file typically just $REL_NAME (ie, the app name)
# however, for debugging, sometimes start_clean.boot is useful.
# For e.g. 'setup', one may even want to name another boot script.
case "$1" in
console)
if [ -f "$REL_DIR/$REL_NAME.boot" ]; then
BOOTFILE="$REL_DIR/$REL_NAME"
else
BOOTFILE="$REL_DIR/start"
fi
;;
console_clean)
BOOTFILE="$REL_DIR/start_clean"
;;
console_boot)
shift
BOOTFILE="$1"
shift
;;
esac
# set before generate_config
if [ "${_EMQX_START_MODE:-}" = '' ]; then
export EMQX_LOG__CONSOLE_HANDLER__ENABLE="${EMQX_LOG__CONSOLE_HANDLER__ENABLE:-true}"
fi
#generate app.config and vm.args
generate_config "$NAME_TYPE" "$NAME"
# Setup beam-required vars
EMU="beam"
PROGNAME="${0#*/}"
export EMU
export PROGNAME
# Store passed arguments since they will be erased by `set`
ARGS="$*"
# shellcheck disable=SC2086 # $RELX_CONFIG_PATH $CONFIG_ARGS $EPMD_ARG are supposed to be split by whitespace
# Build an array of arguments to pass to exec later on
# Build it here because this command will be used for logging.
set -- "$BINDIR/erlexec" \
-boot "$BOOTFILE" -mode "$CODE_LOADING_MODE" \
-boot_var ERTS_LIB_DIR "$ERTS_LIB_DIR" \
-mnesia dir "\"${MNESIA_DATA_DIR}\"" \
$RELX_CONFIG_PATH $CONFIG_ARGS $EPMD_ARG
# Log the startup
logger -t "${REL_NAME}[$$]" "EXEC: $* -- ${1+$ARGS}"
# Start the VM
exec "$@" -- ${1+$ARGS}
;;
foreground)
# Bootstrap daemon command (check perms & drop to $RUNNER_USER)
bootstrapd
# start up the release in the foreground for use by runit
# or other supervision services
# set before generate_config
export EMQX_LOG__CONSOLE_HANDLER__ENABLE="${EMQX_LOG__CONSOLE_HANDLER__ENABLE:-true}"
#generate app.config and vm.args
generate_config "$NAME_TYPE" "$NAME"
[ -f "$REL_DIR/$REL_NAME.boot" ] && BOOTFILE="$REL_NAME" || BOOTFILE=start
FOREGROUNDOPTIONS="-noshell -noinput +Bd"
# Setup beam-required vars
EMU=beam
PROGNAME="${0#*/}"
export EMU
export PROGNAME
# Store passed arguments since they will be erased by `set`
ARGS="$*"
# shellcheck disable=SC2086 # $RELX_CONFIG_PATH $CONFIG_ARGS $EPMD_ARG are supposed to be split by whitespace
# Build an array of arguments to pass to exec later on
# Build it here because this command will be used for logging.
set -- "$BINDIR/erlexec" $FOREGROUNDOPTIONS \
-boot "$REL_DIR/$BOOTFILE" -mode "$CODE_LOADING_MODE" \
-boot_var ERTS_LIB_DIR "$ERTS_LIB_DIR" \
-mnesia dir "\"${MNESIA_DATA_DIR}\"" \
$RELX_CONFIG_PATH $CONFIG_ARGS $EPMD_ARG
# Log the startup
logger -t "${REL_NAME}[$$]" "EXEC: $* -- ${1+$ARGS}"
# Start the VM
exec "$@" -- ${1+$ARGS}
;;
ertspath)
echo "$ERTS_PATH"
;;
ctl)
assert_node_alive
shift
relx_nodetool rpc_infinity emqx_ctl run_command "$@"
;;
rpc)
assert_node_alive
shift
relx_nodetool rpc "$@"
;;
rpcterms)
assert_node_alive
shift
relx_nodetool rpcterms "$@"
;;
root_dir)
assert_node_alive
shift
relx_nodetool "eval" 'code:root_dir()'
;;
eval)
assert_node_alive
shift
relx_nodetool "eval" "$@"
;;
*)
relx_usage "$1"
exit 1
;;
esac
exit 0