refactor(bin/emqx): speed up boot

The main slowdown is the overhead of booting up beam with the
'start_clean' boot file (which loads all modules).

Prior to this change, beam is started multiple times in order to
resolve configuration values.

After this change:
* For boot commands such as 'start', 'console' and
  'foreground', it starts beam twice:
  - 1st is to check platform compatibility
  - 2nd is to resolve all configs required for boot in a batch
* For non-boot commands, such as 'ctl' and 'ping', beam no longer
  needs to be started for config resolution at all
This commit is contained in:
Zaiming (Stone) Shi 2022-04-17 11:51:38 +02:00
parent fbf6c57d46
commit ce902b5405
4 changed files with 148 additions and 116 deletions

View File

@ -351,7 +351,7 @@ fields("node") ->
, 'readOnly' => true , 'readOnly' => true
})} })}
, {"dist_net_ticktime", , {"dist_net_ticktime",
sc(emqx_schema:duration(), sc(emqx_schema:duration_s(),
#{ mapping => "vm_args.-kernel net_ticktime" #{ mapping => "vm_args.-kernel net_ticktime"
, default => "2m" , default => "2m"
, 'readOnly' => true , 'readOnly' => true

236
bin/emqx
View File

@ -10,6 +10,7 @@ if [ "$DEBUG" -eq 1 ]; then
fi fi
RUNNER_ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)" RUNNER_ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)"
# shellcheck disable=SC1090,SC1091 # shellcheck disable=SC1090,SC1091
. "$RUNNER_ROOT_DIR"/releases/emqx_vars . "$RUNNER_ROOT_DIR"/releases/emqx_vars
@ -39,7 +40,7 @@ export ERTS_LIB_DIR="$RUNNER_ROOT_DIR/lib"
DYNLIBS_DIR="$RUNNER_ROOT_DIR/dynlibs" DYNLIBS_DIR="$RUNNER_ROOT_DIR/dynlibs"
# Echo to stderr on errors # Echo to stderr on errors
echoerr() { echo "ERROR: $*" 1>&2; } echoerr() { echo -e "$*" 1>&2; }
die() { die() {
echoerr "ERROR: $1" echoerr "ERROR: $1"
@ -53,20 +54,6 @@ assert_node_alive() {
fi fi
} }
# Echo to stderr on errors
echoerr() { echo "$*" 1>&2; }
check_erlang_start() {
# RELEASE_LIB is used by Elixir
"$BINDIR/$PROGNAME" \
-noshell \
-boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \
-boot "$REL_DIR/start_clean" \
-s crypto start \
-s erlang halt
}
usage() { usage() {
local command="$1" local command="$1"
@ -194,7 +181,7 @@ usage() {
echo "More:" echo "More:"
echo " Shell attach: remote_console | attach" echo " Shell attach: remote_console | attach"
echo " Up/Down-grade: upgrade | downgrade | install | uninstall" echo " Up/Down-grade: upgrade | downgrade | install | uninstall"
echo " Install info: ertspath | root_dir | versions" echo " Install info: ertspath | root_dir"
echo " Runtime info: pid | ping | versions" echo " Runtime info: pid | ping | versions"
echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl" echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl"
echo '' echo ''
@ -221,25 +208,23 @@ if [ "${2:-}" = 'help' ]; then
fi fi
fi fi
if ! check_erlang_start >/dev/null 2>&1; then ## IS_BOOT_COMMAND is set for later to inspect node name and cookie from hocon config (or env variable)
BUILD_INFO="$(cat "${REL_DIR}/BUILD_INFO")" case "${COMMAND}" in
## failed to start, might be due to missing libs, try to be portable start|console|console_clean|foreground)
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-$DYNLIBS_DIR}" IS_BOOT_COMMAND='yes'
if [ "$LD_LIBRARY_PATH" != "$DYNLIBS_DIR" ]; then ;;
export LD_LIBRARY_PATH="$DYNLIBS_DIR:$LD_LIBRARY_PATH" ertspath)
fi echo "$ERTS_DIR"
deps_hint="Please make sure openssl-1.1.1 (libcrypto), libncurses and libatomic1 are installed." exit 0
if ! check_erlang_start; then ;;
## it's hopeless root_dir)
echoerr "FATAL: Unable to start Erlang." echo "$RUNNER_ROOT_DIR"
echoerr "$deps_hint" exit 0
echoerr "Also ensure it's running on the correct platform:" ;;
echoerr "$BUILD_INFO" *)
exit 1 IS_BOOT_COMMAND='no'
fi ;;
echoerr "Using libs from '${DYNLIBS_DIR}' due to missing from the OS." esac
echoerr "$deps_hint"
fi
## backward compatible ## backward compatible
if [ -d "$ERTS_DIR/lib" ]; then if [ -d "$ERTS_DIR/lib" ]; then
@ -272,6 +257,68 @@ if [ "$ES" -ne 0 ]; then
exit $ES exit $ES
fi fi
## Erlang expressions evaluated by a throw-away beam (passed to 'erl -eval')
## to probe the runtime in a single start. Progress is reported as marker
## lines on stdout which the caller greps for:
##   BEAM_OK    - the VM booted and is able to evaluate code
##   CRYPTO_OK  - the crypto library is loaded (followed by its lib info)
##   MNESIA_OK  - the mnesia_hook module is available
## Exit status: 0 = all checks passed, 1 = crypto is unusable,
## 2 = mnesia_hook is missing.
COMPATIBILITY_CHECK='
io:format("BEAM_OK~n", []),
try
[_|_] = L = crypto:info_lib(),
io:format("CRYPTO_OK ~0p~n", [L])
catch
_ : _ ->
%% so logger has the chance to log something
timer:sleep(100),
halt(1)
end,
try
mnesia_hook:module_info(),
io:format("MNESIA_OK~n", [])
catch
_ : _ ->
io:format("WARNING: Mnesia app has no post-commit hook support~n", []),
halt(2)
end,
halt(0).
'
## Start a short-lived beam with the 'start_clean' boot file and evaluate
## $COMPATIBILITY_CHECK in it. Whatever the VM prints goes to this
## function's stdout/stderr; the VM's exit status is the return status.
compatiblity_info() {
    local -a probe_args
    probe_args=(
        -noshell
        # RELEASE_LIB is used by Elixir
        -boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib"
        -boot "$REL_DIR/start_clean"
        -eval "$COMPATIBILITY_CHECK"
    )
    "$BINDIR/$PROGNAME" "${probe_args[@]}"
}
# Collect Erlang/OTP runtime sanity and compatibility in one go
# Only boot commands run the probe; for any other command this whole section
# is skipped and COMPATIBILITY_INFO is left unset.
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
# Read BUILD_INFO early as the next commands may mess up the shell
BUILD_INFO="$(cat "${REL_DIR}/BUILD_INFO")"
# First attempt: stderr is discarded and the exit status is ignored,
# only the marker lines captured from stdout are inspected below.
COMPATIBILITY_INFO="$(compatiblity_info 2>/dev/null || true)"
if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then
## failed to start, might be due to missing libs, try to be portable
# Put DYNLIBS_DIR first in LD_LIBRARY_PATH (or make it the only entry
# when the variable was unset or empty) before probing a second time.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-$DYNLIBS_DIR}"
if [ "$LD_LIBRARY_PATH" != "$DYNLIBS_DIR" ]; then
export LD_LIBRARY_PATH="$DYNLIBS_DIR:$LD_LIBRARY_PATH"
fi
# Second attempt: stderr is merged into the captured text so that it can be
# shown to the user if this attempt fails as well.
COMPATIBILITY_INFO="$(compatiblity_info 2>&1 || true)"
if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'BEAM_OK'); then
## not able to start beam.smp
echoerr "$COMPATIBILITY_INFO"
echoerr "Please ensure it is running on the correct platform:"
echoerr "$BUILD_INFO"
echoerr "Version=$REL_VSN"
echoerr "Required dependencies: openssl-1.1.1 (libcrypto), libncurses and libatomic1"
exit 1
elif ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then
## not able to start crypto app
echoerr "$COMPATIBILITY_INFO"
exit 2
fi
# Reaching this line means the second probe reported CRYPTO_OK, i.e. the
# bundled libraries made the difference.
echoerr "Using libs from '${DYNLIBS_DIR}' due to missing from the OS."
fi
# Exported so that child processes of this script inherit the probe result.
export COMPATIBILITY_INFO
fi
NO_EPMD="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka" NO_EPMD="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka"
EPMD_ARGS="${EPMD_ARGS:-${NO_EPMD}}" EPMD_ARGS="${EPMD_ARGS:-${NO_EPMD}}"
@ -356,22 +403,66 @@ call_hocon() {
|| die "call_hocon_failed: $*" $? || die "call_hocon_failed: $*" $?
} }
get_config_value() { ## Resolve boot configs in a batch
## This is because starting the Erlang beam with all modules loaded
## and parsing HOCON config + environment variables is a non-trivial task
## Keys fetched with a single 'multi_get' call; the result is kept in
## EMQX_BOOT_CONFIGS as one 'key=value' line per key.
CONF_KEYS=( 'node.data_dir' 'node.name' 'node.cookie' 'db.backend' )
if [ "$IS_ENTERPRISE" = 'yes' ]; then
    CONF_KEYS+=( 'license.file' 'license.key' )
fi

if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
    if [ "${EMQX_BOOT_CONFIGS:-}" = '' ]; then
        EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
        ## export here so the 'console' command recursively called from
        ## 'start' command does not have to parse the configs again
        export EMQX_BOOT_CONFIGS
    fi
else
    # For non-boot commands, we try to get data_dir from ps -ef command
    # shellcheck disable=SC2009
    PS_LINE="$(ps -ef | grep "\-[r]oot $RUNNER_ROOT_DIR" | grep -oE "\-emqx_data_dir.*"|| true)"
    ## 'echo | wc -l' reports 1 for an empty string as well, so the empty
    ## case (no matching process, or one without -emqx_data_dir) has to be
    ## excluded explicitly, otherwise it is mistaken for "exactly one node".
    if [ -n "$PS_LINE" ] && [ "$(echo -e "$PS_LINE" | wc -l)" -eq 1 ]; then
        ## only one emqx node is running
        ## strip 'emqx_data_dir ' and ' --' because the dir in between may contain spaces
        ## (ERE with POSIX classes: '\+' and '\s' are GNU sed extensions)
        DATA_DIR="$(echo -e "$PS_LINE" | sed -E -e 's#.+emqx_data_dir[[:space:]]##g' -e 's#[[:space:]]--$##g')"
        if [ "$DATA_DIR" = '' ]; then
            ## this should not happen unless -emqx_data_dir was given an empty value
            die "node_is_not_running!" 1
        fi
        EMQX_BOOT_CONFIGS="node.data_dir=$DATA_DIR"
    else
        ## None or more than one node is running, resolve from boot config
        EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
    fi
fi
get_boot_config() {
path_to_value="$1" path_to_value="$1"
call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf get "$path_to_value" | tr -d \" echo -e "$EMQX_BOOT_CONFIGS" | grep "$path_to_value=" | sed -e "s/$path_to_value=//g" | tr -d \"
} }
## Turn the configured node.data_dir into a path without a trailing slash,
## anchored at the release root unless it is already absolute, and make
## sure its 'configs' sub-directory exists.
DATA_DIR="$(get_boot_config 'node.data_dir')"
DATA_DIR="${DATA_DIR%/}"
case "$DATA_DIR" in
    /*)
        # absolute path, keep as is
        ;;
    *)
        # relative path
        DATA_DIR="${RUNNER_ROOT_DIR}/${DATA_DIR}"
        ;;
esac
CONFIGS_DIR="${DATA_DIR}/configs"
mkdir -p "${CONFIGS_DIR}"
check_license() { check_license() {
if [ "$IS_ENTERPRISE" == "no" ]; then if [ "$IS_ENTERPRISE" == "no" ]; then
return 0 return 0
fi fi
file_license="${EMQX_LICENSE__FILE:-$(get_config_value license.file)}" file_license="${EMQX_LICENSE__FILE:-$(get_boot_config 'license.file')}"
if [[ -n "$file_license" && ("$file_license" != "undefined") ]]; then if [[ -n "$file_license" && ("$file_license" != "undefined") ]]; then
call_nodetool check_license_file "$file_license" call_nodetool check_license_file "$file_license"
else else
key_license="${EMQX_LICENSE__KEY:-$(get_config_value license.key)}" key_license="${EMQX_LICENSE__KEY:-$(get_boot_config 'license.key')}"
if [[ -n "$key_license" && ("$key_license" != "undefined") ]]; then if [[ -n "$key_license" && ("$key_license" != "undefined") ]]; then
call_nodetool check_license_key "$key_license" call_nodetool check_license_key "$key_license"
@ -396,15 +487,6 @@ relx_start_command() {
"$START_OPTION" "$START_OPTION"
} }
DATA_DIR="$(get_config_value 'node.data_dir')"
DATA_DIR="${DATA_DIR%/}"
if [[ $DATA_DIR != /* ]]; then
# relative
DATA_DIR="${RUNNER_ROOT_DIR}/${DATA_DIR}"
fi
CONFIGS_DIR="$DATA_DIR/configs"
mkdir -p "$CONFIGS_DIR"
# Function to generate app.config and vm.args # Function to generate app.config and vm.args
# sets two environment variables CONF_FILE and ARGS_FILE # sets two environment variables CONF_FILE and ARGS_FILE
generate_config() { generate_config() {
@ -416,7 +498,7 @@ generate_config() {
## timestamp for each generation ## timestamp for each generation
local NOW_TIME local NOW_TIME
NOW_TIME="$(call_hocon now_time)" NOW_TIME="$(date +'%Y.%m.%d.%H.%M.%S')"
## this command populates two files: app.<time>.config and vm.<time>.args ## this command populates two files: app.<time>.config and vm.<time>.args
## NOTE: the generate command merges environment variables to the base config (emqx.conf), ## NOTE: the generate command merges environment variables to the base config (emqx.conf),
@ -524,7 +606,7 @@ latest_vm_args() {
if [ -f "$vm_args_file" ]; then if [ -f "$vm_args_file" ]; then
echo "$vm_args_file" echo "$vm_args_file"
else else
echoerr "ERROR: node not initialized?" echoerr "Node not initialized?"
echoerr "Generated config file vm.*.args is not found for command '$COMMAND'" echoerr "Generated config file vm.*.args is not found for command '$COMMAND'"
echoerr "in config dir: $CONFIGS_DIR" echoerr "in config dir: $CONFIGS_DIR"
echoerr "In case the file has been deleted while the node is running," echoerr "In case the file has been deleted while the node is running,"
@ -575,31 +657,6 @@ maybe_log_to_console() {
fi fi
} }
# check if using an OTP version that has the mnesia_hook patch for use
# in mria.
is_otp_compatible() {
"$BINDIR/$PROGNAME" \
-noshell \
-boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \
-boot "$REL_DIR/start_clean" \
-eval 'try mnesia_hook:module_info() of _ -> init:stop() catch _:_ -> halt(1) end.' >/dev/null 2>&1
}
## IS_BOOT_COMMAND is set for later to inspect node name and cookie from hocon config (or env variable)
case "${COMMAND}" in
start|console|console_clean|foreground)
IS_BOOT_COMMAND='yes'
;;
ertspath)
echo "$ERTS_DIR"
exit 0
;;
*)
IS_BOOT_COMMAND='no'
;;
esac
## make EMQX_NODE_COOKIE right
if [ -n "${EMQX_NODE_NAME:-}" ]; then if [ -n "${EMQX_NODE_NAME:-}" ]; then
export EMQX_NODE__NAME="${EMQX_NODE_NAME}" export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
unset EMQX_NODE_NAME unset EMQX_NODE_NAME
@ -613,7 +670,7 @@ NAME="${EMQX_NODE__NAME:-}"
if [ -z "$NAME" ]; then if [ -z "$NAME" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
# for boot commands, inspect emqx.conf for node name # for boot commands, inspect emqx.conf for node name
NAME="$(get_config_value node.name)" NAME="$(get_boot_config 'node.name')"
else else
vm_args_file="$(latest_vm_args 'EMQX_NODE__NAME')" vm_args_file="$(latest_vm_args 'EMQX_NODE__NAME')"
NAME="$(grep -E '^-s?name' "${vm_args_file}" | awk '{print $2}')" NAME="$(grep -E '^-s?name' "${vm_args_file}" | awk '{print $2}')"
@ -643,7 +700,7 @@ fi
COOKIE="${EMQX_NODE__COOKIE:-}" COOKIE="${EMQX_NODE__COOKIE:-}"
if [ -z "$COOKIE" ]; then if [ -z "$COOKIE" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
COOKIE="$(get_config_value node.cookie)" COOKIE="$(get_boot_config 'node.cookie')"
else else
vm_args_file="$(latest_vm_args 'EMQX_NODE__COOKIE')" vm_args_file="$(latest_vm_args 'EMQX_NODE__COOKIE')"
COOKIE="$(grep -E '^-setcookie' "${vm_args_file}" | awk '{print $2}')" COOKIE="$(grep -E '^-setcookie' "${vm_args_file}" | awk '{print $2}')"
@ -654,18 +711,14 @@ if [ -z "$COOKIE" ]; then
die "Please set node.cookie in $EMQX_ETC_DIR/emqx.conf or override from environment variable EMQX_NODE__COOKIE" die "Please set node.cookie in $EMQX_ETC_DIR/emqx.conf or override from environment variable EMQX_NODE__COOKIE"
fi fi
## check if OTP version has mnesia_hook patch; if not, fallback to ## check if OTP version has mnesia_hook feature; if not, fallback to
## using Mnesia DB backend. ## using Mnesia DB backend.
if [[ "${EMQX_DB__BACKEND:-}" != "mnesia" if [[ "$IS_BOOT_COMMAND" == 'yes' && "$(get_boot_config 'db.backend')" == "rlog" ]]; then
|| "${EMQX_DB__ROLE:-}" != "core" ]]; then if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'MNESIA_OK'); then
if [[ "$IS_BOOT_COMMAND" == 'yes' echoerr "DB Backend is RLOG, but an incompatible OTP version has been detected. Falling back to using Mnesia DB backend."
&& "$(get_config_value 'db.backend')" == "rlog" ]]; then
if ! is_otp_compatible; then
echoerr "DB Backend is RLOG, but an incompatible OTP version has been detected. Falling back to using Mnesia DB backend."
export EMQX_DB__BACKEND=mnesia export EMQX_DB__BACKEND=mnesia
export EMQX_DB__ROLE=core export EMQX_DB__ROLE=core
fi fi
fi
fi fi
cd "$RUNNER_ROOT_DIR" cd "$RUNNER_ROOT_DIR"
@ -843,12 +896,13 @@ case "${COMMAND}" in
# Setup beam-required vars # Setup beam-required vars
EMU="beam" EMU="beam"
PROGNAME="${0#*/}" PROGNAME="${0}"
export EMU export EMU
export PROGNAME export PROGNAME
# Store passed arguments since they will be erased by `set` # Store passed arguments since they will be erased by `set`
# add emqx_data_dir to boot command so it is visible from 'ps -ef'
ARGS="$*" ARGS="$*"
# shellcheck disable=SC2086 # shellcheck disable=SC2086
@ -879,10 +933,10 @@ case "${COMMAND}" in
fi fi
# Log the startup # Log the startup
logger -t "${REL_NAME}[$$]" "EXEC: $* -- ${1+$ARGS}" logger -t "${REL_NAME}[$$]" "EXEC: $* -- ${1+$ARGS} -emqx_data_dir ${DATA_DIR}"
# Start the VM # Start the VM
exec "$@" -- ${1+$ARGS} exec "$@" -- ${1+$ARGS} -emqx_data_dir "${DATA_DIR}"
;; ;;
ctl) ctl)
@ -906,12 +960,6 @@ case "${COMMAND}" in
relx_nodetool rpcterms "$@" relx_nodetool rpcterms "$@"
;; ;;
root_dir)
assert_node_alive
shift
relx_nodetool "eval" 'code:root_dir()'
;;
eval) eval)
assert_node_alive assert_node_alive

View File

@ -24,9 +24,7 @@
@set script=%~n0 @set script=%~n0
:: for attach and remote_console
@set EPMD_ARG=-start_epmd false -epmd_module ekka_epmd -proto_dist ekka @set EPMD_ARG=-start_epmd false -epmd_module ekka_epmd -proto_dist ekka
:: for erl command
@set ERL_FLAGS=%EPMD_ARG% @set ERL_FLAGS=%EPMD_ARG%
:: Discover the release root directory from the directory :: Discover the release root directory from the directory
@ -70,22 +68,13 @@
@set EMQX_DB__ROLE=core @set EMQX_DB__ROLE=core
@set conf_path="%etc_dir%\emqx.conf" @set conf_path="%etc_dir%\emqx.conf"
:: Extract node name from emqx.conf
@for /f "usebackq delims=" %%I in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% get node.name"`) do @(
@call :set_trim node_name %%I
)
@set node_name=%node_name:"=%
:: Extract node cookie from emqx.conf @for /f "usebackq tokens=1,2 delims==" %%a in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% multi_get node.name node.cookie node.data_dir"`) do @(
@for /f "usebackq delims=" %%I in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% get node.cookie"`) do @( if "%%a"=="node.name" set node_name=%%b
@call :set_trim node_cookie %%I if "%%a"=="node.cookie" set node_cookie=%%b
if "%%a"=="node.data_dir" set data_dir=%%b
) )
@set node_cookie=%node_cookie:"=%
:: Extract data_dir from emqx.conf
@for /f "usebackq delims=" %%I in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% get node.data_dir"`) do @(
@call :set_trim data_dir %%I
)
@set data_dir=%data_dir:"=% @set data_dir=%data_dir:"=%
:: remove trailing / :: remove trailing /
@if %data_dir:~-1%==/ SET data_dir=%data_dir:~0,-1% @if %data_dir:~-1%==/ SET data_dir=%data_dir:~0,-1%
@ -238,8 +227,3 @@ cd /d %rel_root_dir%
:attach :attach
%erl_exe% -hidden -remsh "%node_name%" -boot "%clean_boot_file_name%" "%node_type%" "remsh_%node_name%" -setcookie "%node_cookie%" %erl_exe% -hidden -remsh "%node_name%" -boot "%clean_boot_file_name%" "%node_type%" "remsh_%node_name%" -setcookie "%node_cookie%"
@goto :eof @goto :eof
:: Trim variable
:set_trim
@set %1=%2
@goto :eof

2
build
View File

@ -70,7 +70,7 @@ make_doc() {
local libs_dir1 libs_dir2 local libs_dir1 libs_dir2
libs_dir1="$("$FIND" "_build/default/lib/" -maxdepth 2 -name ebin -type d)" libs_dir1="$("$FIND" "_build/default/lib/" -maxdepth 2 -name ebin -type d)"
libs_dir2="$("$FIND" "_build/$PROFILE/lib/" -maxdepth 2 -name ebin -type d)" libs_dir2="$("$FIND" "_build/$PROFILE/lib/" -maxdepth 2 -name ebin -type d)"
libs_dir3="$("$FIND" "_build/$PROFILE/checkouts/" -maxdepth 2 -name ebin -type d || true)" libs_dir3="$("$FIND" "_build/$PROFILE/checkouts/" -maxdepth 2 -name ebin -type d 2>/dev/null || true)"
case $PROFILE in case $PROFILE in
emqx-enterprise) emqx-enterprise)
SCHEMA_MODULE='emqx_enterprise_conf_schema' SCHEMA_MODULE='emqx_enterprise_conf_schema'