refactor(bin/emqx): speed up boot

The main slow-down is the overhead of booting up beam with the
'start_clean' boot file (which loads all modules).

Prior to this change, beam is started multiple times in order to
resolve configuration values.

After this change:
* For boot commands such as 'start', 'console' and
  'foreground', it starts beam twice:
  - 1st is to check platform compatibility
  - 2nd is to resolve all configs required for boot in a batch
* For non-boot commands, such as 'ctl' and 'ping', it does not
  need to start beam for config resolution at all
This commit is contained in:
Zaiming (Stone) Shi 2022-04-17 11:51:38 +02:00
parent fbf6c57d46
commit ce902b5405
4 changed files with 148 additions and 116 deletions

View File

@ -351,7 +351,7 @@ fields("node") ->
, 'readOnly' => true , 'readOnly' => true
})} })}
, {"dist_net_ticktime", , {"dist_net_ticktime",
sc(emqx_schema:duration(), sc(emqx_schema:duration_s(),
#{ mapping => "vm_args.-kernel net_ticktime" #{ mapping => "vm_args.-kernel net_ticktime"
, default => "2m" , default => "2m"
, 'readOnly' => true , 'readOnly' => true

234
bin/emqx
View File

@ -10,6 +10,7 @@ if [ "$DEBUG" -eq 1 ]; then
fi fi
RUNNER_ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)" RUNNER_ROOT_DIR="$(cd "$(dirname "$(readlink "$0" || echo "$0")")"/..; pwd -P)"
# shellcheck disable=SC1090,SC1091 # shellcheck disable=SC1090,SC1091
. "$RUNNER_ROOT_DIR"/releases/emqx_vars . "$RUNNER_ROOT_DIR"/releases/emqx_vars
@ -39,7 +40,7 @@ export ERTS_LIB_DIR="$RUNNER_ROOT_DIR/lib"
DYNLIBS_DIR="$RUNNER_ROOT_DIR/dynlibs" DYNLIBS_DIR="$RUNNER_ROOT_DIR/dynlibs"
# Echo to stderr on errors # Echo to stderr on errors
echoerr() { echo "ERROR: $*" 1>&2; } echoerr() { echo -e "$*" 1>&2; }
die() { die() {
echoerr "ERROR: $1" echoerr "ERROR: $1"
@ -53,20 +54,6 @@ assert_node_alive() {
fi fi
} }
# Echo to stderr on errors
echoerr() { echo "$*" 1>&2; }
check_erlang_start() {
# RELEASE_LIB is used by Elixir
"$BINDIR/$PROGNAME" \
-noshell \
-boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \
-boot "$REL_DIR/start_clean" \
-s crypto start \
-s erlang halt
}
usage() { usage() {
local command="$1" local command="$1"
@ -194,7 +181,7 @@ usage() {
echo "More:" echo "More:"
echo " Shell attach: remote_console | attach" echo " Shell attach: remote_console | attach"
echo " Up/Down-grade: upgrade | downgrade | install | uninstall" echo " Up/Down-grade: upgrade | downgrade | install | uninstall"
echo " Install info: ertspath | root_dir | versions" echo " Install info: ertspath | root_dir"
echo " Runtime info: pid | ping | versions" echo " Runtime info: pid | ping | versions"
echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl" echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl"
echo '' echo ''
@ -221,25 +208,23 @@ if [ "${2:-}" = 'help' ]; then
fi fi
fi fi
if ! check_erlang_start >/dev/null 2>&1; then ## IS_BOOT_COMMAND is set for later to inspect node name and cookie from hocon config (or env variable)
BUILD_INFO="$(cat "${REL_DIR}/BUILD_INFO")" case "${COMMAND}" in
## failed to start, might be due to missing libs, try to be portable start|console|console_clean|foreground)
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-$DYNLIBS_DIR}" IS_BOOT_COMMAND='yes'
if [ "$LD_LIBRARY_PATH" != "$DYNLIBS_DIR" ]; then ;;
export LD_LIBRARY_PATH="$DYNLIBS_DIR:$LD_LIBRARY_PATH" ertspath)
fi echo "$ERTS_DIR"
deps_hint="Please make sure openssl-1.1.1 (libcrypto), libncurses and libatomic1 are installed." exit 0
if ! check_erlang_start; then ;;
## it's hopeless root_dir)
echoerr "FATAL: Unable to start Erlang." echo "$RUNNER_ROOT_DIR"
echoerr "$deps_hint" exit 0
echoerr "Also ensure it's running on the correct platform:" ;;
echoerr "$BUILD_INFO" *)
exit 1 IS_BOOT_COMMAND='no'
fi ;;
echoerr "Using libs from '${DYNLIBS_DIR}' due to missing from the OS." esac
echoerr "$deps_hint"
fi
## backward compatible ## backward compatible
if [ -d "$ERTS_DIR/lib" ]; then if [ -d "$ERTS_DIR/lib" ]; then
@ -272,6 +257,68 @@ if [ "$ES" -ne 0 ]; then
exit $ES exit $ES
fi fi
COMPATIBILITY_CHECK='
io:format("BEAM_OK~n", []),
try
[_|_] = L = crypto:info_lib(),
io:format("CRYPTO_OK ~0p~n", [L])
catch
_ : _ ->
%% so logger has the chance to log something
timer:sleep(100),
halt(1)
end,
try
mnesia_hook:module_info(),
io:format("MNESIA_OK~n", [])
catch
_ : _ ->
io:format("WARNING: Mnesia app has no post-coommit hook support~n", []),
halt(2)
end,
halt(0).
'
compatiblity_info() {
# RELEASE_LIB is used by Elixir
"$BINDIR/$PROGNAME" \
-noshell \
-boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \
-boot "$REL_DIR/start_clean" \
-eval "$COMPATIBILITY_CHECK"
}
# Collect Erlang/OTP runtime sanity and compatibility in one go
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
# Read BUILD_INFO early as the next commands may mess up the shell
BUILD_INFO="$(cat "${REL_DIR}/BUILD_INFO")"
COMPATIBILITY_INFO="$(compatiblity_info 2>/dev/null || true)"
if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then
## failed to start, might be due to missing libs, try to be portable
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-$DYNLIBS_DIR}"
if [ "$LD_LIBRARY_PATH" != "$DYNLIBS_DIR" ]; then
export LD_LIBRARY_PATH="$DYNLIBS_DIR:$LD_LIBRARY_PATH"
fi
COMPATIBILITY_INFO="$(compatiblity_info 2>&1 || true)"
if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'BEAM_OK'); then
## not able to start beam.smp
echoerr "$COMPATIBILITY_INFO"
echoerr "Please ensure it is running on the correct platform:"
echoerr "$BUILD_INFO"
echoerr "Version=$REL_VSN"
echoerr "Required dependencies: openssl-1.1.1 (libcrypto), libncurses and libatomic1"
exit 1
elif ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then
## not able to start crypto app
echoerr "$COMPATIBILITY_INFO"
exit 2
fi
echoerr "Using libs from '${DYNLIBS_DIR}' due to missing from the OS."
fi
export COMPATIBILITY_INFO
fi
NO_EPMD="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka" NO_EPMD="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka"
EPMD_ARGS="${EPMD_ARGS:-${NO_EPMD}}" EPMD_ARGS="${EPMD_ARGS:-${NO_EPMD}}"
@ -356,22 +403,66 @@ call_hocon() {
|| die "call_hocon_failed: $*" $? || die "call_hocon_failed: $*" $?
} }
get_config_value() { ## Resolve boot configs in a batch
## This is because starting the Erlang beam with all modules loaded
## and parsing HOCON config + environment variables is a non-trivial task
CONF_KEYS=( 'node.data_dir' 'node.name' 'node.cookie' 'db.backend' )
if [ "$IS_ENTERPRISE" = 'yes' ]; then
CONF_KEYS+=( 'license.file' 'license.key' )
fi
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
if [ "${EMQX_BOOT_CONFIGS:-}" = '' ]; then
EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
## export here so the 'console' command recursively called from
## 'start' command does not have to parse the configs again
export EMQX_BOOT_CONFIGS
fi
else
# For non-boot commands, we try to get data_dir from ps -ef command
# shellcheck disable=SC2009
PS_LINE="$(ps -ef | grep "\-[r]oot $RUNNER_ROOT_DIR" | grep -oE "\-emqx_data_dir.*"|| true)"
if [ "$(echo -e "$PS_LINE" | wc -l)" -eq 1 ]; then
## only one emqx node is running
## strip 'emqx_data_dir ' and ' --' because the dir in between may contain spaces
DATA_DIR="$(echo -e "$PS_LINE" | sed -e 's#.\+emqx_data_dir\s##g' | sed -e 's#\s--$##g')"
if [ "$DATA_DIR" = '' ]; then
## this should not happen unless -emqx_data_dir is not set
die "node_is_not_running!" 1
fi
EMQX_BOOT_CONFIGS="node.data_dir=$DATA_DIR"
else
## None or more than one node is running, resolve from boot config
EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
fi
fi
get_boot_config() {
path_to_value="$1" path_to_value="$1"
call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf get "$path_to_value" | tr -d \" echo -e "$EMQX_BOOT_CONFIGS" | grep "$path_to_value=" | sed -e "s/$path_to_value=//g" | tr -d \"
} }
DATA_DIR="$(get_boot_config 'node.data_dir')"
# ensure no trailing /
DATA_DIR="${DATA_DIR%/}"
if [[ $DATA_DIR != /* ]]; then
# relative path
DATA_DIR="${RUNNER_ROOT_DIR}/${DATA_DIR}"
fi
CONFIGS_DIR="$DATA_DIR/configs"
mkdir -p "$CONFIGS_DIR"
check_license() { check_license() {
if [ "$IS_ENTERPRISE" == "no" ]; then if [ "$IS_ENTERPRISE" == "no" ]; then
return 0 return 0
fi fi
file_license="${EMQX_LICENSE__FILE:-$(get_config_value license.file)}" file_license="${EMQX_LICENSE__FILE:-$(get_boot_config 'license.file')}"
if [[ -n "$file_license" && ("$file_license" != "undefined") ]]; then if [[ -n "$file_license" && ("$file_license" != "undefined") ]]; then
call_nodetool check_license_file "$file_license" call_nodetool check_license_file "$file_license"
else else
key_license="${EMQX_LICENSE__KEY:-$(get_config_value license.key)}" key_license="${EMQX_LICENSE__KEY:-$(get_boot_config 'license.key')}"
if [[ -n "$key_license" && ("$key_license" != "undefined") ]]; then if [[ -n "$key_license" && ("$key_license" != "undefined") ]]; then
call_nodetool check_license_key "$key_license" call_nodetool check_license_key "$key_license"
@ -396,15 +487,6 @@ relx_start_command() {
"$START_OPTION" "$START_OPTION"
} }
DATA_DIR="$(get_config_value 'node.data_dir')"
DATA_DIR="${DATA_DIR%/}"
if [[ $DATA_DIR != /* ]]; then
# relative
DATA_DIR="${RUNNER_ROOT_DIR}/${DATA_DIR}"
fi
CONFIGS_DIR="$DATA_DIR/configs"
mkdir -p "$CONFIGS_DIR"
# Function to generate app.config and vm.args # Function to generate app.config and vm.args
# sets two environment variables CONF_FILE and ARGS_FILE # sets two environment variables CONF_FILE and ARGS_FILE
generate_config() { generate_config() {
@ -416,7 +498,7 @@ generate_config() {
## timestamp for each generation ## timestamp for each generation
local NOW_TIME local NOW_TIME
NOW_TIME="$(call_hocon now_time)" NOW_TIME="$(date +'%Y.%m.%d.%H.%M.%S')"
## this command populates two files: app.<time>.config and vm.<time>.args ## this command populates two files: app.<time>.config and vm.<time>.args
## NOTE: the generate command merges environment variables to the base config (emqx.conf), ## NOTE: the generate command merges environment variables to the base config (emqx.conf),
@ -524,7 +606,7 @@ latest_vm_args() {
if [ -f "$vm_args_file" ]; then if [ -f "$vm_args_file" ]; then
echo "$vm_args_file" echo "$vm_args_file"
else else
echoerr "ERROR: node not initialized?" echoerr "Node not initialized?"
echoerr "Generated config file vm.*.args is not found for command '$COMMAND'" echoerr "Generated config file vm.*.args is not found for command '$COMMAND'"
echoerr "in config dir: $CONFIGS_DIR" echoerr "in config dir: $CONFIGS_DIR"
echoerr "In case the file has been deleted while the node is running," echoerr "In case the file has been deleted while the node is running,"
@ -575,31 +657,6 @@ maybe_log_to_console() {
fi fi
} }
# check if using an OTP version that has the mnesia_hook patch for use
# in mria.
is_otp_compatible() {
"$BINDIR/$PROGNAME" \
-noshell \
-boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \
-boot "$REL_DIR/start_clean" \
-eval 'try mnesia_hook:module_info() of _ -> init:stop() catch _:_ -> halt(1) end.' >/dev/null 2>&1
}
## IS_BOOT_COMMAND is set for later to inspect node name and cookie from hocon config (or env variable)
case "${COMMAND}" in
start|console|console_clean|foreground)
IS_BOOT_COMMAND='yes'
;;
ertspath)
echo "$ERTS_DIR"
exit 0
;;
*)
IS_BOOT_COMMAND='no'
;;
esac
## make EMQX_NODE_COOKIE right
if [ -n "${EMQX_NODE_NAME:-}" ]; then if [ -n "${EMQX_NODE_NAME:-}" ]; then
export EMQX_NODE__NAME="${EMQX_NODE_NAME}" export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
unset EMQX_NODE_NAME unset EMQX_NODE_NAME
@ -613,7 +670,7 @@ NAME="${EMQX_NODE__NAME:-}"
if [ -z "$NAME" ]; then if [ -z "$NAME" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
# for boot commands, inspect emqx.conf for node name # for boot commands, inspect emqx.conf for node name
NAME="$(get_config_value node.name)" NAME="$(get_boot_config 'node.name')"
else else
vm_args_file="$(latest_vm_args 'EMQX_NODE__NAME')" vm_args_file="$(latest_vm_args 'EMQX_NODE__NAME')"
NAME="$(grep -E '^-s?name' "${vm_args_file}" | awk '{print $2}')" NAME="$(grep -E '^-s?name' "${vm_args_file}" | awk '{print $2}')"
@ -643,7 +700,7 @@ fi
COOKIE="${EMQX_NODE__COOKIE:-}" COOKIE="${EMQX_NODE__COOKIE:-}"
if [ -z "$COOKIE" ]; then if [ -z "$COOKIE" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
COOKIE="$(get_config_value node.cookie)" COOKIE="$(get_boot_config 'node.cookie')"
else else
vm_args_file="$(latest_vm_args 'EMQX_NODE__COOKIE')" vm_args_file="$(latest_vm_args 'EMQX_NODE__COOKIE')"
COOKIE="$(grep -E '^-setcookie' "${vm_args_file}" | awk '{print $2}')" COOKIE="$(grep -E '^-setcookie' "${vm_args_file}" | awk '{print $2}')"
@ -654,18 +711,14 @@ if [ -z "$COOKIE" ]; then
die "Please set node.cookie in $EMQX_ETC_DIR/emqx.conf or override from environment variable EMQX_NODE__COOKIE" die "Please set node.cookie in $EMQX_ETC_DIR/emqx.conf or override from environment variable EMQX_NODE__COOKIE"
fi fi
## check if OTP version has mnesia_hook patch; if not, fallback to ## check if OTP version has mnesia_hook feature; if not, fallback to
## using Mnesia DB backend. ## using Mnesia DB backend.
if [[ "${EMQX_DB__BACKEND:-}" != "mnesia" if [[ "$IS_BOOT_COMMAND" == 'yes' && "$(get_boot_config 'db.backend')" == "rlog" ]]; then
|| "${EMQX_DB__ROLE:-}" != "core" ]]; then if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'MNESIA_OK'); then
if [[ "$IS_BOOT_COMMAND" == 'yes'
&& "$(get_config_value 'db.backend')" == "rlog" ]]; then
if ! is_otp_compatible; then
echoerr "DB Backend is RLOG, but an incompatible OTP version has been detected. Falling back to using Mnesia DB backend." echoerr "DB Backend is RLOG, but an incompatible OTP version has been detected. Falling back to using Mnesia DB backend."
export EMQX_DB__BACKEND=mnesia export EMQX_DB__BACKEND=mnesia
export EMQX_DB__ROLE=core export EMQX_DB__ROLE=core
fi fi
fi
fi fi
cd "$RUNNER_ROOT_DIR" cd "$RUNNER_ROOT_DIR"
@ -843,12 +896,13 @@ case "${COMMAND}" in
# Setup beam-required vars # Setup beam-required vars
EMU="beam" EMU="beam"
PROGNAME="${0#*/}" PROGNAME="${0}"
export EMU export EMU
export PROGNAME export PROGNAME
# Store passed arguments since they will be erased by `set` # Store passed arguments since they will be erased by `set`
# add emqx_data_dir to boot command so it is visible from 'ps -ef'
ARGS="$*" ARGS="$*"
# shellcheck disable=SC2086 # shellcheck disable=SC2086
@ -879,10 +933,10 @@ case "${COMMAND}" in
fi fi
# Log the startup # Log the startup
logger -t "${REL_NAME}[$$]" "EXEC: $* -- ${1+$ARGS}" logger -t "${REL_NAME}[$$]" "EXEC: $* -- ${1+$ARGS} -emqx_data_dir ${DATA_DIR}"
# Start the VM # Start the VM
exec "$@" -- ${1+$ARGS} exec "$@" -- ${1+$ARGS} -emqx_data_dir "${DATA_DIR}"
;; ;;
ctl) ctl)
@ -906,12 +960,6 @@ case "${COMMAND}" in
relx_nodetool rpcterms "$@" relx_nodetool rpcterms "$@"
;; ;;
root_dir)
assert_node_alive
shift
relx_nodetool "eval" 'code:root_dir()'
;;
eval) eval)
assert_node_alive assert_node_alive

View File

@ -24,9 +24,7 @@
@set script=%~n0 @set script=%~n0
:: for attach and remote_console
@set EPMD_ARG=-start_epmd false -epmd_module ekka_epmd -proto_dist ekka @set EPMD_ARG=-start_epmd false -epmd_module ekka_epmd -proto_dist ekka
:: for erl command
@set ERL_FLAGS=%EPMD_ARG% @set ERL_FLAGS=%EPMD_ARG%
:: Discover the release root directory from the directory :: Discover the release root directory from the directory
@ -70,22 +68,13 @@
@set EMQX_DB__ROLE=core @set EMQX_DB__ROLE=core
@set conf_path="%etc_dir%\emqx.conf" @set conf_path="%etc_dir%\emqx.conf"
:: Extract node name from emqx.conf
@for /f "usebackq delims=" %%I in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% get node.name"`) do @(
@call :set_trim node_name %%I
)
@set node_name=%node_name:"=%
:: Extract node cookie from emqx.conf @for /f "usebackq tokens=1,2 delims==" %%a in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% multi_get node.name node.cookie node.data_dir"`) do @(
@for /f "usebackq delims=" %%I in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% get node.cookie"`) do @( if "%%a"=="node.name" set node_name=%%b
@call :set_trim node_cookie %%I if "%%a"=="node.cookie" set node_cookie=%%b
if "%%a"=="node.data_dir" set data_dir=%%b
) )
@set node_cookie=%node_cookie:"=%
:: Extract data_dir from emqx.conf
@for /f "usebackq delims=" %%I in (`"%escript% %nodetool% hocon -s %schema_mod% -c %conf_path% get node.data_dir"`) do @(
@call :set_trim data_dir %%I
)
@set data_dir=%data_dir:"=% @set data_dir=%data_dir:"=%
:: remove trailing / :: remove trailing /
@if %data_dir:~-1%==/ SET data_dir=%data_dir:~0,-1% @if %data_dir:~-1%==/ SET data_dir=%data_dir:~0,-1%
@ -238,8 +227,3 @@ cd /d %rel_root_dir%
:attach :attach
%erl_exe% -hidden -remsh "%node_name%" -boot "%clean_boot_file_name%" "%node_type%" "remsh_%node_name%" -setcookie "%node_cookie%" %erl_exe% -hidden -remsh "%node_name%" -boot "%clean_boot_file_name%" "%node_type%" "remsh_%node_name%" -setcookie "%node_cookie%"
@goto :eof @goto :eof
:: Trim variable
:set_trim
@set %1=%2
@goto :eof

2
build
View File

@ -70,7 +70,7 @@ make_doc() {
local libs_dir1 libs_dir2 local libs_dir1 libs_dir2
libs_dir1="$("$FIND" "_build/default/lib/" -maxdepth 2 -name ebin -type d)" libs_dir1="$("$FIND" "_build/default/lib/" -maxdepth 2 -name ebin -type d)"
libs_dir2="$("$FIND" "_build/$PROFILE/lib/" -maxdepth 2 -name ebin -type d)" libs_dir2="$("$FIND" "_build/$PROFILE/lib/" -maxdepth 2 -name ebin -type d)"
libs_dir3="$("$FIND" "_build/$PROFILE/checkouts/" -maxdepth 2 -name ebin -type d || true)" libs_dir3="$("$FIND" "_build/$PROFILE/checkouts/" -maxdepth 2 -name ebin -type d 2>/dev/null || true)"
case $PROFILE in case $PROFILE in
emqx-enterprise) emqx-enterprise)
SCHEMA_MODULE='emqx_enterprise_conf_schema' SCHEMA_MODULE='emqx_enterprise_conf_schema'