fix: Erlang distribution over TLS

This commit is contained in:
Zaiming (Stone) Shi 2022-06-23 20:37:26 +02:00
parent e4b22e2dc7
commit b3f16ccb36
8 changed files with 60 additions and 28 deletions

View File

@ -3,9 +3,14 @@
%%
%% More information at: http://erlang.org/doc/apps/ssl/ssl_distribution.html
[{server,
[{certfile, "{{ platform_etc_dir }}/certs/cert.pem"},
[
%{log_level, debug}, %% NOTE: debug-level logging impacts performance, and it also requires the EMQX logging level to be set to 'debug'
{certfile, "{{ platform_etc_dir }}/certs/cert.pem"},
{keyfile, "{{ platform_etc_dir }}/certs/key.pem"},
{secure_renegotiate, true},
{depth, 0}]},
{cacertfile, "{{ platform_etc_dir }}/certs/cacert.pem"},
{verify, verify_none}
]},
{client,
[{secure_renegotiate, true}]}].
[
{verify, verify_none}
]}].

View File

@ -35,19 +35,6 @@
## (Disabled by default..use with caution!)
#-heart
## Specify the erlang distributed protocol.
## Can be one of: inet_tcp, inet6_tcp, inet_tls
#-proto_dist inet_tcp
## The shell is started in a restricted mode.
## In this mode, the shell evaluates a function call only if allowed.
## Prevent user from accidentally calling a function from the prompt that could harm a running system.
-stdlib restricted_shell emqx_restricted_shell
## Specify SSL Options in the file if using SSL for Erlang Distribution.
## Used only when -proto_dist set to inet_tls
-ssl_dist_optfile {{ platform_etc_dir }}/ssl_dist.conf
## Specifies the net_kernel tick time in seconds.
## This is the approximate time a connected node may be unresponsive until
## it is considered down and thereby disconnected.

View File

@ -27,7 +27,7 @@
{jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}},
{cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.0"}}},
{esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.3"}}},
{ekka, {git, "https://github.com/emqx/ekka", {tag, "0.13.0"}}},
{ekka, {git, "https://github.com/emqx/ekka", {tag, "0.13.1"}}},
{gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}},
{hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.28.2"}}},
{pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}},

View File

@ -306,6 +306,7 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
COMPATIBILITY_INFO="$(compatiblity_info 2>&1 || true)"
if ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'BEAM_OK'); then
## not able to start beam.smp
set +x
echoerr "$COMPATIBILITY_INFO"
echoerr "Please ensure it is running on the correct platform:"
echoerr "$BUILD_INFO"
@ -314,6 +315,7 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
exit 1
elif ! (echo -e "$COMPATIBILITY_INFO" | grep -q 'CRYPTO_OK'); then
## not able to start crypto app
set +x
echoerr "$COMPATIBILITY_INFO"
exit 2
fi
@ -322,9 +324,6 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
[ "$DEBUG" -eq 1 ] && set -x
fi
NO_EPMD="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka"
EPMD_ARGS="${EPMD_ARGS:-${NO_EPMD}}"
# Warn the user if ulimit -n is less than 1024
ULIMIT_F=$(ulimit -n)
if [ "$ULIMIT_F" -lt 1024 ]; then
@ -411,7 +410,7 @@ call_hocon() {
## Resolve boot configs in a batch
## This is because starting the Erlang beam with all modules loaded
## and parsing HOCON config + environment variables is a non-trivial task
CONF_KEYS=( 'node.data_dir' 'node.name' 'node.cookie' 'node.db_backend' )
CONF_KEYS=( 'node.data_dir' 'node.name' 'node.cookie' 'node.db_backend' 'cluster.proto_dist' )
if [ "$IS_ENTERPRISE" = 'yes' ]; then
CONF_KEYS+=( 'license.file' 'license.key' )
fi
@ -424,18 +423,24 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
export EMQX_BOOT_CONFIGS
fi
else
# For non-boot commands, we try to get data_dir from ps -ef command
# For non-boot commands, we try to get data_dir and ssl_dist_optfile from 'ps -ef' output
# shellcheck disable=SC2009
PS_LINE="$(ps -ef | grep "\-[r]oot $RUNNER_ROOT_DIR" | grep -oE "\-emqx_data_dir.*"|| true)"
PS_LINE="$(ps -ef | grep "\-[r]oot $RUNNER_ROOT_DIR" || true)"
if [ "$(echo -e "$PS_LINE" | wc -l)" -eq 1 ]; then
## only one emqx node is running
## strip 'emqx_data_dir ' and ' --' because the dir in between may contain spaces
DATA_DIR="$(echo -e "$PS_LINE" | sed -E 's#.+emqx_data_dir[[:blank:]]##g' | sed -E 's#[[:blank:]]--$##g')"
DATA_DIR="$(echo -e "$PS_LINE" | grep -oE "\-emqx_data_dir.*" | sed -E 's#.+emqx_data_dir[[:blank:]]##g' | sed -E 's#[[:blank:]]--$##g' || true)"
if [ "$DATA_DIR" = '' ]; then
## this should not happen unless -emqx_data_dir is not set
die "node_is_not_running!" 1
fi
EMQX_BOOT_CONFIGS="node.data_dir=$DATA_DIR"
# Recover the '-ssl_dist_optfile <path>' argument, if present, from the running
# node's command line captured in PS_LINE. Its presence decides which
# cluster.proto_dist value is recorded in EMQX_BOOT_CONFIGS.
# The leading '-' is written as the bracket expression '[-]': a pattern that
# starts with '+' is a repetition operator without an operand, which POSIX
# leaves undefined for extended regular expressions, and a pattern that starts
# with a bare '-' would be parsed by grep as an option.
SSL_DIST_OPTFILE="$(echo -e "$PS_LINE" | grep -oE '[-]ssl_dist_optfile[[:space:]].+[[:space:]]' | awk '{print $2}' || true)"
if [ -z "$SSL_DIST_OPTFILE" ]; then
    # no TLS option file on the command line: plain TCP distribution
    EMQX_BOOT_CONFIGS="node.data_dir=${DATA_DIR}\ncluster.proto_dist=inet_tcp"
else
    EMQX_BOOT_CONFIGS="node.data_dir=${DATA_DIR}\ncluster.proto_dist=inet_tls"
fi
else
## None or more than one node is running, resolve from boot config
EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
@ -447,6 +452,33 @@ get_boot_config() {
echo -e "$EMQX_BOOT_CONFIGS" | grep "$path_to_value=" | sed -e "s/$path_to_value=//g" | tr -d \"
}
# Build the Erlang distribution arguments.
# epmd is not started; ekka_epmd is used as the epmd module and 'ekka' as the
# distribution protocol.
EPMD_ARGS="-start_epmd false -epmd_module ekka_epmd -proto_dist ekka"
PROTO_DIST="$(get_boot_config 'cluster.proto_dist' || true)"
# This environment variable is required by the ekka_dist module:
# proto_dist is overridden to ekka, and there is no ekka_tls module.
# Defaults to inet_tcp when cluster.proto_dist could not be resolved.
export EKKA_PROTO_DIST_MOD="${PROTO_DIST:-inet_tcp}"
if [ "$EKKA_PROTO_DIST_MOD" = 'inet_tls' ]; then
    if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
        SSL_DIST_OPTFILE=${EMQX_SSL_DIST_OPTFILE:-"$EMQX_ETC_DIR/ssl_dist.conf"}
        case "$SSL_DIST_OPTFILE" in
            *\ *)
                # There is unfortunately no way to support spaces in this option, because
                # non-boot commands (nodetool) need to grep it from the 'ps -ef' output.
                set +x
                echoerr "Got space in: $SSL_DIST_OPTFILE"
                echoerr "No space is allowed for Erlang distribution over SSL option file path."
                echoerr "Configure it from environment variable EMQX_SSL_DIST_OPTFILE."
                echoerr "Or make sure emqx root path '$RUNNER_ROOT_DIR' has no space"
                exit 1
                ;;
        esac
    elif [ -z "${SSL_DIST_OPTFILE:-}" ]; then
        # Non-boot command for which no option file path has been set so far.
        # Fall back to the same default a boot command uses, so that
        # -ssl_dist_optfile is never passed with an empty value.
        SSL_DIST_OPTFILE="${EMQX_SSL_DIST_OPTFILE:-$EMQX_ETC_DIR/ssl_dist.conf}"
    fi
    EPMD_ARGS="${EPMD_ARGS} -ssl_dist_optfile $SSL_DIST_OPTFILE"
fi
DATA_DIR="$(get_boot_config 'node.data_dir')"
# ensure no trailing /
DATA_DIR="${DATA_DIR%/}"
@ -472,6 +504,7 @@ check_license() {
if [[ -n "$key_license" && ("$key_license" != "undefined") ]]; then
call_nodetool check_license_key "$key_license"
else
set +x
echoerr "License not found."
echoerr "Please specify one via EMQX_LICENSE__KEY or EMQX_LICENSE__FILE variables"
echoerr "or via license.key|file in emqx_enterprise.conf."
@ -800,6 +833,7 @@ case "${COMMAND}" in
# also log to syslog
logger -t "${REL_NAME}[${PID}]" "STOP: $msg"
# log to user console
set +x
echoerr "Stop failed, $msg"
echo "ERROR: $PID is still around"
ps -p "$PID"

View File

@ -68,6 +68,10 @@ do(Args) ->
halt(1)
end,
%% Mute logger from now on.
%% Otherwise Erlang distribution over TLS (inet_tls_dist) warning logs
%% and supervisor reports may contaminate io:format outputs
logger:set_primary_config(level, none),
case RestArgs of
["getpid"] ->
io:format("~p\n", [list_to_integer(rpc:call(TargetNode, os, getpid, []))]);

View File

@ -52,7 +52,7 @@ defmodule EMQXUmbrella.MixProject do
{:jiffy, github: "emqx/jiffy", tag: "1.0.5", override: true},
{:cowboy, github: "emqx/cowboy", tag: "2.9.0", override: true},
{:esockd, github: "emqx/esockd", tag: "5.9.3", override: true},
{:ekka, github: "emqx/ekka", tag: "0.13.0", override: true},
{:ekka, github: "emqx/ekka", tag: "0.13.1", override: true},
{:gen_rpc, github: "emqx/gen_rpc", tag: "2.8.1", override: true},
{:minirest, github: "emqx/minirest", tag: "1.3.5", override: true},
{:ecpool, github: "emqx/ecpool", tag: "0.5.2"},

View File

@ -54,7 +54,7 @@
, {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}}
, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.0"}}}
, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.3"}}}
, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.13.0"}}}
, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.13.1"}}}
, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}}
, {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.5"}}}
, {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.2"}}}

View File

@ -29,6 +29,7 @@ docker run -d -t --restart=always --name "$NODE1" \
-e EMQX_LOG__CONSOLE_HANDLER__LEVEL=debug \
-e EMQX_NODE_NAME="emqx@$NODE1" \
-e EMQX_NODE_COOKIE="$COOKIE" \
-e EMQX_CLUSTER__PROTO_DIST='inet_tls' \
-p 18083:18083 \
"$IMAGE"
@ -37,6 +38,7 @@ docker run -d -t --restart=always --name "$NODE2" \
-e EMQX_LOG__CONSOLE_HANDLER__LEVEL=debug \
-e EMQX_NODE_NAME="emqx@$NODE2" \
-e EMQX_NODE_COOKIE="$COOKIE" \
-e EMQX_CLUSTER__PROTO_DIST='inet_tls' \
-p 18084:18083 \
"$IMAGE"