diff --git a/.ci/docker-compose-file/.env b/.ci/docker-compose-file/.env index bd925e224..e5564efb7 100644 --- a/.ci/docker-compose-file/.env +++ b/.ci/docker-compose-file/.env @@ -1,5 +1,5 @@ MYSQL_TAG=8 -REDIS_TAG=6 +REDIS_TAG=7.0 MONGO_TAG=5 PGSQL_TAG=13 LDAP_TAG=2.4.50 diff --git a/.ci/docker-compose-file/Makefile.local b/.ci/docker-compose-file/Makefile.local index 2cf0802ce..9c12255e4 100644 --- a/.ci/docker-compose-file/Makefile.local +++ b/.ci/docker-compose-file/Makefile.local @@ -13,10 +13,10 @@ help: up: env \ MYSQL_TAG=8 \ - REDIS_TAG=6 \ + REDIS_TAG=7.0 \ MONGO_TAG=5 \ PGSQL_TAG=13 \ - docker compose \ + docker-compose \ -f .ci/docker-compose-file/docker-compose.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tcp.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tls.yaml \ @@ -34,7 +34,7 @@ up: up -d --build --remove-orphans down: - docker compose \ + docker-compose \ -f .ci/docker-compose-file/docker-compose.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tcp.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tls.yaml \ diff --git a/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml b/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml index 9c03fc65e..f44a71e14 100644 --- a/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml @@ -1,11 +1,57 @@ version: '3.9' - services: - redis_cluster: + + redis-cluster-1: &redis-node + container_name: redis-cluster-1 image: redis:${REDIS_TAG} - container_name: redis-cluster volumes: - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node cluster && tail -f /var/log/redis-server.log" + - ./redis/cluster-tcp:/usr/local/etc/redis + command: redis-server /usr/local/etc/redis/redis.conf networks: - emqx_bridge + + + redis-cluster-2: + <<: *redis-node + container_name: redis-cluster-2 + + redis-cluster-3: + <<: *redis-node + container_name: 
redis-cluster-3 + + redis-cluster-4: + <<: *redis-node + container_name: redis-cluster-4 + + redis-cluster-5: + <<: *redis-node + container_name: redis-cluster-5 + + redis-cluster-6: + <<: *redis-node + container_name: redis-cluster-6 + + redis-cluster-create: + <<: *redis-node + container_name: redis-cluster-create + command: > + redis-cli + --cluster create + redis-cluster-1:6379 + redis-cluster-2:6379 + redis-cluster-3:6379 + redis-cluster-4:6379 + redis-cluster-5:6379 + redis-cluster-6:6379 + --cluster-replicas 1 + --cluster-yes + --pass "public" + --no-auth-warning + depends_on: + - redis-cluster-1 + - redis-cluster-2 + - redis-cluster-3 + - redis-cluster-4 + - redis-cluster-5 + - redis-cluster-6 + diff --git a/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml b/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml index bfbf1a4a3..988620acb 100644 --- a/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml @@ -1,14 +1,59 @@ version: '3.9' - services: - redis_cluster_tls: - container_name: redis-cluster-tls + + redis-cluster-tls-1: &redis-node + container_name: redis-cluster-tls-1 image: redis:${REDIS_TAG} volumes: - - ../../apps/emqx/etc/certs/cacert.pem:/etc/certs/ca.crt - - ../../apps/emqx/etc/certs/cert.pem:/etc/certs/redis.crt - - ../../apps/emqx/etc/certs/key.pem:/etc/certs/redis.key - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node cluster --tls-enabled && tail -f /var/log/redis-server.log" + - ./redis/cluster-tls:/usr/local/etc/redis + - ../../apps/emqx/etc/certs:/etc/certs + command: redis-server /usr/local/etc/redis/redis.conf networks: - emqx_bridge + + redis-cluster-tls-2: + <<: *redis-node + container_name: redis-cluster-tls-2 + + redis-cluster-tls-3: + <<: *redis-node + container_name: redis-cluster-tls-3 + + redis-cluster-tls-4: + <<: *redis-node + container_name: redis-cluster-tls-4 + + redis-cluster-tls-5: + <<: 
*redis-node + container_name: redis-cluster-tls-5 + + redis-cluster-tls-6: + <<: *redis-node + container_name: redis-cluster-tls-6 + + redis-cluster-tls-create: + <<: *redis-node + container_name: redis-cluster-tls-create + command: > + redis-cli + --cluster create + redis-cluster-tls-1:6389 + redis-cluster-tls-2:6389 + redis-cluster-tls-3:6389 + redis-cluster-tls-4:6389 + redis-cluster-tls-5:6389 + redis-cluster-tls-6:6389 + --cluster-replicas 1 + --cluster-yes + --pass "public" + --no-auth-warning + --tls + --insecure + depends_on: + - redis-cluster-tls-1 + - redis-cluster-tls-2 + - redis-cluster-tls-3 + - redis-cluster-tls-4 + - redis-cluster-tls-5 + - redis-cluster-tls-6 + diff --git a/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml b/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml index 07c6cfb0a..d395edd2b 100644 --- a/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml @@ -1,11 +1,41 @@ -version: '3.9' +version: "3" services: - redis_sentinel_server: + + redis-sentinel-master: + container_name: redis-sentinel-master + image: redis:${REDIS_TAG} + volumes: + - ./redis/sentinel-tcp:/usr/local/etc/redis + command: redis-server /usr/local/etc/redis/master.conf + networks: + - emqx_bridge + + redis-sentinel-slave: + container_name: redis-sentinel-slave + image: redis:${REDIS_TAG} + volumes: + - ./redis/sentinel-tcp:/usr/local/etc/redis + command: redis-server /usr/local/etc/redis/slave.conf + networks: + - emqx_bridge + depends_on: + - redis-sentinel-master + + redis-sentinel: container_name: redis-sentinel image: redis:${REDIS_TAG} volumes: - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node sentinel && tail -f /var/log/redis-server.log" + - ./redis/sentinel-tcp/sentinel-base.conf:/usr/local/etc/redis/sentinel-base.conf + depends_on: + - redis-sentinel-master + - redis-sentinel-slave + command: > + bash -c "cp -f 
/usr/local/etc/redis/sentinel-base.conf /usr/local/etc/redis/sentinel.conf && + redis-sentinel /usr/local/etc/redis/sentinel.conf" networks: - emqx_bridge + + + + diff --git a/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml b/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml index b9eaefa9c..d883e2992 100644 --- a/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml @@ -1,14 +1,44 @@ -version: '3.9' +version: "3" services: - redis_sentinel_server_tls: + + redis-sentinel-tls-master: + container_name: redis-sentinel-tls-master + image: redis:${REDIS_TAG} + volumes: + - ./redis/sentinel-tls:/usr/local/etc/redis + - ../../apps/emqx/etc/certs:/etc/certs + command: redis-server /usr/local/etc/redis/master.conf + networks: + - emqx_bridge + + redis-sentinel-tls-slave: + container_name: redis-sentinel-tls-slave + image: redis:${REDIS_TAG} + volumes: + - ./redis/sentinel-tls:/usr/local/etc/redis + - ../../apps/emqx/etc/certs:/etc/certs + command: redis-server /usr/local/etc/redis/slave.conf + networks: + - emqx_bridge + depends_on: + - redis-sentinel-tls-master + + redis-sentinel-tls: container_name: redis-sentinel-tls image: redis:${REDIS_TAG} volumes: - - ../../apps/emqx/etc/certs/cacert.pem:/etc/certs/ca.crt - - ../../apps/emqx/etc/certs/cert.pem:/etc/certs/redis.crt - - ../../apps/emqx/etc/certs/key.pem:/etc/certs/redis.key - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node sentinel --tls-enabled && tail -f /var/log/redis-server.log" + - ./redis/sentinel-tls/sentinel-base.conf:/usr/local/etc/redis/sentinel-base.conf + - ../../apps/emqx/etc/certs:/etc/certs + depends_on: + - redis-sentinel-tls-master + - redis-sentinel-tls-slave + command: > + bash -c "cp -f /usr/local/etc/redis/sentinel-base.conf /usr/local/etc/redis/sentinel.conf && + redis-sentinel /usr/local/etc/redis/sentinel.conf" networks: - emqx_bridge + + + + diff --git 
a/.ci/docker-compose-file/redis/.gitignore b/.ci/docker-compose-file/redis/.gitignore deleted file mode 100644 index 23ffe8469..000000000 --- a/.ci/docker-compose-file/redis/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -r700?i.log -nodes.700?.conf -*.rdb diff --git a/.ci/docker-compose-file/redis/cluster-tcp/redis.conf b/.ci/docker-compose-file/redis/cluster-tcp/redis.conf new file mode 100644 index 000000000..79a0d8a73 --- /dev/null +++ b/.ci/docker-compose-file/redis/cluster-tcp/redis.conf @@ -0,0 +1,18 @@ +bind :: 0.0.0.0 +port 6379 +requirepass public + +cluster-enabled yes + +masterauth public + +protected-mode no +daemonize no + +loglevel notice +logfile "" + +always-show-logo no +save "" +appendonly no + diff --git a/.ci/docker-compose-file/redis/cluster-tls/redis.conf b/.ci/docker-compose-file/redis/cluster-tls/redis.conf new file mode 100644 index 000000000..3020f46a7 --- /dev/null +++ b/.ci/docker-compose-file/redis/cluster-tls/redis.conf @@ -0,0 +1,28 @@ +bind :: 0.0.0.0 +port 6379 +requirepass public + +cluster-enabled yes + +masterauth public + +tls-port 6389 +tls-cert-file /etc/certs/cert.pem +tls-key-file /etc/certs/key.pem +tls-ca-cert-file /etc/certs/cacert.pem +tls-auth-clients no + +tls-replication yes +tls-cluster yes + + +protected-mode no +daemonize no + +loglevel notice +logfile "" + +always-show-logo no +save "" +appendonly no + diff --git a/.ci/docker-compose-file/redis/redis-tls.conf b/.ci/docker-compose-file/redis/redis-tls.conf deleted file mode 100644 index c503dc2e8..000000000 --- a/.ci/docker-compose-file/redis/redis-tls.conf +++ /dev/null @@ -1,12 +0,0 @@ -daemonize yes -bind 0.0.0.0 :: -logfile /var/log/redis-server.log -protected-mode no -requirepass public -masterauth public - -tls-cert-file /etc/certs/redis.crt -tls-key-file /etc/certs/redis.key -tls-ca-cert-file /etc/certs/ca.crt -tls-replication yes -tls-cluster yes diff --git a/.ci/docker-compose-file/redis/redis.conf b/.ci/docker-compose-file/redis/redis.conf deleted file mode 
100644 index 484d9abf9..000000000 --- a/.ci/docker-compose-file/redis/redis.conf +++ /dev/null @@ -1,6 +0,0 @@ -daemonize yes -bind 0.0.0.0 :: -logfile /var/log/redis-server.log -protected-mode no -requirepass public -masterauth public diff --git a/.ci/docker-compose-file/redis/redis.sh b/.ci/docker-compose-file/redis/redis.sh deleted file mode 100755 index be6462249..000000000 --- a/.ci/docker-compose-file/redis/redis.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash - -set -x - -LOCAL_IP=$(hostname -i | grep -oE '((25[0-5]|(2[0-4]|1[0-9]|[1-9]|)[0-9])\.){3}(25[0-5]|(2[0-4]|1[0-9]|[1-9]|)[0-9])' | head -n 1) - -node=single -tls=false -while [[ $# -gt 0 ]] -do -key="$1" - -case $key in - -n|--node) - node="$2" - shift # past argument - shift # past value - ;; - --tls-enabled) - tls=true - shift # past argument - ;; - *) - shift # past argument - ;; -esac -done - -rm -f \ - /data/conf/r7000i.log \ - /data/conf/r7001i.log \ - /data/conf/r7002i.log \ - /data/conf/nodes.7000.conf \ - /data/conf/nodes.7001.conf \ - /data/conf/nodes.7002.conf - -if [ "$node" = "cluster" ]; then - if $tls; then - redis-server /data/conf/redis-tls.conf --port 7000 --cluster-config-file /data/conf/nodes.7000.conf \ - --tls-port 8000 --cluster-enabled yes - redis-server /data/conf/redis-tls.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --tls-port 8001 --cluster-enabled yes - redis-server /data/conf/redis-tls.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --tls-port 8002 --cluster-enabled yes - else - redis-server /data/conf/redis.conf --port 7000 --cluster-config-file /data/conf/nodes.7000.conf \ - --cluster-enabled yes - redis-server /data/conf/redis.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --cluster-enabled yes - redis-server /data/conf/redis.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --cluster-enabled yes - fi -elif [ "$node" = "sentinel" ]; then - if $tls; then - redis-server 
/data/conf/redis-tls.conf --port 7000 --cluster-config-file /data/conf/nodes.7000.conf \ - --tls-port 8000 --cluster-enabled no - redis-server /data/conf/redis-tls.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --tls-port 8001 --cluster-enabled no --slaveof "$LOCAL_IP" 8000 - redis-server /data/conf/redis-tls.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --tls-port 8002 --cluster-enabled no --slaveof "$LOCAL_IP" 8000 - - else - redis-server /data/conf/redis.conf --port 7000 --cluster-config-file /data/conf/nodes.7000.conf \ - --cluster-enabled no - redis-server /data/conf/redis.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --cluster-enabled no --slaveof "$LOCAL_IP" 7000 - redis-server /data/conf/redis.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --cluster-enabled no --slaveof "$LOCAL_IP" 7000 - fi -fi - -REDIS_LOAD_FLG=true - -while $REDIS_LOAD_FLG; -do - sleep 1 - redis-cli --pass public --no-auth-warning -p 7000 info 1> /data/conf/r7000i.log 2> /dev/null - if ! [ -s /data/conf/r7000i.log ]; then - continue - fi - redis-cli --pass public --no-auth-warning -p 7001 info 1> /data/conf/r7001i.log 2> /dev/null - if ! [ -s /data/conf/r7001i.log ]; then - continue - fi - redis-cli --pass public --no-auth-warning -p 7002 info 1> /data/conf/r7002i.log 2> /dev/null; - if ! 
[ -s /data/conf/r7002i.log ]; then - continue - fi - if [ "$node" = "cluster" ] ; then - if $tls; then - yes "yes" | redis-cli --cluster create "$LOCAL_IP:8000" "$LOCAL_IP:8001" "$LOCAL_IP:8002" \ - --pass public --no-auth-warning \ - --tls true --cacert /etc/certs/ca.crt \ - --cert /etc/certs/redis.crt --key /etc/certs/redis.key - else - yes "yes" | redis-cli --cluster create "$LOCAL_IP:7000" "$LOCAL_IP:7001" "$LOCAL_IP:7002" \ - --pass public --no-auth-warning - fi - elif [ "$node" = "sentinel" ]; then - tee /_sentinel.conf>/dev/null << EOF -port 26379 -bind 0.0.0.0 :: -daemonize yes -logfile /var/log/redis-server.log -dir /tmp -EOF - if $tls; then - cat >>/_sentinel.conf<>/_sentinel.conf<long_gc log is emitted, +and an MQTT message is published to the system topic $SYS/sysmon/long_gc. +""" + zh: """当系统检测到某个 Erlang 进程垃圾回收占用过长时间,会触发一条带有 long_gc 关键字的日志。 +同时还会发布一条主题为 $SYS/sysmon/long_gc 的 MQTT 系统消息。 +""" } label { en: "Enable Long GC monitoring." @@ -199,8 +203,12 @@ emqx_schema { sysmon_vm_long_schedule { desc { - en: "Enable Long Schedule monitoring." - zh: "启用长调度监控。" + en: """When the Erlang VM detects a task scheduled for too long, a warning level 'long_schedule' log is emitted, +and an MQTT message is published to the system topic $SYS/sysmon/long_schedule. +""" + zh: """启用后,如果 Erlang VM 调度器出现某个任务占用时间过长时,会触发一条带有 'long_schedule' 关键字的日志。 +同时还会发布一条主题为 $SYS/sysmon/long_schedule 的 MQTT 系统消息。 +""" } label { en: "Enable Long Schedule monitoring." @@ -210,8 +218,13 @@ emqx_schema { sysmon_vm_large_heap { desc { - en: "Enable Large Heap monitoring." - zh: "启用大 heap 监控。" + en: """When an Erlang process consumes a large amount of memory for its heap space, +the system will write a warning level large_heap log, and an MQTT message is published to +the system topic $SYS/sysmon/large_heap. +""" + zh: """启用后,当一个 Erlang 进程申请了大量内存,系统会触发一条带有 large_heap 关键字的 +warning 级别日志。同时还会发布一条主题为 $SYS/sysmon/large_heap 的 MQTT 系统消息。 +""" } label { en: "Enable Large Heap monitoring."
@@ -221,8 +234,13 @@ emqx_schema { sysmon_vm_busy_dist_port { desc { - en: "Enable Busy Distribution Port monitoring." - zh: "启用分布式端口过忙监控。" + en: """When the RPC connection used to communicate with other nodes in the cluster is overloaded, +there will be a busy_dist_port warning log, +and an MQTT message is published to the system topic $SYS/sysmon/busy_dist_port. +""" + zh: """启用后,当用于集群节点之间 RPC 的连接过忙时,会触发一条带有 busy_dist_port 关键字的 warning 级别日志。 +同时还会发布一条主题为 $SYS/sysmon/busy_dist_port 的 MQTT 系统消息。 +""" } label { en: "Enable Busy Distribution Port monitoring." @@ -232,8 +250,12 @@ emqx_schema { sysmon_vm_busy_port { desc { - en: "Enable Busy Port monitoring." - zh: "启用端口过忙监控。" + en: """When a port (e.g. TCP socket) is overloaded, there will be a busy_port warning log, +and an MQTT message is published to the system topic $SYS/sysmon/busy_port. +""" + zh: """当一个系统接口(例如 TCP socket)过忙,会触发一条带有 busy_port 关键字的 warning 级别的日志。 +同时还会发布一条主题为 $SYS/sysmon/busy_port 的 MQTT 系统消息。 +""" } label { en: "Enable Busy Port monitoring." diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index 7437bc299..4e4066eef 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,10 +32,10 @@ %% `apps/emqx/src/bpapi/README.md' %% Community edition --define(EMQX_RELEASE_CE, "5.0.15"). +-define(EMQX_RELEASE_CE, "5.0.16"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.0.0-rc.1"). +-define(EMQX_RELEASE_EE, "5.0.0"). %% the HTTP API version -define(EMQX_API_VERSION, "5.0"). diff --git a/apps/emqx/include/logger.hrl b/apps/emqx/include/logger.hrl index e93aa46f4..27ffc6cc0 100644 --- a/apps/emqx/include/logger.hrl +++ b/apps/emqx/include/logger.hrl @@ -48,9 +48,9 @@ -define(TRACE(Level, Tag, Msg, Meta), begin case persistent_term:get(?TRACE_FILTER, []) of [] -> ok; - %% We can't bind filter list to a variablebecause we pollute the calling scope with it.
+ %% We can't bind filter list to a variable because we pollute the calling scope with it. %% We also don't want to wrap the macro body in a fun - %% beacause this adds overhead to the happy path. + %% because this adds overhead to the happy path. %% So evaluate `persistent_term:get` twice. _ -> emqx_trace:log(persistent_term:get(?TRACE_FILTER, []), Msg, (Meta)#{trace_tag => Tag}) end, diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 0ea42c0fb..cad599436 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -29,7 +29,7 @@ {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.4"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.13.9"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}}, - {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.0"}}}, + {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.3"}}}, {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}}, {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.1"}}}, {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.0"}}} diff --git a/apps/emqx/src/config/emqx_config_logger.erl b/apps/emqx/src/config/emqx_config_logger.erl index 4b46e43d6..15e4d3959 100644 --- a/apps/emqx/src/config/emqx_config_logger.erl +++ b/apps/emqx/src/config/emqx_config_logger.erl @@ -18,6 +18,7 @@ -behaviour(emqx_config_handler). %% API +-export([tr_handlers/1, tr_level/1]). -export([add_handler/0, remove_handler/0, refresh_config/0]). -export([post_config_update/5]). @@ -37,38 +38,238 @@ remove_handler() -> %% so we need to refresh the logger config after this node starts. %% It will not affect the logger config when cluster-override.conf is unchanged. refresh_config() -> - case emqx:get_raw_config(?LOG, undefined) of - %% no logger config when CT is running. - undefined -> - ok; - Log -> - {ok, _} = emqx:update_config(?LOG, Log), - ok - end. 
+ Overrides = emqx_config:read_override_confs(), + refresh_config(Overrides). -post_config_update(?LOG, _Req, _NewConf, _OldConf, AppEnvs) -> - Kernel = proplists:get_value(kernel, AppEnvs), - NewHandlers = proplists:get_value(logger, Kernel, []), - Level = proplists:get_value(logger_level, Kernel, warning), - ok = update_log_handlers(NewHandlers), - ok = emqx_logger:set_primary_log_level(Level), - application:set_env(kernel, logger_level, Level), - ok; +refresh_config(#{<<"log">> := _}) -> + %% read the checked config + LogConfig = emqx:get_config(?LOG, undefined), + Conf = #{log => LogConfig}, + ok = do_refresh_config(Conf); +refresh_config(_) -> + %% No config override found for 'log', do nothing + %% because the 'kernel' app should already be configured + %% from the base configs. i.e. emqx.conf + env vars + ok. + +%% this call is shared between initial config refresh at boot +%% and dynamic config update from HTTP API +do_refresh_config(Conf) -> + Handlers = tr_handlers(Conf), + ok = update_log_handlers(Handlers), + Level = tr_level(Conf), + ok = maybe_update_log_level(Level), + ok. + +post_config_update(?LOG, _Req, NewConf, _OldConf, _AppEnvs) -> + ok = do_refresh_config(#{log => NewConf}); post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) -> ok. +maybe_update_log_level(NewLevel) -> + OldLevel = emqx_logger:get_primary_log_level(), + case OldLevel =:= NewLevel of + true -> + %% no change + ok; + false -> + ok = emqx_logger:set_primary_log_level(NewLevel), + %% also update kernel's logger_level for troubleshooting + %% what is actually in effect is the logger's primary log level + ok = application:set_env(kernel, logger_level, NewLevel), + log_to_console("Config override: log level is set to '~p'~n", [NewLevel]) + end. + +log_to_console(Fmt, Args) -> + io:format(standard_error, Fmt, Args). 
+ update_log_handlers(NewHandlers) -> OldHandlers = application:get_env(kernel, logger, []), - lists:foreach( - fun({handler, HandlerId, _Mod, _Conf}) -> - logger:remove_handler(HandlerId) + NewHandlersIds = lists:map(fun({handler, Id, _Mod, _Conf}) -> Id end, NewHandlers), + OldHandlersIds = lists:map(fun({handler, Id, _Mod, _Conf}) -> Id end, OldHandlers), + Removes = lists:map(fun(Id) -> {removed, Id} end, OldHandlersIds -- NewHandlersIds), + MapFn = fun({handler, Id, Mod, Conf} = Handler) -> + case lists:keyfind(Id, 2, OldHandlers) of + {handler, Id, Mod, Conf} -> + %% no change + false; + {handler, Id, _Mod, _Conf} -> + {true, {updated, Handler}}; + false -> + {true, {enabled, Handler}} + end + end, + AddsAndUpdates = lists:filtermap(MapFn, NewHandlers), + lists:foreach(fun update_log_handler/1, Removes ++ AddsAndUpdates), + ok = application:set_env(kernel, logger, NewHandlers), + ok. + +update_log_handler({removed, Id}) -> + log_to_console("Config override: ~s is removed~n", [id_for_log(Id)]), + logger:remove_handler(Id); +update_log_handler({Action, {handler, Id, Mod, Conf}}) -> + log_to_console("Config override: ~s is ~p~n", [id_for_log(Id), Action]), + % may return {error, {not_found, Id}} + _ = logger:remove_handler(Id), + case logger:add_handler(Id, Mod, Conf) of + ok -> + ok; + %% Don't crash here, otherwise the cluster rpc will retry the wrong handler forever. + {error, Reason} -> + log_to_console( + "Config override: ~s is ~p, but failed to add handler: ~p~n", + [id_for_log(Id), Action, Reason] + ) + end, + ok. + +id_for_log(console) -> "log.console_handler"; +id_for_log(Other) -> "log.file_handlers." ++ atom_to_list(Other). + +atom(Id) when is_binary(Id) -> binary_to_atom(Id, utf8); +atom(Id) when is_atom(Id) -> Id. + +%% @doc Translate raw config to app-env compatible log handler configs list. +tr_handlers(Conf) -> + %% mute the default handler + tr_console_handler(Conf) ++ + tr_file_handlers(Conf). 
+ +%% For the default logger that outputs to console +tr_console_handler(Conf) -> + case conf_get("log.console_handler.enable", Conf) of + true -> + ConsoleConf = conf_get("log.console_handler", Conf), + [ + {handler, console, logger_std_h, #{ + level => conf_get("log.console_handler.level", Conf), + config => (log_handler_conf(ConsoleConf))#{type => standard_io}, + formatter => log_formatter(ConsoleConf), + filters => log_filter(ConsoleConf) + }} + ]; + false -> + [] + end. + +%% For the file logger +tr_file_handlers(Conf) -> + Handlers = logger_file_handlers(Conf), + lists:map(fun tr_file_handler/1, Handlers). + +tr_file_handler({HandlerName, SubConf}) -> + {handler, atom(HandlerName), logger_disk_log_h, #{ + level => conf_get("level", SubConf), + config => (log_handler_conf(SubConf))#{ + type => + case conf_get("rotation.enable", SubConf) of + true -> wrap; + _ -> halt + end, + file => conf_get("file", SubConf), + max_no_files => conf_get("rotation.count", SubConf), + max_no_bytes => conf_get("max_size", SubConf) + }, + formatter => log_formatter(SubConf), + filters => log_filter(SubConf), + filesync_repeat_interval => no_repeat + }}. + +logger_file_handlers(Conf) -> + Handlers = maps:to_list(conf_get("log.file_handlers", Conf, #{})), + lists:filter( + fun({_Name, Opts}) -> + B = conf_get("enable", Opts), + true = is_boolean(B), + B end, - OldHandlers -- NewHandlers - ), - lists:foreach( - fun({handler, HandlerId, Mod, Conf}) -> - logger:add_handler(HandlerId, Mod, Conf) + Handlers + ). + +conf_get(Key, Conf) -> emqx_schema:conf_get(Key, Conf). +conf_get(Key, Conf, Default) -> emqx_schema:conf_get(Key, Conf, Default). 
+ +log_handler_conf(Conf) -> + SycModeQlen = conf_get("sync_mode_qlen", Conf), + DropModeQlen = conf_get("drop_mode_qlen", Conf), + FlushQlen = conf_get("flush_qlen", Conf), + Overkill = conf_get("overload_kill", Conf), + BurstLimit = conf_get("burst_limit", Conf), + #{ + sync_mode_qlen => SycModeQlen, + drop_mode_qlen => DropModeQlen, + flush_qlen => FlushQlen, + overload_kill_enable => conf_get("enable", Overkill), + overload_kill_qlen => conf_get("qlen", Overkill), + overload_kill_mem_size => conf_get("mem_size", Overkill), + overload_kill_restart_after => conf_get("restart_after", Overkill), + burst_limit_enable => conf_get("enable", BurstLimit), + burst_limit_max_count => conf_get("max_count", BurstLimit), + burst_limit_window_time => conf_get("window_time", BurstLimit) + }. + +log_formatter(Conf) -> + CharsLimit = + case conf_get("chars_limit", Conf) of + unlimited -> unlimited; + V when V > 0 -> V end, - NewHandlers -- OldHandlers - ), - application:set_env(kernel, logger, NewHandlers). + TimeOffSet = + case conf_get("time_offset", Conf) of + "system" -> ""; + "utc" -> 0; + OffSetStr -> OffSetStr + end, + SingleLine = conf_get("single_line", Conf), + Depth = conf_get("max_depth", Conf), + do_formatter(conf_get("formatter", Conf), CharsLimit, SingleLine, TimeOffSet, Depth). + +%% helpers +do_formatter(json, CharsLimit, SingleLine, TimeOffSet, Depth) -> + {emqx_logger_jsonfmt, #{ + chars_limit => CharsLimit, + single_line => SingleLine, + time_offset => TimeOffSet, + depth => Depth + }}; +do_formatter(text, CharsLimit, SingleLine, TimeOffSet, Depth) -> + {emqx_logger_textfmt, #{ + template => [time, " [", level, "] ", msg, "\n"], + chars_limit => CharsLimit, + single_line => SingleLine, + time_offset => TimeOffSet, + depth => Depth + }}. + +log_filter(Conf) -> + case conf_get("supervisor_reports", Conf) of + error -> [{drop_progress_reports, {fun logger_filters:progress/2, stop}}]; + progress -> [] + end. 
+ +tr_level(Conf) -> + ConsoleLevel = conf_get("log.console_handler.level", Conf, undefined), + FileLevels = [ + conf_get("level", SubConf) + || {_, SubConf} <- + logger_file_handlers(Conf) + ], + case FileLevels ++ [ConsoleLevel || ConsoleLevel =/= undefined] of + %% warning is the default level we should use + [] -> warning; + Levels -> least_severe_log_level(Levels) + end. + +least_severe_log_level(Levels) -> + hd(sort_log_levels(Levels)). + +sort_log_levels(Levels) -> + lists:sort( + fun(A, B) -> + case logger:compare_levels(A, B) of + R when R == lt; R == eq -> true; + gt -> false + end + end, + Levels + ). diff --git a/apps/emqx/src/emqx.app.src b/apps/emqx/src/emqx.app.src index 270d36a5e..c812b2217 100644 --- a/apps/emqx/src/emqx.app.src +++ b/apps/emqx/src/emqx.app.src @@ -3,7 +3,7 @@ {id, "emqx"}, {description, "EMQX Core"}, % strict semver, bump manually! - {vsn, "5.0.16"}, + {vsn, "5.0.17"}, {modules, []}, {registered, []}, {applications, [ diff --git a/apps/emqx/src/emqx_alarm.erl b/apps/emqx/src/emqx_alarm.erl index 209715a85..84c40ef2a 100644 --- a/apps/emqx/src/emqx_alarm.erl +++ b/apps/emqx/src/emqx_alarm.erl @@ -325,19 +325,20 @@ deactivate_alarm( false -> ok end, + Now = erlang:system_time(microsecond), HistoryAlarm = make_deactivated_alarm( ActivateAt, Name, Details0, Msg0, - erlang:system_time(microsecond) + Now ), DeActAlarm = make_deactivated_alarm( ActivateAt, Name, Details, normalize_message(Name, iolist_to_binary(Message)), - erlang:system_time(microsecond) + Now ), mria:dirty_write(?DEACTIVATED_ALARM, HistoryAlarm), mria:dirty_delete(?ACTIVATED_ALARM, Name), diff --git a/apps/emqx/src/emqx_cm.erl b/apps/emqx/src/emqx_cm.erl index 66e9a2aee..77bc44eeb 100644 --- a/apps/emqx/src/emqx_cm.erl +++ b/apps/emqx/src/emqx_cm.erl @@ -152,7 +152,7 @@ start_link() -> insert_channel_info(ClientId, Info, Stats) -> Chan = {ClientId, self()}, true = ets:insert(?CHAN_INFO_TAB, {Chan, Info, Stats}), - ?tp(debug, insert_channel_info, #{client_id => 
ClientId}), + ?tp(debug, insert_channel_info, #{clientid => ClientId}), ok. %% @private diff --git a/apps/emqx/src/emqx_config.erl b/apps/emqx/src/emqx_config.erl index ba4095daa..6d706316c 100644 --- a/apps/emqx/src/emqx_config.erl +++ b/apps/emqx/src/emqx_config.erl @@ -24,6 +24,7 @@ init_load/2, init_load/3, read_override_conf/1, + read_override_confs/0, delete_override_conf_files/0, check_config/2, fill_defaults/1, @@ -326,9 +327,7 @@ init_load(SchemaMod, RawConf, Opts) when is_map(RawConf) -> ok = save_schema_mod_and_names(SchemaMod), %% Merge environment variable overrides on top RawConfWithEnvs = merge_envs(SchemaMod, RawConf), - ClusterOverrides = read_override_conf(#{override_to => cluster}), - LocalOverrides = read_override_conf(#{override_to => local}), - Overrides = hocon:deep_merge(ClusterOverrides, LocalOverrides), + Overrides = read_override_confs(), RawConfWithOverrides = hocon:deep_merge(RawConfWithEnvs, Overrides), RootNames = get_root_names(), RawConfAll = raw_conf_with_default(SchemaMod, RootNames, RawConfWithOverrides, Opts), @@ -337,6 +336,12 @@ init_load(SchemaMod, RawConf, Opts) when is_map(RawConf) -> save_to_app_env(AppEnvs), ok = save_to_config_map(CheckedConf, RawConfAll). +%% @doc Read merged cluster + local overrides. +read_override_confs() -> + ClusterOverrides = read_override_conf(#{override_to => cluster}), + LocalOverrides = read_override_conf(#{override_to => local}), + hocon:deep_merge(ClusterOverrides, LocalOverrides). + %% keep the raw and non-raw conf has the same keys to make update raw conf easier. raw_conf_with_default(SchemaMod, RootNames, RawConf, #{raw_with_default := true}) -> Fun = fun(Name, Acc) -> @@ -424,7 +429,13 @@ check_config(SchemaMod, RawConf, Opts0) -> %% it's maybe too much when reporting to the user -spec compact_errors(any(), any()) -> no_return(). 
compact_errors(Schema, [Error0 | More]) when is_map(Error0) -> - Error1 = Error0#{discarded_errors_count => length(More)}, + Error1 = + case length(More) of + 0 -> + Error0; + _ -> + Error0#{unshown_errors => length(More)} + end, Error = case is_atom(Schema) of true -> @@ -581,7 +592,6 @@ save_to_override_conf(RawConf, Opts) -> add_handlers() -> ok = emqx_config_logger:add_handler(), emqx_sys_mon:add_handler(), - emqx_config_logger:refresh_config(), ok. remove_handlers() -> @@ -593,8 +603,16 @@ load_hocon_file(FileName, LoadType) -> case filelib:is_regular(FileName) of true -> Opts = #{include_dirs => include_dirs(), format => LoadType}, - {ok, Raw0} = hocon:load(FileName, Opts), - Raw0; + case hocon:load(FileName, Opts) of + {ok, Raw0} -> + Raw0; + {error, Reason} -> + throw(#{ + msg => failed_to_load_conf, + reason => Reason, + file => FileName + }) + end; false -> #{} end. diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index 5ed302a6f..5b783f2fe 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -550,6 +550,7 @@ handle_msg( }, handle_incoming(Packet, NState); handle_msg({incoming, Packet}, State) -> + ?TRACE("MQTT", "mqtt_packet_received", #{packet => Packet}), handle_incoming(Packet, State); handle_msg({outgoing, Packets}, State) -> handle_outgoing(Packets, State); @@ -731,6 +732,12 @@ handle_timeout(TRef, Msg, State) -> %% Parse incoming data -compile({inline, [when_bytes_in/3]}). 
when_bytes_in(Oct, Data, State) -> + ?SLOG(debug, #{ + msg => "raw_bin_received", + size => Oct, + bin => binary_to_list(binary:encode_hex(Data)), + type => "hex" + }), {Packets, NState} = parse_incoming(Data, [], State), Len = erlang:length(Packets), check_limiter( @@ -783,7 +790,6 @@ parse_incoming(Data, Packets, State = #state{parse_state = ParseState}) -> handle_incoming(Packet, State) when is_record(Packet, mqtt_packet) -> ok = inc_incoming_stats(Packet), - ?TRACE("MQTT", "mqtt_packet_received", #{packet => Packet}), with_channel(handle_in, [Packet], State); handle_incoming(FrameError, State) -> with_channel(handle_in, [FrameError], State). diff --git a/apps/emqx/src/emqx_logger_textfmt.erl b/apps/emqx/src/emqx_logger_textfmt.erl index 3695929d9..fb27681b8 100644 --- a/apps/emqx/src/emqx_logger_textfmt.erl +++ b/apps/emqx/src/emqx_logger_textfmt.erl @@ -22,20 +22,49 @@ check_config(X) -> logger_formatter:check_config(X). -format(#{msg := {report, Report0}, meta := Meta} = Event, Config) when is_map(Report0) -> - Report1 = enrich_report_mfa(Report0, Meta), - Report2 = enrich_report_clientid(Report1, Meta), - Report3 = enrich_report_peername(Report2, Meta), - Report4 = enrich_report_topic(Report3, Meta), - logger_formatter:format(Event#{msg := {report, Report4}}, Config); +format(#{msg := {report, ReportMap}, meta := Meta} = Event, Config) when is_map(ReportMap) -> + Report = enrich_report(ReportMap, Meta), + logger_formatter:format(Event#{msg := {report, Report}}, Config); format(#{msg := {string, String}} = Event, Config) -> format(Event#{msg => {"~ts ", [String]}}, Config); +%% trace format(#{msg := Msg0, meta := Meta} = Event, Config) -> Msg1 = enrich_client_info(Msg0, Meta), Msg2 = enrich_mfa(Msg1, Meta), Msg3 = enrich_topic(Msg2, Meta), logger_formatter:format(Event#{msg := Msg3}, Config). +enrich_report(ReportRaw, Meta) -> + %% clientid and peername always in emqx_conn's process metadata. 
+ %% topic can be put in meta using ?SLOG/3, or put in msg's report by ?SLOG/2 + Topic = + case maps:get(topic, Meta, undefined) of + undefined -> maps:get(topic, ReportRaw, undefined); + Topic0 -> Topic0 + end, + ClientId = maps:get(clientid, Meta, undefined), + Peer = maps:get(peername, Meta, undefined), + MFA = maps:get(mfa, Meta, undefined), + Line = maps:get(line, Meta, undefined), + Msg = maps:get(msg, ReportRaw, undefined), + lists:foldl( + fun + ({_, undefined}, Acc) -> Acc; + (Item, Acc) -> [Item | Acc] + end, + maps:to_list(maps:without([topic, msg, clientid], ReportRaw)), + [ + {topic, try_format_unicode(Topic)}, + {clientid, try_format_unicode(ClientId)}, + {peername, Peer}, + {line, Line}, + {mfa, mfa(MFA)}, + {msg, Msg} + ] + ). + +try_format_unicode(undefined) -> + undefined; try_format_unicode(Char) -> List = try @@ -53,30 +82,6 @@ try_format_unicode(Char) -> _ -> List end. -enrich_report_mfa(Report, #{mfa := Mfa, line := Line}) -> - Report#{mfa => mfa(Mfa), line => Line}; -enrich_report_mfa(Report, _) -> - Report. - -enrich_report_clientid(Report, #{clientid := ClientId}) -> - Report#{clientid => try_format_unicode(ClientId)}; -enrich_report_clientid(Report, _) -> - Report. - -enrich_report_peername(Report, #{peername := Peername}) -> - Report#{peername => Peername}; -enrich_report_peername(Report, _) -> - Report. - -%% clientid and peername always in emqx_conn's process metadata. -%% topic can be put in meta using ?SLOG/3, or put in msg's report by ?SLOG/2 -enrich_report_topic(Report, #{topic := Topic}) -> - Report#{topic => try_format_unicode(Topic)}; -enrich_report_topic(Report = #{topic := Topic}, _) -> - Report#{topic => try_format_unicode(Topic)}; -enrich_report_topic(Report, _) -> - Report. 
- enrich_mfa({Fmt, Args}, #{mfa := Mfa, line := Line}) when is_list(Fmt) -> {Fmt ++ " mfa: ~ts line: ~w", Args ++ [mfa(Mfa), Line]}; enrich_mfa(Msg, _) -> @@ -96,4 +101,5 @@ enrich_topic({Fmt, Args}, #{topic := Topic}) when is_list(Fmt) -> enrich_topic(Msg, _) -> Msg. -mfa({M, F, A}) -> atom_to_list(M) ++ ":" ++ atom_to_list(F) ++ "/" ++ integer_to_list(A). +mfa(undefined) -> undefined; +mfa({M, F, A}) -> [atom_to_list(M), ":", atom_to_list(F), "/" ++ integer_to_list(A)]. diff --git a/apps/emqx/src/emqx_misc.erl b/apps/emqx/src/emqx_misc.erl index c20227c07..fbeec8724 100644 --- a/apps/emqx/src/emqx_misc.erl +++ b/apps/emqx/src/emqx_misc.erl @@ -609,7 +609,11 @@ do_redact(K, V, Checker) -> -define(REDACT_VAL, "******"). redact_v(V) when is_binary(V) -> <>; -redact_v(_V) -> ?REDACT_VAL. +%% The HOCON schema system may generate sensitive values with this format +redact_v([{str, Bin}]) when is_binary(Bin) -> + [{str, <>}]; +redact_v(_V) -> + ?REDACT_VAL. is_redacted(K, V) -> do_is_redacted(K, V, fun is_sensitive_key/1). diff --git a/apps/emqx/src/emqx_os_mon.erl b/apps/emqx/src/emqx_os_mon.erl index a06f56a4c..c5ce35bf9 100644 --- a/apps/emqx/src/emqx_os_mon.erl +++ b/apps/emqx/src/emqx_os_mon.erl @@ -93,9 +93,9 @@ init([]) -> %% memsup is not reliable, ignore memsup:set_sysmem_high_watermark(1.0), SysHW = init_os_monitor(), - _ = start_mem_check_timer(), - _ = start_cpu_check_timer(), - {ok, #{sysmem_high_watermark => SysHW}}. + MemRef = start_mem_check_timer(), + CpuRef = start_cpu_check_timer(), + {ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}. init_os_monitor() -> init_os_monitor(emqx:get_config([sysmon, os])). 
@@ -125,13 +125,15 @@ handle_cast(Msg, State) -> handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = State) -> ok = update_mem_alarm_status(HWM), - ok = start_mem_check_timer(), - {noreply, State}; + Ref = start_mem_check_timer(), + {noreply, State#{mem_time_ref => Ref}}; handle_info({timeout, _Timer, cpu_check}, State) -> CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100, CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100, - case emqx_vm:cpu_util() of - 0 -> + CPUVal = emqx_vm:cpu_util(), + case CPUVal of + %% 0 or 0.0 + Busy when Busy == 0 -> ok; Busy when Busy > CPUHighWatermark -> _ = emqx_alarm:activate( @@ -156,11 +158,14 @@ handle_info({timeout, _Timer, cpu_check}, State) -> _Busy -> ok end, - ok = start_cpu_check_timer(), - {noreply, State}; -handle_info({monitor_conf_update, OS}, _State) -> + Ref = start_cpu_check_timer(), + {noreply, State#{cpu_time_ref => Ref}}; +handle_info({monitor_conf_update, OS}, State) -> + cancel_outdated_timer(State), SysHW = init_os_monitor(OS), - {noreply, #{sysmem_high_watermark => SysHW}}; + MemRef = start_mem_check_timer(), + CpuRef = start_cpu_check_timer(), + {noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}; handle_info(Info, State) -> ?SLOG(error, #{msg => "unexpected_info", info => Info}), {noreply, State}. @@ -174,11 +179,15 @@ code_change(_OldVsn, State, _Extra) -> %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- +cancel_outdated_timer(#{mem_time_ref := MemRef, cpu_time_ref := CpuRef}) -> + emqx_misc:cancel_timer(MemRef), + emqx_misc:cancel_timer(CpuRef), + ok. 
start_cpu_check_timer() -> Interval = emqx:get_config([sysmon, os, cpu_check_interval]), case erlang:system_info(system_architecture) of - "x86_64-pc-linux-musl" -> ok; + "x86_64-pc-linux-musl" -> undefined; _ -> start_timer(Interval, cpu_check) end. @@ -191,12 +200,11 @@ start_mem_check_timer() -> true -> start_timer(Interval, mem_check); false -> - ok + undefined end. start_timer(Interval, Msg) -> - _ = emqx_misc:start_timer(Interval, Msg), - ok. + emqx_misc:start_timer(Interval, Msg). update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 -> ?SLOG(warning, #{msg => "discarded_out_of_range_mem_alarm_threshold", value => HWM}), @@ -223,7 +231,7 @@ do_update_mem_alarm_status(HWM0) -> }, usage_msg(Usage, mem) ); - _ -> + false -> ok = emqx_alarm:ensure_deactivated( high_system_memory_usage, #{ @@ -236,5 +244,5 @@ do_update_mem_alarm_status(HWM0) -> ok. usage_msg(Usage, What) -> - %% devide by 1.0 to ensure float point number + %% divide by 1.0 to ensure float point number iolist_to_binary(io_lib:format("~.2f% ~p usage", [Usage / 1.0, What])). diff --git a/apps/emqx/src/emqx_packet.erl b/apps/emqx/src/emqx_packet.erl index c247419f0..32bd3df53 100644 --- a/apps/emqx/src/emqx_packet.erl +++ b/apps/emqx/src/emqx_packet.erl @@ -477,9 +477,13 @@ format(Packet) -> format(Packet, emqx_trace_handler:payload_encode()). format(#mqtt_packet{header = Header, variable = Variable, payload = Payload}, PayloadEncode) -> HeaderIO = format_header(Header), case format_variable(Variable, Payload, PayloadEncode) of - "" -> HeaderIO; - VarIO -> [HeaderIO, ",", VarIO] - end. + "" -> [HeaderIO, ")"]; + VarIO -> [HeaderIO, ", ", VarIO, ")"] + end; +%% receive a frame error packet, such as {frame_error,frame_too_large} or +%% {frame_error,#{expected => <<"'MQTT' or 'MQIsdp'">>,hint => invalid_proto_name,received => <<"bad_name">>}} +format(FrameError, _PayloadEncode) -> + lists:flatten(io_lib:format("~tp", [FrameError])). 
format_header(#mqtt_packet_header{ type = Type, @@ -487,14 +491,14 @@ format_header(#mqtt_packet_header{ qos = QoS, retain = Retain }) -> - io_lib:format("~ts(Q~p, R~p, D~p)", [type_name(Type), QoS, i(Retain), i(Dup)]). + io_lib:format("~ts(Q~p, R~p, D~p", [type_name(Type), QoS, i(Retain), i(Dup)]). format_variable(undefined, _, _) -> ""; format_variable(Variable, undefined, PayloadEncode) -> format_variable(Variable, PayloadEncode); format_variable(Variable, Payload, PayloadEncode) -> - [format_variable(Variable, PayloadEncode), ",", format_payload(Payload, PayloadEncode)]. + [format_variable(Variable, PayloadEncode), ", ", format_payload(Payload, PayloadEncode)]. format_variable( #mqtt_packet_connect{ diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 8f016f684..48bd206c9 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -1815,16 +1815,12 @@ desc(_) -> %% utils -spec conf_get(string() | [string()], hocon:config()) -> term(). conf_get(Key, Conf) -> - V = hocon_maps:get(Key, Conf), - case is_binary(V) of - true -> - binary_to_list(V); - false -> - V - end. + ensure_list(hocon_maps:get(Key, Conf)). conf_get(Key, Conf, Default) -> - V = hocon_maps:get(Key, Conf, Default), + ensure_list(hocon_maps:get(Key, Conf, Default)). + +ensure_list(V) -> case is_binary(V) of true -> binary_to_list(V); diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index cf1a9dc08..f80d18a3a 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -175,9 +175,9 @@ schedulers() -> loads() -> [ - {load1, ftos(avg1() / 256)}, - {load5, ftos(avg5() / 256)}, - {load15, ftos(avg15() / 256)} + {load1, load(avg1())}, + {load5, load(avg5())}, + {load15, load(avg15())} ]. system_info_keys() -> ?SYSTEM_INFO_KEYS. @@ -232,9 +232,6 @@ mem_info() -> Free = proplists:get_value(free_memory, Dataset), [{total_memory, Total}, {used_memory, Total - Free}]. -ftos(F) -> - io_lib:format("~.2f", [F / 1.0]). 
- %%%% erlang vm scheduler_usage fun copied from recon scheduler_usage(Interval) when is_integer(Interval) -> %% We start and stop the scheduler_wall_time system flag @@ -391,18 +388,32 @@ cpu_util() -> compat_windows(Fun) -> case os:type() of {win32, nt} -> - 0; + 0.0; _Type -> case catch Fun() of + Val when is_float(Val) -> floor(Val * 100) / 100; Val when is_number(Val) -> Val; - _Error -> 0 + _Error -> 0.0 end end. -%% @doc Return on which Eralng/OTP the current vm is running. -%% NOTE: This API reads a file, do not use it in critical code paths. +load(Avg) -> + floor((Avg / 256) * 100) / 100. + +%% @doc Return on which Erlang/OTP the current vm is running. +%% The dashboard's /api/nodes endpoint will call this function frequently. +%% we should avoid reading file every time. +%% The OTP version never changes at runtime expect upgrade erts, +%% so we cache it in a persistent term for performance. get_otp_version() -> - read_otp_version(). + case persistent_term:get(emqx_otp_version, undefined) of + undefined -> + OtpVsn = read_otp_version(), + persistent_term:put(emqx_otp_version, OtpVsn), + OtpVsn; + OtpVsn when is_binary(OtpVsn) -> + OtpVsn + end. read_otp_version() -> ReleasesDir = filename:join([code:root_dir(), "releases"]), @@ -416,6 +427,8 @@ read_otp_version() -> %% running tests etc. OtpMajor = erlang:system_info(otp_release), OtpVsnFile = filename:join([ReleasesDir, OtpMajor, "OTP_VERSION"]), - {ok, Vsn} = file:read_file(OtpVsnFile), - Vsn + case file:read_file(OtpVsnFile) of + {ok, Vsn} -> Vsn; + {error, enoent} -> list_to_binary(OtpMajor) + end end. 
diff --git a/apps/emqx/src/emqx_vm_mon.erl b/apps/emqx/src/emqx_vm_mon.erl index 5447e94e9..1327a1bb0 100644 --- a/apps/emqx/src/emqx_vm_mon.erl +++ b/apps/emqx/src/emqx_vm_mon.erl @@ -63,7 +63,7 @@ handle_info({timeout, _Timer, check}, State) -> ProcessCount = erlang:system_info(process_count), case ProcessCount / erlang:system_info(process_limit) of Percent when Percent > ProcHighWatermark -> - Usage = io_lib:format("~p%", [Percent * 100]), + Usage = usage(Percent), Message = [Usage, " process usage"], emqx_alarm:activate( too_many_processes, @@ -75,7 +75,7 @@ handle_info({timeout, _Timer, check}, State) -> Message ); Percent when Percent < ProcLowWatermark -> - Usage = io_lib:format("~p%", [Percent * 100]), + Usage = usage(Percent), Message = [Usage, " process usage"], emqx_alarm:ensure_deactivated( too_many_processes, @@ -108,3 +108,6 @@ code_change(_OldVsn, State, _Extra) -> start_check_timer() -> Interval = emqx:get_config([sysmon, vm, process_check_interval]), emqx_misc:start_timer(Interval, check). + +usage(Percent) -> + integer_to_list(floor(Percent * 100)) ++ "%". 
diff --git a/apps/emqx/src/emqx_ws_connection.erl b/apps/emqx/src/emqx_ws_connection.erl index 817c4b505..ead609ed8 100644 --- a/apps/emqx/src/emqx_ws_connection.erl +++ b/apps/emqx/src/emqx_ws_connection.erl @@ -399,6 +399,12 @@ get_peer_info(Type, Listener, Req, Opts) -> websocket_handle({binary, Data}, State) when is_list(Data) -> websocket_handle({binary, iolist_to_binary(Data)}, State); websocket_handle({binary, Data}, State) -> + ?SLOG(debug, #{ + msg => "raw_bin_received", + size => iolist_size(Data), + bin => binary_to_list(binary:encode_hex(Data)), + type => "hex" + }), State2 = ensure_stats_timer(State), {Packets, State3} = parse_incoming(Data, [], State2), LenMsg = erlang:length(Packets), @@ -437,6 +443,7 @@ websocket_info({incoming, Packet = ?CONNECT_PACKET(ConnPkt)}, State) -> NState = State#state{serialize = Serialize}, handle_incoming(Packet, cancel_idle_timer(NState)); websocket_info({incoming, Packet}, State) -> + ?TRACE("WS-MQTT", "mqtt_packet_received", #{packet => Packet}), handle_incoming(Packet, State); websocket_info({outgoing, Packets}, State) -> return(enqueue(Packets, State)); @@ -719,7 +726,6 @@ parse_incoming(Data, Packets, State = #state{parse_state = ParseState}) -> handle_incoming(Packet, State = #state{listener = {Type, Listener}}) when is_record(Packet, mqtt_packet) -> - ?TRACE("WS-MQTT", "mqtt_packet_received", #{packet => Packet}), ok = inc_incoming_stats(Packet), NState = case diff --git a/apps/emqx/test/emqx_bpapi_static_checks.erl b/apps/emqx/test/emqx_bpapi_static_checks.erl index f218739fc..142750cac 100644 --- a/apps/emqx/test/emqx_bpapi_static_checks.erl +++ b/apps/emqx/test/emqx_bpapi_static_checks.erl @@ -65,7 +65,7 @@ % Reason: legacy code. A fun and a QC query are % passed in the args, it's futile to try to statically % check it - "emqx_mgmt_api:do_query/2, emqx_mgmt_api:collect_total_from_tail_nodes/3" + "emqx_mgmt_api:do_query/2, emqx_mgmt_api:collect_total_from_tail_nodes/2" ). -define(XREF, myxref). 
diff --git a/apps/emqx/test/emqx_mqtt_SUITE.erl b/apps/emqx/test/emqx_mqtt_SUITE.erl index 287d7fdba..d0162b34b 100644 --- a/apps/emqx/test/emqx_mqtt_SUITE.erl +++ b/apps/emqx/test/emqx_mqtt_SUITE.erl @@ -237,7 +237,7 @@ do_async_set_keepalive() -> {ok, _} = ?block_until( #{ ?snk_kind := insert_channel_info, - client_id := ClientID + clientid := ClientID }, 2000, 100 diff --git a/apps/emqx/test/emqx_os_mon_SUITE.erl b/apps/emqx/test/emqx_os_mon_SUITE.erl index 8729bbdb6..0538d949a 100644 --- a/apps/emqx/test/emqx_os_mon_SUITE.erl +++ b/apps/emqx/test/emqx_os_mon_SUITE.erl @@ -25,25 +25,43 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps( - [], - fun - (emqx) -> - application:set_env(emqx, os_mon, [ - {cpu_check_interval, 1}, - {cpu_high_watermark, 5}, - {cpu_low_watermark, 80}, - {procmem_high_watermark, 5} - ]); - (_) -> - ok - end - ), + emqx_common_test_helpers:start_apps([]), Config. end_per_suite(_Config) -> emqx_common_test_helpers:stop_apps([]). 
+init_per_testcase(t_cpu_check_alarm, Config) -> + SysMon = emqx_config:get([sysmon, os], #{}), + emqx_config:put([sysmon, os], SysMon#{ + cpu_high_watermark => 0.9, + cpu_low_watermark => 0, + %% 200ms + cpu_check_interval => 200 + }), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + Config; +init_per_testcase(t_sys_mem_check_alarm, Config) -> + case os:type() of + {unix, linux} -> + SysMon = emqx_config:get([sysmon, os], #{}), + emqx_config:put([sysmon, os], SysMon#{ + sysmem_high_watermark => 0.51, + %% 200ms + mem_check_interval => 200 + }), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + Config; + _ -> + Config + end; +init_per_testcase(_, Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + Config. + t_api(_) -> ?assertEqual(60000, emqx_os_mon:get_mem_check_interval()), ?assertEqual(ok, emqx_os_mon:set_mem_check_interval(30000)), @@ -67,3 +85,106 @@ t_api(_) -> emqx_os_mon ! ignored, gen_server:stop(emqx_os_mon), ok. + +t_sys_mem_check_alarm(Config) -> + case os:type() of + {unix, linux} -> + do_sys_mem_check_alarm(Config); + _ -> + skip + end. 
+ +do_sys_mem_check_alarm(_Config) -> + emqx_config:put([sysmon, os, mem_check_interval], 200), + emqx_os_mon:update(emqx_config:get([sysmon, os])), + Mem = 0.52345, + Usage = floor(Mem * 10000) / 100, + emqx_common_test_helpers:with_mock( + load_ctl, + get_memory_usage, + fun() -> Mem end, + fun() -> + timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), + ?assert( + emqx_vm_mon_SUITE:is_existing( + high_system_memory_usage, emqx_alarm:get_alarms(activated) + ), + #{ + load_ctl_memory => load_ctl:get_memory_usage(), + config => emqx_config:get([sysmon, os]), + process => sys:get_state(emqx_os_mon), + alarms => Alarms + } + ), + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 51.0, usage := RealUsage}, + message := Msg, + name := high_system_memory_usage + } + ] = + lists:filter( + fun + (#{name := high_system_memory_usage}) -> true; + (_) -> false + end, + Alarms + ), + ?assert(RealUsage >= Usage, {RealUsage, Usage}), + ?assert(is_binary(Msg)), + emqx_config:put([sysmon, os, sysmem_high_watermark], 0.99999), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + timer:sleep(600), + Activated = emqx_alarm:get_alarms(activated), + ?assertNot( + emqx_vm_mon_SUITE:is_existing(high_system_memory_usage, Activated), + #{activated => Activated, process_state => sys:get_state(emqx_os_mon)} + ) + end + ). 
+ +t_cpu_check_alarm(_) -> + CpuUtil = 90.12345, + Usage = floor(CpuUtil * 100) / 100, + emqx_common_test_helpers:with_mock( + cpu_sup, + util, + fun() -> CpuUtil end, + fun() -> + timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), + ?assert( + emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) + ), + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 90.0, low_watermark := 0, usage := RealUsage}, + message := Msg, + name := high_cpu_usage + } + ] = + lists:filter( + fun + (#{name := high_cpu_usage}) -> true; + (_) -> false + end, + Alarms + ), + ?assert(RealUsage >= Usage, {RealUsage, Usage}), + ?assert(is_binary(Msg)), + emqx_config:put([sysmon, os, cpu_high_watermark], 1), + emqx_config:put([sysmon, os, cpu_low_watermark], 0.96), + timer:sleep(500), + ?assertNot( + emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) + ) + end + ). diff --git a/apps/emqx/test/emqx_vm_SUITE.erl b/apps/emqx/test/emqx_vm_SUITE.erl index f9809361b..35f37a41e 100644 --- a/apps/emqx/test/emqx_vm_SUITE.erl +++ b/apps/emqx/test/emqx_vm_SUITE.erl @@ -24,7 +24,24 @@ all() -> emqx_common_test_helpers:all(?MODULE). t_load(_Config) -> - ?assertMatch([{load1, _}, {load5, _}, {load15, _}], emqx_vm:loads()). + lists:foreach( + fun({Avg, LoadKey, Int}) -> + emqx_common_test_helpers:with_mock( + cpu_sup, + Avg, + fun() -> Int end, + fun() -> + Load = proplists:get_value(LoadKey, emqx_vm:loads()), + ?assertEqual(Int / 256, Load) + end + ) + end, + [{avg1, load1, 0}, {avg5, load5, 128}, {avg15, load15, 256}] + ), + ?assertMatch( + [{load1, _}, {load5, _}, {load15, _}], + emqx_vm:loads() + ). 
t_systeminfo(_Config) -> ?assertEqual( diff --git a/apps/emqx/test/emqx_vm_mon_SUITE.erl b/apps/emqx/test/emqx_vm_mon_SUITE.erl index 140a00010..ceeffafb5 100644 --- a/apps/emqx/test/emqx_vm_mon_SUITE.erl +++ b/apps/emqx/test/emqx_vm_mon_SUITE.erl @@ -23,13 +23,13 @@ all() -> emqx_common_test_helpers:all(?MODULE). -init_per_testcase(t_alarms, Config) -> +init_per_testcase(t_too_many_processes_alarm, Config) -> emqx_common_test_helpers:boot_modules(all), emqx_common_test_helpers:start_apps([]), emqx_config:put([sysmon, vm], #{ process_high_watermark => 0, process_low_watermark => 0, - %% 1s + %% 100ms process_check_interval => 100 }), ok = supervisor:terminate_child(emqx_sys_sup, emqx_vm_mon), @@ -43,9 +43,29 @@ init_per_testcase(_, Config) -> end_per_testcase(_, _Config) -> emqx_common_test_helpers:stop_apps([]). -t_alarms(_) -> +t_too_many_processes_alarm(_) -> timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), ?assert(is_existing(too_many_processes, emqx_alarm:get_alarms(activated))), + ?assertMatch( + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 0, low_watermark := 0, usage := "0%"}, + message := <<"0% process usage">>, + name := too_many_processes + } + ], + lists:filter( + fun + (#{name := too_many_processes}) -> true; + (_) -> false + end, + Alarms + ) + ), emqx_config:put([sysmon, vm, process_high_watermark], 70), emqx_config:put([sysmon, vm, process_low_watermark], 60), timer:sleep(500), diff --git a/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl b/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl index ff0bfaea7..fa0658f6a 100644 --- a/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl +++ b/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl @@ -112,8 +112,7 @@ t_update_with_invalid_config(_Config) -> #{ kind := validation_error, path := "authentication.server", - reason := required_field, - value := undefined + reason := required_field } ]} }}}, diff --git 
a/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl b/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl index 15b180c96..b480e0262 100644 --- a/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl +++ b/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl @@ -188,8 +188,7 @@ t_create_invalid_config(_Config) -> ?assertMatch( {error, #{ kind := validation_error, - path := "authorization.sources.1", - discarded_errors_count := 0 + path := "authorization.sources.1.server" }}, emqx_authz:update(?CMD_REPLACE, [C]) ). diff --git a/apps/emqx_bridge/src/emqx_bridge.erl b/apps/emqx_bridge/src/emqx_bridge.erl index fb199522d..dc0a96690 100644 --- a/apps/emqx_bridge/src/emqx_bridge.erl +++ b/apps/emqx_bridge/src/emqx_bridge.erl @@ -31,6 +31,7 @@ -export([ load/0, + unload/0, lookup/1, lookup/2, lookup/3, @@ -75,6 +76,21 @@ load() -> maps:to_list(Bridges) ). +unload() -> + unload_hook(), + Bridges = emqx:get_config([bridges], #{}), + lists:foreach( + fun({Type, NamedConf}) -> + lists:foreach( + fun({Name, _Conf}) -> + _ = emqx_bridge_resource:stop(Type, Name) + end, + maps:to_list(NamedConf) + ) + end, + maps:to_list(Bridges) + ). + safe_load_bridge(Type, Name, Conf, Opts) -> try _Res = emqx_bridge_resource:create(Type, Name, Conf, Opts), @@ -263,7 +279,7 @@ create(BridgeType, BridgeName, RawConf) -> brige_action => create, bridge_type => BridgeType, bridge_name => BridgeName, - bridge_raw_config => RawConf + bridge_raw_config => emqx_misc:redact(RawConf) }), emqx_conf:update( emqx_bridge:config_key_path() ++ [BridgeType, BridgeName], diff --git a/apps/emqx_bridge/src/emqx_bridge_api.erl b/apps/emqx_bridge/src/emqx_bridge_api.erl index 2c43ce5d7..2e94f0719 100644 --- a/apps/emqx_bridge/src/emqx_bridge_api.erl +++ b/apps/emqx_bridge/src/emqx_bridge_api.erl @@ -171,12 +171,12 @@ bridge_info_examples(Method, WithMetrics) -> ee_bridge_examples(Method) ). +-if(?EMQX_RELEASE_EDITION == ee). ee_bridge_examples(Method) -> - try - emqx_ee_bridge:examples(Method) - catch - _:_ -> #{} - end. 
+ emqx_ee_bridge:examples(Method). +-else. +ee_bridge_examples(_Method) -> #{}. +-endif. info_example(Type, Method, WithMetrics) -> maps:merge( diff --git a/apps/emqx_bridge/src/emqx_bridge_app.erl b/apps/emqx_bridge/src/emqx_bridge_app.erl index e10034bae..daae15a17 100644 --- a/apps/emqx_bridge/src/emqx_bridge_app.erl +++ b/apps/emqx_bridge/src/emqx_bridge_app.erl @@ -39,7 +39,7 @@ start(_StartType, _StartArgs) -> stop(_State) -> emqx_conf:remove_handler(?LEAF_NODE_HDLR_PATH), emqx_conf:remove_handler(?TOP_LELVE_HDLR_PATH), - ok = emqx_bridge:unload_hook(), + ok = emqx_bridge:unload(), ok. -if(?EMQX_RELEASE_EDITION == ee). diff --git a/apps/emqx_bridge/src/emqx_bridge_resource.erl b/apps/emqx_bridge/src/emqx_bridge_resource.erl index cbff85df3..d228f2281 100644 --- a/apps/emqx_bridge/src/emqx_bridge_resource.erl +++ b/apps/emqx_bridge/src/emqx_bridge_resource.erl @@ -137,7 +137,7 @@ create(Type, Name, Conf, Opts0) -> msg => "create bridge", type => Type, name => Name, - config => Conf + config => emqx_misc:redact(Conf) }), Opts = override_start_after_created(Conf, Opts0), {ok, _Data} = emqx_resource:create_local( @@ -172,7 +172,7 @@ update(Type, Name, {OldConf, Conf}, Opts0) -> msg => "update bridge", type => Type, name => Name, - config => Conf + config => emqx_misc:redact(Conf) }), case recreate(Type, Name, Conf, Opts) of {ok, _} -> @@ -182,7 +182,7 @@ update(Type, Name, {OldConf, Conf}, Opts0) -> msg => "updating_a_non_existing_bridge", type => Type, name => Name, - config => Conf + config => emqx_misc:redact(Conf) }), create(Type, Name, Conf, Opts); {error, Reason} -> diff --git a/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl b/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl index 862b5e188..1e55d0c0e 100644 --- a/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl +++ b/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl @@ -72,7 +72,6 @@ up(#{<<"connector">> := Connector} = Config) -> Cn(proto_ver, 
<<"v4">>), Cn(server, undefined), Cn(retry_interval, <<"15s">>), - Cn(reconnect_interval, <<"15s">>), Cn(ssl, default_ssl()), {enable, Enable}, {resource_opts, default_resource_opts()}, diff --git a/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl b/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl index 4aecfac5d..c490294eb 100644 --- a/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl +++ b/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl @@ -56,8 +56,8 @@ api_schema(Method) -> EE = ee_api_schemas(Method), hoconsc:union(Broker ++ EE). +-if(?EMQX_RELEASE_EDITION == ee). ee_api_schemas(Method) -> - %% must ensure the app is loaded before checking if fn is defined. ensure_loaded(emqx_ee_bridge, emqx_ee_bridge), case erlang:function_exported(emqx_ee_bridge, api_schemas, 1) of true -> emqx_ee_bridge:api_schemas(Method); @@ -65,13 +65,31 @@ ee_api_schemas(Method) -> end. ee_fields_bridges() -> - %% must ensure the app is loaded before checking if fn is defined. ensure_loaded(emqx_ee_bridge, emqx_ee_bridge), case erlang:function_exported(emqx_ee_bridge, fields, 1) of true -> emqx_ee_bridge:fields(bridges); false -> [] end. +%% must ensure the app is loaded before checking if fn is defined. +ensure_loaded(App, Mod) -> + try + _ = application:load(App), + _ = Mod:module_info(), + ok + catch + _:_ -> + ok + end. + +-else. + +ee_api_schemas(_) -> []. + +ee_fields_bridges() -> []. + +-endif. + common_bridge_fields() -> [ {enable, @@ -194,17 +212,3 @@ status() -> node_name() -> {"node", mk(binary(), #{desc => ?DESC("desc_node_name"), example => "emqx@127.0.0.1"})}. - -%%================================================================================================= -%% Internal fns -%%================================================================================================= - -ensure_loaded(App, Mod) -> - try - _ = application:load(App), - _ = Mod:module_info(), - ok - catch - _:_ -> - ok - end. 
diff --git a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl index 6b0b3a941..5cb78d3ba 100644 --- a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl @@ -640,7 +640,7 @@ t_bridges_probe(Config) -> ?assertMatch( #{ <<"code">> := <<"TEST_FAILED">>, - <<"message">> := <<"#{reason => econnrefused", _/binary>> + <<"message">> := <<"econnrefused">> }, jsx:decode(ConnRefused) ), diff --git a/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl b/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl index a2671a40e..36dd6324a 100644 --- a/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl +++ b/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl @@ -224,7 +224,6 @@ bridges { mode = \"cluster_shareload\" password = \"\" proto_ver = \"v5\" - reconnect_interval = \"15s\" replayq {offload = false, seg_bytes = \"100MB\"} retry_interval = \"12s\" server = \"localhost:1883\" @@ -257,7 +256,6 @@ bridges { mode = \"cluster_shareload\" password = \"\" proto_ver = \"v4\" - reconnect_interval = \"15s\" replayq {offload = false, seg_bytes = \"100MB\"} retry_interval = \"44s\" server = \"localhost:1883\" diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index a99f06f20..c4afa4db2 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -32,7 +32,6 @@ -define(BRIDGE_CONF_DEFAULT, <<"bridges: {}">>). -define(TYPE_MQTT, <<"mqtt">>). --define(NAME_MQTT, <<"my_mqtt_bridge">>). -define(BRIDGE_NAME_INGRESS, <<"ingress_mqtt_bridge">>). -define(BRIDGE_NAME_EGRESS, <<"egress_mqtt_bridge">>). @@ -98,6 +97,24 @@ } }). +-define(assertMetrics(Pat, BridgeID), + ?assertMetrics(Pat, true, BridgeID) +). 
+-define(assertMetrics(Pat, Guard, BridgeID), + ?assertMatch( + #{ + <<"metrics">> := Pat, + <<"node_metrics">> := [ + #{ + <<"node">> := _, + <<"metrics">> := Pat + } + ] + } when Guard, + request_bridge_metrics(BridgeID) + ) +). + inspect(Selected, _Envs, _Args) -> persistent_term:put(?MODULE, #{inspect => Selected}). @@ -176,7 +193,7 @@ t_mqtt_conn_bridge_ingress(_) -> {ok, 201, Bridge} = request( post, uri(["bridges"]), - ?SERVER_CONF(User1)#{ + ServerConf = ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF @@ -186,8 +203,21 @@ t_mqtt_conn_bridge_ingress(_) -> <<"type">> := ?TYPE_MQTT, <<"name">> := ?BRIDGE_NAME_INGRESS } = jsx:decode(Bridge), + BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), + %% try to create the bridge again + ?assertMatch( + {ok, 400, _}, + request(post, uri(["bridges"]), ServerConf) + ), + + %% try to reconfigure the bridge + ?assertMatch( + {ok, 200, _}, + request(put, uri(["bridges", BridgeIDIngress]), ServerConf) + ), + %% we now test if the bridge works as expected RemoteTopic = <>, LocalTopic = <>, @@ -198,34 +228,12 @@ t_mqtt_conn_bridge_ingress(_) -> %% the remote broker is also the local one. 
emqx:publish(emqx_message:make(RemoteTopic, Payload)), %% we should receive a message on the local broker, with specified topic - ?assert( - receive - {deliver, LocalTopic, #message{payload = Payload}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, LocalTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(LocalTopic, Payload), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDIngress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 0, <<"received">> := 1}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 0, <<"received">> := 1} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 0, <<"received">> := 1}, + BridgeIDIngress ), %% delete the bridge @@ -234,23 +242,38 @@ t_mqtt_conn_bridge_ingress(_) -> ok. +t_mqtt_conn_bridge_ignores_clean_start(_) -> + BridgeName = atom_to_binary(?FUNCTION_NAME), + BridgeID = create_bridge( + ?SERVER_CONF(<<"user1">>)#{ + <<"type">> => ?TYPE_MQTT, + <<"name">> => BridgeName, + <<"ingress">> => ?INGRESS_CONF, + <<"clean_start">> => false + } + ), + + {ok, 200, BridgeJSON} = request(get, uri(["bridges", BridgeID]), []), + Bridge = jsx:decode(BridgeJSON), + + %% verify that there's no `clean_start` in response + ?assertEqual(#{}, maps:with([<<"clean_start">>], Bridge)), + + %% delete the bridge + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), + {ok, 200, <<"[]">>} = request(get, uri(["bridges"]), []), + + ok. 
+ t_mqtt_conn_bridge_ingress_no_payload_template(_) -> User1 = <<"user1">>, - %% create an MQTT bridge, using POST - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDIngress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF_NO_PAYLOAD_TEMPLATE } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_INGRESS - } = jsx:decode(Bridge), - BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), %% we now test if the bridge works as expected RemoteTopic = <>, @@ -262,40 +285,13 @@ t_mqtt_conn_bridge_ingress_no_payload_template(_) -> %% the remote broker is also the local one. emqx:publish(emqx_message:make(RemoteTopic, Payload)), %% we should receive a message on the local broker, with specified topic - ?assert( - receive - {deliver, LocalTopic, #message{payload = MapMsg}} -> - ct:pal("local broker got message: ~p on topic ~p", [MapMsg, LocalTopic]), - %% the MapMsg is all fields outputed by Rule-Engine. it's a binary coded json here. 
- case jsx:decode(MapMsg) of - #{<<"payload">> := Payload} -> - true; - _ -> - false - end; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + Msg = assert_mqtt_msg_received(LocalTopic), + ?assertMatch(#{<<"payload">> := Payload}, jsx:decode(Msg#message.payload)), %% verify the metrics of the bridge - {ok, 200, BridgeStr} = request(get, uri(["bridges", BridgeIDIngress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 0, <<"received">> := 1}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 0, <<"received">> := 1} - } - ] - }, - jsx:decode(BridgeStr) + ?assertMetrics( + #{<<"matched">> := 0, <<"received">> := 1}, + BridgeIDIngress ), %% delete the bridge @@ -307,22 +303,15 @@ t_mqtt_conn_bridge_ingress_no_payload_template(_) -> t_mqtt_conn_bridge_egress(_) -> %% then we add a mqtt connector, using POST User1 = <<"user1">>, - - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), ResourceID = emqx_bridge_resource:resource_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), + %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, @@ -334,36 +323,14 @@ t_mqtt_conn_bridge_egress(_) -> emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = Payload, from = From}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, RemoteTopic]), - Size = byte_size(ResourceID), - ?assertMatch(<>, From), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + Msg = 
assert_mqtt_msg_received(RemoteTopic, Payload), + Size = byte_size(ResourceID), + ?assertMatch(<>, Msg#message.from), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, + BridgeIDEgress ), %% delete the bridge @@ -375,21 +342,15 @@ t_mqtt_conn_bridge_egress_no_payload_template(_) -> %% then we add a mqtt connector, using POST User1 = <<"user1">>, - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF_NO_PAYLOAD_TEMPLATE } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), ResourceID = emqx_bridge_resource:resource_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), + %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, @@ -401,42 +362,15 @@ t_mqtt_conn_bridge_egress_no_payload_template(_) -> emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = MapMsg, from = From}} -> - ct:pal("local broker got message: ~p on topic ~p", [MapMsg, RemoteTopic]), - %% the MapMsg is all fields outputed by Rule-Engine. it's a binary coded json here. 
- Size = byte_size(ResourceID), - ?assertMatch(<>, From), - case jsx:decode(MapMsg) of - #{<<"payload">> := Payload} -> - true; - _ -> - false - end; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + Msg = assert_mqtt_msg_received(RemoteTopic), + %% the MapMsg is all fields outputed by Rule-Engine. it's a binary coded json here. + ?assertMatch(<>, Msg#message.from), + ?assertMatch(#{<<"payload">> := Payload}, jsx:decode(Msg#message.payload)), %% verify the metrics of the bridge - {ok, 200, BridgeStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0} - } - ] - }, - jsx:decode(BridgeStr) + ?assertMetrics( + #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, + BridgeIDEgress ), %% delete the bridge @@ -447,9 +381,7 @@ t_mqtt_conn_bridge_egress_no_payload_template(_) -> t_egress_custom_clientid_prefix(_Config) -> User1 = <<"user1">>, - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"clientid_prefix">> => <<"my-custom-prefix">>, <<"type">> => ?TYPE_MQTT, @@ -457,11 +389,6 @@ t_egress_custom_clientid_prefix(_Config) -> <<"egress">> => ?EGRESS_CONF } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), ResourceID = emqx_bridge_resource:resource_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), LocalTopic = <>, RemoteTopic = <>, @@ -470,58 +397,36 @@ t_egress_custom_clientid_prefix(_Config) -> timer:sleep(100), emqx:publish(emqx_message:make(LocalTopic, Payload)), - receive - {deliver, RemoteTopic, #message{from = From}} -> - Size = byte_size(ResourceID), - ?assertMatch(<<"my-custom-prefix:", 
_ResouceID:Size/binary, _/binary>>, From), - ok - after 1000 -> - ct:fail("should have published message") - end, + Msg = assert_mqtt_msg_received(RemoteTopic, Payload), + Size = byte_size(ResourceID), + ?assertMatch(<<"my-custom-prefix:", _ResouceID:Size/binary, _/binary>>, Msg#message.from), {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []), ok. t_mqtt_conn_bridge_ingress_and_egress(_) -> User1 = <<"user1">>, - %% create an MQTT bridge, using POST - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDIngress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF } ), - - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_INGRESS - } = jsx:decode(Bridge), - BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), - {ok, 201, Bridge2} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge2), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, Payload = <<"hello">>, emqx:subscribe(RemoteTopic), - {ok, 200, BridgeMetricsStr1} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), #{ <<"metrics">> := #{ <<"matched">> := CntMatched1, <<"success">> := CntSuccess1, <<"failed">> := 0 @@ -538,29 +443,17 @@ t_mqtt_conn_bridge_ingress_and_egress(_) -> } } ] - } = jsx:decode(BridgeMetricsStr1), + } = request_bridge_metrics(BridgeIDEgress), timer:sleep(100), %% PUBLISH a message to the 'local' broker, as we have only one broker, %% the remote broker is also the local one. 
emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = Payload}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, RemoteTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(RemoteTopic, Payload), %% verify the metrics of the bridge timer:sleep(1000), - {ok, 200, BridgeMetricsStr2} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), #{ <<"metrics">> := #{ <<"matched">> := CntMatched2, <<"success">> := CntSuccess2, <<"failed">> := 0 @@ -577,7 +470,7 @@ t_mqtt_conn_bridge_ingress_and_egress(_) -> } } ] - } = jsx:decode(BridgeMetricsStr2), + } = request_bridge_metrics(BridgeIDEgress), ?assertEqual(CntMatched2, CntMatched1 + 1), ?assertEqual(CntSuccess2, CntSuccess1 + 1), ?assertEqual(NodeCntMatched2, NodeCntMatched1 + 1), @@ -590,16 +483,13 @@ t_mqtt_conn_bridge_ingress_and_egress(_) -> ok. t_ingress_mqtt_bridge_with_rules(_) -> - {ok, 201, _} = request( - post, - uri(["bridges"]), + BridgeIDIngress = create_bridge( ?SERVER_CONF(<<"user1">>)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF } ), - BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), {ok, 201, Rule} = request( post, @@ -624,18 +514,7 @@ t_ingress_mqtt_bridge_with_rules(_) -> %% the remote broker is also the local one. 
emqx:publish(emqx_message:make(RemoteTopic, Payload)), %% we should receive a message on the local broker, with specified topic - ?assert( - receive - {deliver, LocalTopic, #message{payload = Payload}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, LocalTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(LocalTopic, Payload), %% and also the rule should be matched, with matched + 1: {ok, 200, Rule1} = request(get, uri(["rules", RuleId]), []), {ok, 200, Metrics} = request(get, uri(["rules", RuleId, "metrics"]), []), @@ -680,37 +559,22 @@ t_ingress_mqtt_bridge_with_rules(_) -> ), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDIngress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 0, <<"received">> := 1}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 0, <<"received">> := 1} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 0, <<"received">> := 1}, + BridgeIDIngress ), {ok, 204, <<>>} = request(delete, uri(["rules", RuleId]), []), {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDIngress]), []). t_egress_mqtt_bridge_with_rules(_) -> - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(<<"user1">>)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF } ), - #{<<"type">> := ?TYPE_MQTT, <<"name">> := ?BRIDGE_NAME_EGRESS} = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), {ok, 201, Rule} = request( post, @@ -734,18 +598,7 @@ t_egress_mqtt_bridge_with_rules(_) -> %% the remote broker is also the local one. 
emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = Payload}} -> - ct:pal("remote broker got message: ~p on topic ~p", [Payload, RemoteTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(RemoteTopic, Payload), emqx:unsubscribe(RemoteTopic), %% PUBLISH a message to the rule. @@ -780,35 +633,12 @@ t_egress_mqtt_bridge_with_rules(_) -> ), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic2, #message{payload = Payload2}} -> - ct:pal("remote broker got message: ~p on topic ~p", [Payload2, RemoteTopic2]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(RemoteTopic2, Payload2), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 2, <<"success">> := 2, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := #{ - <<"matched">> := 2, <<"success">> := 2, <<"failed">> := 0 - } - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 2, <<"success">> := 2, <<"failed">> := 0}, + BridgeIDEgress ), {ok, 204, <<>>} = request(delete, uri(["rules", RuleId]), []), @@ -817,37 +647,31 @@ t_egress_mqtt_bridge_with_rules(_) -> t_mqtt_conn_bridge_egress_reconnect(_) -> %% then we add a mqtt connector, using POST User1 = <<"user1">>, - - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF, - %% to make it reconnect quickly - <<"reconnect_interval">> => <<"1s">>, <<"resource_opts">> => #{ 
<<"worker_pool_size">> => 2, <<"query_mode">> => <<"sync">>, %% using a long time so we can test recovery <<"request_timeout">> => <<"15s">>, %% to make it check the healthy quickly - <<"health_check_interval">> => <<"0.5s">> + <<"health_check_interval">> => <<"0.5s">>, + %% to make it reconnect quickly + <<"auto_restart_interval">> => <<"1s">> } } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), + on_exit(fun() -> %% delete the bridge {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []), {ok, 200, <<"[]">>} = request(get, uri(["bridges"]), []), ok end), + %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, @@ -862,20 +686,9 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> assert_mqtt_msg_received(RemoteTopic, Payload0), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, + BridgeIDEgress ), %% stop the listener 1883 to make the bridge disconnected @@ -899,70 +712,183 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> ), Payload1 = <<"hello2">>, Payload2 = <<"hello3">>, - %% we need to to it in other processes because it'll block due to + %% We need to do it in other processes because it'll block due to %% the long timeout spawn(fun() -> emqx:publish(emqx_message:make(LocalTopic, Payload1)) end), spawn(fun() -> emqx:publish(emqx_message:make(LocalTopic, Payload2)) end), {ok, _} = snabbkaffe:receive_events(SRef), %% verify the metrics of the bridge, the message 
should be queued - {ok, 200, BridgeStr1} = request(get, uri(["bridges", BridgeIDEgress]), []), - {ok, 200, BridgeMetricsStr1} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - Decoded1 = jsx:decode(BridgeStr1), - DecodedMetrics1 = jsx:decode(BridgeMetricsStr1), ?assertMatch( - Status when (Status == <<"connected">> orelse Status == <<"connecting">>), - maps:get(<<"status">>, Decoded1) + #{<<"status">> := Status} when + Status == <<"connecting">> orelse Status == <<"disconnected">>, + request_bridge(BridgeIDEgress) ), %% matched >= 3 because of possible retries. - ?assertMatch( + ?assertMetrics( #{ <<"matched">> := Matched, <<"success">> := 1, <<"failed">> := 0, <<"queuing">> := Queuing, <<"inflight">> := Inflight - } when Matched >= 3 andalso Inflight + Queuing == 2, - maps:get(<<"metrics">>, DecodedMetrics1) + }, + Matched >= 3 andalso Inflight + Queuing == 2, + BridgeIDEgress ), %% start the listener 1883 to make the bridge reconnected ok = emqx_listeners:start_listener('tcp:default'), timer:sleep(1500), %% verify the metrics of the bridge, the 2 queued messages should have been sent - {ok, 200, BridgeStr2} = request(get, uri(["bridges", BridgeIDEgress]), []), - {ok, 200, BridgeMetricsStr2} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - Decoded2 = jsx:decode(BridgeStr2), - ?assertEqual(<<"connected">>, maps:get(<<"status">>, Decoded2)), + ?assertMatch(#{<<"status">> := <<"connected">>}, request_bridge(BridgeIDEgress)), %% matched >= 3 because of possible retries. 
- ?assertMatch( + ?assertMetrics( #{ - <<"metrics">> := #{ - <<"matched">> := Matched, - <<"success">> := 3, - <<"failed">> := 0, - <<"queuing">> := 0, - <<"retried">> := _ - } - } when Matched >= 3, - jsx:decode(BridgeMetricsStr2) + <<"matched">> := Matched, + <<"success">> := 3, + <<"failed">> := 0, + <<"queuing">> := 0, + <<"retried">> := _ + }, + Matched >= 3, + BridgeIDEgress ), %% also verify the 2 messages have been sent to the remote broker assert_mqtt_msg_received(RemoteTopic, Payload1), assert_mqtt_msg_received(RemoteTopic, Payload2), ok. -assert_mqtt_msg_received(Topic, Payload) -> - ct:pal("checking if ~p has been received on ~p", [Payload, Topic]), +t_mqtt_conn_bridge_egress_async_reconnect(_) -> + User1 = <<"user1">>, + BridgeIDEgress = create_bridge( + ?SERVER_CONF(User1)#{ + <<"type">> => ?TYPE_MQTT, + <<"name">> => ?BRIDGE_NAME_EGRESS, + <<"egress">> => ?EGRESS_CONF, + <<"resource_opts">> => #{ + <<"worker_pool_size">> => 2, + <<"query_mode">> => <<"async">>, + %% using a long time so we can test recovery + <<"request_timeout">> => <<"15s">>, + %% to make it check the healthy quickly + <<"health_check_interval">> => <<"0.5s">>, + %% to make it reconnect quickly + <<"auto_restart_interval">> => <<"1s">> + } + } + ), + + on_exit(fun() -> + %% delete the bridge + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []), + {ok, 200, <<"[]">>} = request(get, uri(["bridges"]), []), + ok + end), + + Self = self(), + LocalTopic = <>, + RemoteTopic = <>, + emqx:subscribe(RemoteTopic), + + Publisher = start_publisher(LocalTopic, 200, Self), + ct:sleep(1000), + + %% stop the listener 1883 to make the bridge disconnected + ok = emqx_listeners:stop_listener('tcp:default'), + ct:sleep(1500), + ?assertMatch( + #{<<"status">> := Status} when + Status == <<"connecting">> orelse Status == <<"disconnected">>, + request_bridge(BridgeIDEgress) + ), + + %% start the listener 1883 to make the bridge reconnected + ok = 
emqx_listeners:start_listener('tcp:default'), + timer:sleep(1500), + ?assertMatch( + #{<<"status">> := <<"connected">>}, + request_bridge(BridgeIDEgress) + ), + + N = stop_publisher(Publisher), + + %% all those messages should eventually be delivered + [ + assert_mqtt_msg_received(RemoteTopic, Payload) + || I <- lists:seq(1, N), + Payload <- [integer_to_binary(I)] + ], + + ok. + +start_publisher(Topic, Interval, CtrlPid) -> + spawn_link(fun() -> publisher(Topic, 1, Interval, CtrlPid) end). + +stop_publisher(Pid) -> + _ = Pid ! {self(), stop}, receive - {deliver, Topic, #message{payload = Payload}} -> - ct:pal("Got mqtt message: ~p on topic ~p", [Payload, Topic]), - ok - after 300 -> - {messages, Messages} = process_info(self(), messages), - Msg = io_lib:format("timeout waiting for ~p on topic ~p", [Payload, Topic]), - error({Msg, #{messages => Messages}}) + {Pid, N} -> N + after 1_000 -> ct:fail("publisher ~p did not stop", [Pid]) end. +publisher(Topic, N, Delay, CtrlPid) -> + _ = emqx:publish(emqx_message:make(Topic, integer_to_binary(N))), + receive + {CtrlPid, stop} -> + CtrlPid ! {self(), N} + after Delay -> + publisher(Topic, N + 1, Delay, CtrlPid) + end. + +%% + +assert_mqtt_msg_received(Topic) -> + assert_mqtt_msg_received(Topic, '_', 200). + +assert_mqtt_msg_received(Topic, Payload) -> + assert_mqtt_msg_received(Topic, Payload, 200). + +assert_mqtt_msg_received(Topic, Payload, Timeout) -> + receive + {deliver, Topic, Msg = #message{}} when Payload == '_' -> + ct:pal("received mqtt ~p on topic ~p", [Msg, Topic]), + Msg; + {deliver, Topic, Msg = #message{payload = Payload}} -> + ct:pal("received mqtt ~p on topic ~p", [Msg, Topic]), + Msg + after Timeout -> + {messages, Messages} = process_info(self(), messages), + ct:fail("timeout waiting ~p ms for ~p on topic '~s', messages = ~0p", [ + Timeout, + Payload, + Topic, + Messages + ]) + end. 
+ +create_bridge(Config = #{<<"type">> := Type, <<"name">> := Name}) -> + {ok, 201, Bridge} = request( + post, + uri(["bridges"]), + Config + ), + ?assertMatch( + #{ + <<"type">> := Type, + <<"name">> := Name + }, + jsx:decode(Bridge) + ), + emqx_bridge_resource:bridge_id(Type, Name). + +request_bridge(BridgeID) -> + {ok, 200, Bridge} = request(get, uri(["bridges", BridgeID]), []), + jsx:decode(Bridge). + +request_bridge_metrics(BridgeID) -> + {ok, 200, BridgeMetrics} = request(get, uri(["bridges", BridgeID, "metrics"]), []), + jsx:decode(BridgeMetrics). + request(Method, Url, Body) -> request(<<"connector_admin">>, Method, Url, Body). diff --git a/apps/emqx_conf/i18n/emqx_conf_schema.conf b/apps/emqx_conf/i18n/emqx_conf_schema.conf index 131118f5b..56da34621 100644 --- a/apps/emqx_conf/i18n/emqx_conf_schema.conf +++ b/apps/emqx_conf/i18n/emqx_conf_schema.conf @@ -1255,7 +1255,7 @@ Supervisor 报告的类型。默认为 error 类型。 log_overload_kill_restart_after { desc { - en: """If the handler is terminated, it restarts automatically after a delay specified in milliseconds. 
The value `infinity` prevents restarts.""" + en: """The handler restarts automatically after a delay in the event of termination, unless the value `infinity` is set, which blocks any subsequent restarts.""" zh: """如果处理进程终止,它会在以指定的时间后后自动重新启动。 `infinity` 不自动重启。""" } label { diff --git a/apps/emqx_conf/src/emqx_cluster_rpc.erl b/apps/emqx_conf/src/emqx_cluster_rpc.erl index fe701049c..c285e09b8 100644 --- a/apps/emqx_conf/src/emqx_cluster_rpc.erl +++ b/apps/emqx_conf/src/emqx_cluster_rpc.erl @@ -495,15 +495,15 @@ log_and_alarm(IsSuccess, Res, #{kind := ?APPLY_KIND_INITIATE} = Meta) -> %% because nothing is committed case IsSuccess of true -> - ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_result", result => Res}); + ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_result", result => emqx_misc:redact(Res)}); false -> - ?SLOG(warning, Meta#{msg => "cluster_rpc_apply_result", result => Res}) + ?SLOG(warning, Meta#{msg => "cluster_rpc_apply_result", result => emqx_misc:redact(Res)}) end; log_and_alarm(true, Res, Meta) -> - ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_ok", result => Res}), + ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_ok", result => emqx_misc:redact(Res)}), do_alarm(deactivate, Res, Meta); log_and_alarm(false, Res, Meta) -> - ?SLOG(error, Meta#{msg => "cluster_rpc_apply_failed", result => Res}), + ?SLOG(error, Meta#{msg => "cluster_rpc_apply_failed", result => emqx_misc:redact(Res)}), do_alarm(activate, Res, Meta). 
do_alarm(Fun, Res, #{tnx_id := Id} = Meta) -> diff --git a/apps/emqx_conf/src/emqx_conf.app.src b/apps/emqx_conf/src/emqx_conf.app.src index b13c0d055..f7fd33e3b 100644 --- a/apps/emqx_conf/src/emqx_conf.app.src +++ b/apps/emqx_conf/src/emqx_conf.app.src @@ -1,6 +1,6 @@ {application, emqx_conf, [ {description, "EMQX configuration management"}, - {vsn, "0.1.10"}, + {vsn, "0.1.11"}, {registered, []}, {mod, {emqx_conf_app, []}}, {applications, [kernel, stdlib]}, diff --git a/apps/emqx_conf/src/emqx_conf_app.erl b/apps/emqx_conf/src/emqx_conf_app.erl index f2e4f6f56..34224c3f2 100644 --- a/apps/emqx_conf/src/emqx_conf_app.erl +++ b/apps/emqx_conf/src/emqx_conf_app.erl @@ -29,6 +29,7 @@ start(_StartType, _StartArgs) -> init_conf(), + ok = emqx_config_logger:refresh_config(), emqx_conf_sup:start_link(). stop(_State) -> diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index bc9154933..9793e00d0 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -993,7 +993,7 @@ translation("ekka") -> translation("kernel") -> [ {"logger_level", fun tr_logger_level/1}, - {"logger", fun tr_logger/1}, + {"logger", fun tr_logger_handlers/1}, {"error_logger", fun(_) -> silent end} ]; translation("emqx") -> @@ -1065,70 +1065,10 @@ tr_cluster_discovery(Conf) -> -spec tr_logger_level(hocon:config()) -> logger:level(). tr_logger_level(Conf) -> - ConsoleLevel = conf_get("log.console_handler.level", Conf, undefined), - FileLevels = [ - conf_get("level", SubConf) - || {_, SubConf} <- - logger_file_handlers(Conf) - ], - case FileLevels ++ [ConsoleLevel || ConsoleLevel =/= undefined] of - %% warning is the default level we should use - [] -> warning; - Levels -> least_severe_log_level(Levels) - end. + emqx_config_logger:tr_level(Conf). 
-logger_file_handlers(Conf) -> - Handlers = maps:to_list(conf_get("log.file_handlers", Conf, #{})), - lists:filter( - fun({_Name, Opts}) -> - B = conf_get("enable", Opts), - true = is_boolean(B), - B - end, - Handlers - ). - -tr_logger(Conf) -> - %% For the default logger that outputs to console - ConsoleHandler = - case conf_get("log.console_handler.enable", Conf) of - true -> - ConsoleConf = conf_get("log.console_handler", Conf), - [ - {handler, console, logger_std_h, #{ - level => conf_get("log.console_handler.level", Conf), - config => (log_handler_conf(ConsoleConf))#{type => standard_io}, - formatter => log_formatter(ConsoleConf), - filters => log_filter(ConsoleConf) - }} - ]; - false -> - [] - end, - %% For the file logger - FileHandlers = - [ - begin - {handler, to_atom(HandlerName), logger_disk_log_h, #{ - level => conf_get("level", SubConf), - config => (log_handler_conf(SubConf))#{ - type => - case conf_get("rotation.enable", SubConf) of - true -> wrap; - _ -> halt - end, - file => conf_get("file", SubConf), - max_no_files => conf_get("rotation.count", SubConf), - max_no_bytes => conf_get("max_size", SubConf) - }, - formatter => log_formatter(SubConf), - filters => log_filter(SubConf), - filesync_repeat_interval => no_repeat - }} - end - || {HandlerName, SubConf} <- logger_file_handlers(Conf) - ], - [{handler, default, undefined}] ++ ConsoleHandler ++ FileHandlers. +tr_logger_handlers(Conf) -> + emqx_config_logger:tr_handlers(Conf). log_handler_common_confs(Enable) -> [ @@ -1225,78 +1165,6 @@ log_handler_common_confs(Enable) -> )} ]. 
-log_handler_conf(Conf) -> - SycModeQlen = conf_get("sync_mode_qlen", Conf), - DropModeQlen = conf_get("drop_mode_qlen", Conf), - FlushQlen = conf_get("flush_qlen", Conf), - Overkill = conf_get("overload_kill", Conf), - BurstLimit = conf_get("burst_limit", Conf), - #{ - sync_mode_qlen => SycModeQlen, - drop_mode_qlen => DropModeQlen, - flush_qlen => FlushQlen, - overload_kill_enable => conf_get("enable", Overkill), - overload_kill_qlen => conf_get("qlen", Overkill), - overload_kill_mem_size => conf_get("mem_size", Overkill), - overload_kill_restart_after => conf_get("restart_after", Overkill), - burst_limit_enable => conf_get("enable", BurstLimit), - burst_limit_max_count => conf_get("max_count", BurstLimit), - burst_limit_window_time => conf_get("window_time", BurstLimit) - }. - -log_formatter(Conf) -> - CharsLimit = - case conf_get("chars_limit", Conf) of - unlimited -> unlimited; - V when V > 0 -> V - end, - TimeOffSet = - case conf_get("time_offset", Conf) of - "system" -> ""; - "utc" -> 0; - OffSetStr -> OffSetStr - end, - SingleLine = conf_get("single_line", Conf), - Depth = conf_get("max_depth", Conf), - do_formatter(conf_get("formatter", Conf), CharsLimit, SingleLine, TimeOffSet, Depth). - -%% helpers -do_formatter(json, CharsLimit, SingleLine, TimeOffSet, Depth) -> - {emqx_logger_jsonfmt, #{ - chars_limit => CharsLimit, - single_line => SingleLine, - time_offset => TimeOffSet, - depth => Depth - }}; -do_formatter(text, CharsLimit, SingleLine, TimeOffSet, Depth) -> - {emqx_logger_textfmt, #{ - template => [time, " [", level, "] ", msg, "\n"], - chars_limit => CharsLimit, - single_line => SingleLine, - time_offset => TimeOffSet, - depth => Depth - }}. - -log_filter(Conf) -> - case conf_get("supervisor_reports", Conf) of - error -> [{drop_progress_reports, {fun logger_filters:progress/2, stop}}]; - progress -> [] - end. - -least_severe_log_level(Levels) -> - hd(sort_log_levels(Levels)). 
- -sort_log_levels(Levels) -> - lists:sort( - fun(A, B) -> - case logger:compare_levels(A, B) of - R when R == lt; R == eq -> true; - gt -> false - end - end, - Levels - ). - crash_dump_file_default() -> case os:getenv("RUNNER_LOG_DIR") of false -> @@ -1308,11 +1176,9 @@ crash_dump_file_default() -> %% utils -spec conf_get(string() | [string()], hocon:config()) -> term(). -conf_get(Key, Conf) -> - ensure_list(hocon_maps:get(Key, Conf)). +conf_get(Key, Conf) -> emqx_schema:conf_get(Key, Conf). -conf_get(Key, Conf, Default) -> - ensure_list(hocon_maps:get(Key, Conf, Default)). +conf_get(Key, Conf, Default) -> emqx_schema:conf_get(Key, Conf, Default). filter(Opts) -> [{K, V} || {K, V} <- Opts, V =/= undefined]. @@ -1376,15 +1242,6 @@ to_atom(Str) when is_list(Str) -> to_atom(Bin) when is_binary(Bin) -> binary_to_atom(Bin, utf8). --spec ensure_list(binary() | list(char())) -> list(char()). -ensure_list(V) -> - case is_binary(V) of - true -> - binary_to_list(V); - false -> - V - end. - roots(Module) -> lists:map(fun({_BinName, Root}) -> Root end, hocon_schema:roots(Module)). diff --git a/apps/emqx_connector/docker-ct b/apps/emqx_connector/docker-ct index 7b9a4c068..5a4056927 100644 --- a/apps/emqx_connector/docker-ct +++ b/apps/emqx_connector/docker-ct @@ -1,4 +1,5 @@ mongo redis +redis_cluster mysql pgsql diff --git a/apps/emqx_connector/i18n/emqx_connector_http.conf b/apps/emqx_connector/i18n/emqx_connector_http.conf index 7583a38ed..da886191b 100644 --- a/apps/emqx_connector/i18n/emqx_connector_http.conf +++ b/apps/emqx_connector/i18n/emqx_connector_http.conf @@ -4,12 +4,12 @@ emqx_connector_http { en: """ The base URL is the URL includes only the scheme, host and port.
When send an HTTP request, the real URL to be used is the concatenation of the base URL and the -path parameter (passed by the emqx_resource:query/2,3 or provided by the request parameter).
+path parameter
For example: `http://localhost:9901/` """ zh: """ base URL 只包含host和port。
-发送HTTP请求时,真实的URL是由base URL 和 path parameter连接而成(通过emqx_resource:query/2,3传递,或者通过请求参数提供)。
+发送HTTP请求时,真实的URL是由base URL 和 path parameter连接而成。
示例:`http://localhost:9901/` """ } @@ -76,14 +76,8 @@ base URL 只包含host和port。
request { desc { - en: """ -If the request is provided, the caller can send HTTP requests via -emqx_resource:query(ResourceId, {send_message, BridgeId, Message}) -""" - zh: """ -如果提供了请求,调用者可以通过以下方式发送 HTTP 请求 -emqx_resource:query(ResourceId, {send_message, BridgeId, Message}) -""" + en: """Configure HTTP request parameters.""" + zh: """设置 HTTP 请求的参数。""" } label: { en: "Request" diff --git a/apps/emqx_connector/i18n/emqx_connector_redis.conf b/apps/emqx_connector/i18n/emqx_connector_redis.conf index e8e05d08f..e42e73498 100644 --- a/apps/emqx_connector/i18n/emqx_connector_redis.conf +++ b/apps/emqx_connector/i18n/emqx_connector_redis.conf @@ -69,7 +69,7 @@ The Redis default port 6379 is used if `[:Port]` is not specified. A Node list for Cluster to connect to. The nodes should be separated with commas, such as: `Node[,Node].` For each Node should be: The IPv4 or IPv6 address or the hostname to connect to. A host entry has the following form: `Host[:Port]`. -The MongoDB default port 27017 is used if `[:Port]` is not specified. +The Redis default port 6379 is used if `[:Port]` is not specified. """ zh: """ diff --git a/apps/emqx_connector/rebar.config b/apps/emqx_connector/rebar.config index 8252c9cf4..2ce6b00f8 100644 --- a/apps/emqx_connector/rebar.config +++ b/apps/emqx_connector/rebar.config @@ -11,17 +11,9 @@ {eldap2, {git, "https://github.com/emqx/eldap2", {tag, "v0.2.2"}}}, {mysql, {git, "https://github.com/emqx/mysql-otp", {tag, "1.7.2"}}}, {epgsql, {git, "https://github.com/emqx/epgsql", {tag, "4.7.0.1"}}}, - %% NOTE: mind poolboy version when updating mongodb-erlang version {mongodb, {git, "https://github.com/emqx/mongodb-erlang", {tag, "v3.0.19"}}}, - %% NOTE: mind poolboy version when updating eredis_cluster version - {eredis_cluster, {git, "https://github.com/emqx/eredis_cluster", {tag, "0.7.5"}}}, - %% mongodb-erlang uses a special fork https://github.com/comtihon/poolboy.git - %% (which has overflow_ttl feature added). 
- %% However, it references `{branch, "master}` (commit 9c06a9a on 2021-04-07). - %% By accident, We have always been using the upstream fork due to - %% eredis_cluster's dependency getting resolved earlier. - %% Here we pin 1.5.2 to avoid surprises in the future. - {poolboy, {git, "https://github.com/emqx/poolboy.git", {tag, "1.5.2"}}} + %% NOTE: mind ecpool version when updating eredis_cluster version + {eredis_cluster, {git, "https://github.com/emqx/eredis_cluster", {tag, "0.8.1"}}} ]}. {shell, [ diff --git a/apps/emqx_connector/src/emqx_connector_http.erl b/apps/emqx_connector/src/emqx_connector_http.erl index 7f84c665a..7c4a1fcf8 100644 --- a/apps/emqx_connector/src/emqx_connector_http.erl +++ b/apps/emqx_connector/src/emqx_connector_http.erl @@ -209,7 +209,7 @@ on_start( ?SLOG(info, #{ msg => "starting_http_connector", connector => InstId, - config => emqx_misc:redact(Config) + config => redact(Config) }), {Transport, TransportOpts} = case Scheme of @@ -234,6 +234,7 @@ on_start( PoolName = emqx_plugin_libs_pool:pool_name(InstId), State = #{ pool_name => PoolName, + pool_type => PoolType, host => Host, port => Port, connect_timeout => ConnectTimeout, @@ -264,9 +265,10 @@ on_query(InstId, {send_message, Msg}, State) -> path := Path, body := Body, headers := Headers, - request_timeout := Timeout, - max_retries := Retry + request_timeout := Timeout } = process_request(Request, Msg), + %% bridge buffer worker has retry, do not let ehttpc retry + Retry = 0, on_query( InstId, {undefined, Method, {Path, Headers, Body}, Timeout, Retry}, @@ -274,26 +276,30 @@ on_query(InstId, {send_message, Msg}, State) -> ) end; on_query(InstId, {Method, Request}, State) -> - on_query(InstId, {undefined, Method, Request, 5000, 2}, State); + %% TODO: Get retry from State + on_query(InstId, {undefined, Method, Request, 5000, _Retry = 2}, State); on_query(InstId, {Method, Request, Timeout}, State) -> - on_query(InstId, {undefined, Method, Request, Timeout, 2}, State); + %% TODO: Get 
retry from State + on_query(InstId, {undefined, Method, Request, Timeout, _Retry = 2}, State); on_query( InstId, {KeyOrNum, Method, Request, Timeout, Retry}, - #{pool_name := PoolName, base_path := BasePath} = State + #{base_path := BasePath} = State ) -> ?TRACE( "QUERY", "http_connector_received", - #{request => Request, connector => InstId, state => State} + #{ + request => redact(Request), + connector => InstId, + state => redact(State) + } ), NRequest = formalize_request(Method, BasePath, Request), + Worker = resolve_pool_worker(State, KeyOrNum), case ehttpc:request( - case KeyOrNum of - undefined -> PoolName; - _ -> {PoolName, KeyOrNum} - end, + Worker, Method, NRequest, Timeout, @@ -310,7 +316,7 @@ on_query( {error, Reason} = Result -> ?SLOG(error, #{ msg => "http_connector_do_request_failed", - request => NRequest, + request => redact(NRequest), reason => Reason, connector => InstId }), @@ -322,7 +328,7 @@ on_query( {ok, StatusCode, Headers} -> ?SLOG(error, #{ msg => "http connector do request, received error response", - request => NRequest, + request => redact(NRequest), connector => InstId, status_code => StatusCode }), @@ -330,7 +336,7 @@ on_query( {ok, StatusCode, Headers, Body} -> ?SLOG(error, #{ msg => "http connector do request, received error response", - request => NRequest, + request => redact(NRequest), connector => InstId, status_code => StatusCode }), @@ -361,19 +367,19 @@ on_query_async( InstId, {KeyOrNum, Method, Request, Timeout}, ReplyFunAndArgs, - #{pool_name := PoolName, base_path := BasePath} = State + #{base_path := BasePath} = State ) -> + Worker = resolve_pool_worker(State, KeyOrNum), ?TRACE( "QUERY_ASYNC", "http_connector_received", - #{request => Request, connector => InstId, state => State} + #{ + request => redact(Request), + connector => InstId, + state => redact(State) + } ), NRequest = formalize_request(Method, BasePath, Request), - Worker = - case KeyOrNum of - undefined -> ehttpc_pool:pick_worker(PoolName); - _ -> 
ehttpc_pool:pick_worker(PoolName, KeyOrNum) - end, ok = ehttpc:request_async( Worker, Method, @@ -383,6 +389,16 @@ on_query_async( ), {ok, Worker}. +resolve_pool_worker(State, undefined) -> + resolve_pool_worker(State, self()); +resolve_pool_worker(#{pool_name := PoolName} = State, Key) -> + case maps:get(pool_type, State, random) of + random -> + ehttpc_pool:pick_worker(PoolName); + hash -> + ehttpc_pool:pick_worker(PoolName, Key) + end. + on_get_status(_InstId, #{pool_name := PoolName, connect_timeout := Timeout} = State) -> case do_get_status(PoolName, Timeout) of ok -> @@ -401,7 +417,7 @@ do_get_status(PoolName, Timeout) -> {error, Reason} = Error -> ?SLOG(error, #{ msg => "http_connector_get_status_failed", - reason => Reason, + reason => redact(Reason), worker => Worker }), Error @@ -554,3 +570,63 @@ reply_delegator(ReplyFunAndArgs, Result) -> _ -> emqx_resource:apply_reply_fun(ReplyFunAndArgs, Result) end. + +%% The HOCON schema system may generate sensitive keys with this format +is_sensitive_key([{str, StringKey}]) -> + is_sensitive_key(StringKey); +is_sensitive_key(Atom) when is_atom(Atom) -> + is_sensitive_key(erlang:atom_to_binary(Atom)); +is_sensitive_key(Bin) when is_binary(Bin), (size(Bin) =:= 19 orelse size(Bin) =:= 13) -> + try + %% This is wrapped in a try-catch since we don't know that Bin is a + %% valid string so string:lowercase/1 might throw an exception. + %% + %% We want to convert this to lowercase since the http header fields + %% are case insensitive, which means that a user of the Webhook bridge + %% can write this field name in many different ways. + LowercaseBin = iolist_to_binary(string:lowercase(Bin)), + case LowercaseBin of + <<"authorization">> -> true; + <<"proxy-authorization">> -> true; + _ -> false + end + catch + _:_ -> false + end; +is_sensitive_key(_) -> + false. 
+ +%% Function that will do a deep traversal of Data and remove sensitive +%% information (i.e., passwords) +redact(Data) -> + emqx_misc:redact(Data, fun is_sensitive_key/1). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +redact_test_() -> + TestData1 = [ + {<<"content-type">>, <<"application/json">>}, + {<<"Authorization">>, <<"Basic YWxhZGRpbjpvcGVuc2VzYW1l">>} + ], + + TestData2 = #{ + headers => + [ + {[{str, <<"content-type">>}], [{str, <<"application/json">>}]}, + {[{str, <<"Authorization">>}], [{str, <<"Basic YWxhZGRpbjpvcGVuc2VzYW1l">>}]} + ] + }, + [ + ?_assert(is_sensitive_key(<<"Authorization">>)), + ?_assert(is_sensitive_key(<<"AuthoriZation">>)), + ?_assert(is_sensitive_key('AuthoriZation')), + ?_assert(is_sensitive_key(<<"PrOxy-authoRizaTion">>)), + ?_assert(is_sensitive_key('PrOxy-authoRizaTion')), + ?_assertNot(is_sensitive_key(<<"Something">>)), + ?_assertNot(is_sensitive_key(89)), + ?_assertNotEqual(TestData1, redact(TestData1)), + ?_assertNotEqual(TestData2, redact(TestData2)) + ]. + +-endif. diff --git a/apps/emqx_connector/src/emqx_connector_mqtt.erl b/apps/emqx_connector/src/emqx_connector_mqtt.erl index 71ed81dda..cffd138b5 100644 --- a/apps/emqx_connector/src/emqx_connector_mqtt.erl +++ b/apps/emqx_connector/src/emqx_connector_mqtt.erl @@ -105,16 +105,15 @@ init([]) -> {ok, {SupFlag, []}}. bridge_spec(Config) -> + {Name, NConfig} = maps:take(name, Config), #{ - id => maps:get(name, Config), - start => {emqx_connector_mqtt_worker, start_link, [Config]}, - restart => permanent, - shutdown => 5000, - type => worker, - modules => [emqx_connector_mqtt_worker] + id => Name, + start => {emqx_connector_mqtt_worker, start_link, [Name, NConfig]}, + restart => temporary, + shutdown => 5000 }. --spec bridges() -> [{node(), map()}]. +-spec bridges() -> [{_Name, _Status}]. 
bridges() -> [ {Name, emqx_connector_mqtt_worker:status(Name)} @@ -144,8 +143,7 @@ on_message_received(Msg, HookPoint, ResId) -> %% =================================================================== callback_mode() -> async_if_possible. -on_start(InstId, Conf) -> - InstanceId = binary_to_atom(InstId, utf8), +on_start(InstanceId, Conf) -> ?SLOG(info, #{ msg => "starting_mqtt_connector", connector => InstanceId, @@ -154,8 +152,8 @@ on_start(InstId, Conf) -> BasicConf = basic_config(Conf), BridgeConf = BasicConf#{ name => InstanceId, - clientid => clientid(InstId, Conf), - subscriptions => make_sub_confs(maps:get(ingress, Conf, undefined), Conf, InstId), + clientid => clientid(InstanceId, Conf), + subscriptions => make_sub_confs(maps:get(ingress, Conf, undefined), Conf, InstanceId), forwards => make_forward_confs(maps:get(egress, Conf, undefined)) }, case ?MODULE:create_bridge(BridgeConf) of @@ -189,44 +187,50 @@ on_stop(_InstId, #{name := InstanceId}) -> on_query(_InstId, {send_message, Msg}, #{name := InstanceId}) -> ?TRACE("QUERY", "send_msg_to_remote_node", #{message => Msg, connector => InstanceId}), - emqx_connector_mqtt_worker:send_to_remote(InstanceId, Msg). + case emqx_connector_mqtt_worker:send_to_remote(InstanceId, Msg) of + ok -> + ok; + {error, Reason} -> + classify_error(Reason) + end. -on_query_async( - _InstId, - {send_message, Msg}, - {ReplyFun, Args}, - #{name := InstanceId} -) -> +on_query_async(_InstId, {send_message, Msg}, Callback, #{name := InstanceId}) -> ?TRACE("QUERY", "async_send_msg_to_remote_node", #{message => Msg, connector => InstanceId}), - %% this is a cast, currently. - ok = emqx_connector_mqtt_worker:send_to_remote_async(InstanceId, Msg, {ReplyFun, Args}), - WorkerPid = get_worker_pid(InstanceId), - {ok, WorkerPid}. + case emqx_connector_mqtt_worker:send_to_remote_async(InstanceId, Msg, Callback) of + ok -> + ok; + {ok, Pid} -> + {ok, Pid}; + {error, Reason} -> + classify_error(Reason) + end. 
on_get_status(_InstId, #{name := InstanceId}) -> - case emqx_connector_mqtt_worker:status(InstanceId) of - connected -> connected; - _ -> connecting - end. + emqx_connector_mqtt_worker:status(InstanceId). + +classify_error(disconnected = Reason) -> + {error, {recoverable_error, Reason}}; +classify_error({disconnected, _RC, _} = Reason) -> + {error, {recoverable_error, Reason}}; +classify_error({shutdown, _} = Reason) -> + {error, {recoverable_error, Reason}}; +classify_error(Reason) -> + {error, {unrecoverable_error, Reason}}. ensure_mqtt_worker_started(InstanceId, BridgeConf) -> - case emqx_connector_mqtt_worker:ensure_started(InstanceId) of - ok -> {ok, #{name => InstanceId, bridge_conf => BridgeConf}}; - {error, Reason} -> {error, Reason} + case emqx_connector_mqtt_worker:connect(InstanceId) of + {ok, Properties} -> + {ok, #{name => InstanceId, config => BridgeConf, props => Properties}}; + {error, Reason} -> + {error, Reason} end. -%% mqtt workers, when created and called via bridge callbacks, are -%% registered. --spec get_worker_pid(atom()) -> pid(). -get_worker_pid(InstanceId) -> - whereis(InstanceId). 
- make_sub_confs(EmptyMap, _Conf, _) when map_size(EmptyMap) == 0 -> undefined; make_sub_confs(undefined, _Conf, _) -> undefined; -make_sub_confs(SubRemoteConf, Conf, InstId) -> - ResId = emqx_resource_manager:manager_id_to_resource_id(InstId), +make_sub_confs(SubRemoteConf, Conf, InstanceId) -> + ResId = emqx_resource_manager:manager_id_to_resource_id(InstanceId), case maps:find(hookpoint, Conf) of error -> error({no_hookpoint_provided, Conf}); @@ -247,7 +251,6 @@ basic_config( server := Server, proto_ver := ProtoVer, bridge_mode := BridgeMode, - clean_start := CleanStart, keepalive := KeepAlive, retry_interval := RetryIntv, max_inflight := MaxInflight, @@ -260,7 +263,6 @@ basic_config( %% 30s connect_timeout => 30, auto_reconnect => true, - reconnect_interval => ?AUTO_RECONNECT_INTERVAL, proto_ver => ProtoVer, %% Opening bridge_mode will form a non-standard mqtt connection message. %% A load balancing server (such as haproxy) is often set up before the emqx broker server. @@ -268,13 +270,15 @@ basic_config( %% non-standard mqtt connection packets will be filtered out by LB. %% So let's disable bridge_mode. bridge_mode => BridgeMode, - clean_start => CleanStart, + %% NOTE + %% We are ignoring the user configuration here because there's currently no reliable way + %% to ensure proper session recovery according to the MQTT spec. + clean_start => true, keepalive => ms_to_s(KeepAlive), retry_interval => RetryIntv, max_inflight => MaxInflight, ssl => EnableSsl, - ssl_opts => maps:to_list(maps:remove(enable, Ssl)), - if_record_metrics => true + ssl_opts => maps:to_list(maps:remove(enable, Ssl)) }, maybe_put_fields([username, password], Conf, BasicConf). 
diff --git a/apps/emqx_connector/src/emqx_connector_redis.erl b/apps/emqx_connector/src/emqx_connector_redis.erl index 4bb46bca3..29f7c0a38 100644 --- a/apps/emqx_connector/src/emqx_connector_redis.erl +++ b/apps/emqx_connector/src/emqx_connector_redis.erl @@ -153,7 +153,7 @@ on_start( false -> [{ssl, false}] end ++ [{sentinel, maps:get(sentinel, Config, undefined)}], - PoolName = emqx_plugin_libs_pool:pool_name(InstId), + PoolName = InstId, State = #{poolname => PoolName, type => Type}, case Type of cluster -> @@ -222,29 +222,15 @@ is_unrecoverable_error(Results) when is_list(Results) -> lists:any(fun is_unrecoverable_error/1, Results); is_unrecoverable_error({error, <<"ERR unknown command ", _/binary>>}) -> true; +is_unrecoverable_error({error, invalid_cluster_command}) -> + true; is_unrecoverable_error(_) -> false. -extract_eredis_cluster_workers(PoolName) -> - lists:flatten([ - gen_server:call(PoolPid, get_all_workers) - || PoolPid <- eredis_cluster_monitor:get_all_pools(PoolName) - ]). - -eredis_cluster_workers_exist_and_are_connected(Workers) -> - length(Workers) > 0 andalso - lists:all( - fun({_, Pid, _, _}) -> - eredis_cluster_pool_worker:is_connected(Pid) =:= true - end, - Workers - ). - on_get_status(_InstId, #{type := cluster, poolname := PoolName}) -> case eredis_cluster:pool_exists(PoolName) of true -> - Workers = extract_eredis_cluster_workers(PoolName), - Health = eredis_cluster_workers_exist_and_are_connected(Workers), + Health = eredis_cluster:ping_all(PoolName), status_result(Health); false -> disconnected @@ -267,7 +253,9 @@ do_cmd(PoolName, cluster, {cmd, Command}) -> do_cmd(Conn, _Type, {cmd, Command}) -> eredis:q(Conn, Command); do_cmd(PoolName, cluster, {cmds, Commands}) -> - wrap_qp_result(eredis_cluster:qp(PoolName, Commands)); + % TODO + % Cluster mode is currently incompatible with batching. 
+ wrap_qp_result([eredis_cluster:q(PoolName, Command) || Command <- Commands]); do_cmd(Conn, _Type, {cmds, Commands}) -> wrap_qp_result(eredis:qp(Conn, Commands)). diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl deleted file mode 100644 index 870f9acfc..000000000 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl +++ /dev/null @@ -1,236 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - -%% @doc This module implements EMQX Bridge transport layer on top of MQTT protocol - --module(emqx_connector_mqtt_mod). - --export([ - start/1, - send/2, - send_async/3, - stop/1, - ping/1 -]). - --export([ - ensure_subscribed/3, - ensure_unsubscribed/2 -]). - -%% callbacks for emqtt --export([ - handle_publish/3, - handle_disconnected/2 -]). - --include_lib("emqx/include/logger.hrl"). --include_lib("emqx/include/emqx_mqtt.hrl"). - --define(ACK_REF(ClientPid, PktId), {ClientPid, PktId}). - -%% Messages towards ack collector process --define(REF_IDS(Ref, Ids), {Ref, Ids}). 
- -%%-------------------------------------------------------------------- -%% emqx_bridge_connect callbacks -%%-------------------------------------------------------------------- - -start(Config) -> - Parent = self(), - ServerStr = iolist_to_binary(maps:get(server, Config)), - {Server, Port} = emqx_connector_mqtt_schema:parse_server(ServerStr), - Mountpoint = maps:get(receive_mountpoint, Config, undefined), - Subscriptions = maps:get(subscriptions, Config, undefined), - Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Subscriptions), - Handlers = make_hdlr(Parent, Vars, #{server => ServerStr}), - Config1 = Config#{ - msg_handler => Handlers, - host => Server, - port => Port, - force_ping => true, - proto_ver => maps:get(proto_ver, Config, v4) - }, - case emqtt:start_link(process_config(Config1)) of - {ok, Pid} -> - case emqtt:connect(Pid) of - {ok, _} -> - try - ok = sub_remote_topics(Pid, Subscriptions), - {ok, #{client_pid => Pid, subscriptions => Subscriptions}} - catch - throw:Reason -> - ok = stop(#{client_pid => Pid}), - {error, error_reason(Reason, ServerStr)} - end; - {error, Reason} -> - ok = stop(#{client_pid => Pid}), - {error, error_reason(Reason, ServerStr)} - end; - {error, Reason} -> - {error, error_reason(Reason, ServerStr)} - end. - -error_reason(Reason, ServerStr) -> - #{reason => Reason, server => ServerStr}. - -stop(#{client_pid := Pid}) -> - safe_stop(Pid, fun() -> emqtt:stop(Pid) end, 1000), - ok. - -ping(undefined) -> - pang; -ping(#{client_pid := Pid}) -> - emqtt:ping(Pid). - -ensure_subscribed(#{client_pid := Pid, subscriptions := Subs} = Conn, Topic, QoS) when - is_pid(Pid) --> - case emqtt:subscribe(Pid, Topic, QoS) of - {ok, _, _} -> Conn#{subscriptions => [{Topic, QoS} | Subs]}; - Error -> {error, Error} - end; -ensure_subscribed(_Conn, _Topic, _QoS) -> - %% return ok for now - %% next re-connect should should call start with new topic added to config - ok. 
- -ensure_unsubscribed(#{client_pid := Pid, subscriptions := Subs} = Conn, Topic) when is_pid(Pid) -> - case emqtt:unsubscribe(Pid, Topic) of - {ok, _, _} -> Conn#{subscriptions => lists:keydelete(Topic, 1, Subs)}; - Error -> {error, Error} - end; -ensure_unsubscribed(Conn, _) -> - %% return ok for now - %% next re-connect should should call start with this topic deleted from config - Conn. - -safe_stop(Pid, StopF, Timeout) -> - MRef = monitor(process, Pid), - unlink(Pid), - try - StopF() - catch - _:_ -> - ok - end, - receive - {'DOWN', MRef, _, _, _} -> - ok - after Timeout -> - exit(Pid, kill) - end. - -send(#{client_pid := ClientPid}, Msg) -> - emqtt:publish(ClientPid, Msg). - -send_async(#{client_pid := ClientPid}, Msg, Callback) -> - emqtt:publish_async(ClientPid, Msg, infinity, Callback). - -handle_publish(Msg, undefined, _Opts) -> - ?SLOG(error, #{ - msg => - "cannot_publish_to_local_broker_as" - "_'ingress'_is_not_configured", - message => Msg - }); -handle_publish(#{properties := Props} = Msg0, Vars, Opts) -> - Msg = format_msg_received(Msg0, Opts), - ?SLOG(debug, #{ - msg => "publish_to_local_broker", - message => Msg, - vars => Vars - }), - case Vars of - #{on_message_received := {Mod, Func, Args}} -> - _ = erlang:apply(Mod, Func, [Msg | Args]); - _ -> - ok - end, - maybe_publish_to_local_broker(Msg, Vars, Props). - -handle_disconnected(Reason, Parent) -> - Parent ! {disconnected, self(), Reason}. - -make_hdlr(Parent, Vars, Opts) -> - #{ - publish => {fun ?MODULE:handle_publish/3, [Vars, Opts]}, - disconnected => {fun ?MODULE:handle_disconnected/2, [Parent]} - }. - -sub_remote_topics(_ClientPid, undefined) -> - ok; -sub_remote_topics(ClientPid, #{remote := #{topic := FromTopic, qos := QoS}}) -> - case emqtt:subscribe(ClientPid, FromTopic, QoS) of - {ok, _, _} -> ok; - Error -> throw(Error) - end. - -process_config(Config) -> - maps:without([conn_type, address, receive_mountpoint, subscriptions, name], Config). 
- -maybe_publish_to_local_broker(Msg, Vars, Props) -> - case emqx_map_lib:deep_get([local, topic], Vars, undefined) of - %% local topic is not set, discard it - undefined -> ok; - _ -> emqx_broker:publish(emqx_connector_mqtt_msg:to_broker_msg(Msg, Vars, Props)) - end. - -format_msg_received( - #{ - dup := Dup, - payload := Payload, - properties := Props, - qos := QoS, - retain := Retain, - topic := Topic - }, - #{server := Server} -) -> - #{ - id => emqx_guid:to_hexstr(emqx_guid:gen()), - server => Server, - payload => Payload, - topic => Topic, - qos => QoS, - dup => Dup, - retain => Retain, - pub_props => printable_maps(Props), - message_received_at => erlang:system_time(millisecond) - }. - -printable_maps(undefined) -> - #{}; -printable_maps(Headers) -> - maps:fold( - fun - ('User-Property', V0, AccIn) when is_list(V0) -> - AccIn#{ - 'User-Property' => maps:from_list(V0), - 'User-Property-Pairs' => [ - #{ - key => Key, - value => Value - } - || {Key, Value} <- V0 - ] - }; - (K, V0, AccIn) -> - AccIn#{K => V0} - end, - #{}, - Headers - ). diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl index be462fcc1..6fabc95e8 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl @@ -72,12 +72,6 @@ fields("server_configs") -> )}, {server, emqx_schema:servers_sc(#{desc => ?DESC("server")}, ?MQTT_HOST_OPTS)}, {clientid_prefix, mk(binary(), #{required => false, desc => ?DESC("clientid_prefix")})}, - {reconnect_interval, - mk_duration( - "Reconnect interval. 
Delay for the MQTT bridge to retry establishing the connection " - "in case of transportation failure.", - #{default => "15s"} - )}, {proto_ver, mk( hoconsc:enum([v3, v4, v5]), @@ -116,7 +110,9 @@ fields("server_configs") -> boolean(), #{ default => true, - desc => ?DESC("clean_start") + desc => ?DESC("clean_start"), + hidden => true, + deprecated => {since, "v5.0.16"} } )}, {keepalive, mk_duration("MQTT Keepalive.", #{default => "300s"})}, diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index ba2162993..6da63f99a 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -60,174 +60,241 @@ %% * Local messages are all normalised to QoS-1 when exporting to remote -module(emqx_connector_mqtt_worker). --behaviour(gen_statem). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("emqx/include/logger.hrl"). %% APIs -export([ - start_link/1, + start_link/2, stop/1 ]). -%% gen_statem callbacks --export([ - terminate/3, - code_change/4, - init/1, - callback_mode/0 -]). - -%% state functions --export([ - idle/3, - connected/3 -]). - %% management APIs -export([ - ensure_started/1, - ensure_stopped/1, + connect/1, status/1, ping/1, send_to_remote/2, send_to_remote_async/3 ]). --export([get_forwards/1]). - --export([get_subscriptions/1]). +-export([handle_publish/3]). +-export([handle_disconnect/1]). -export_type([ config/0, ack_ref/0 ]). --type id() :: atom() | string() | pid(). --type qos() :: emqx_types:qos(). +-type name() :: term(). +% -type qos() :: emqx_types:qos(). -type config() :: map(). -type ack_ref() :: term(). --type topic() :: emqx_types:topic(). +% -type topic() :: emqx_types:topic(). -include_lib("emqx/include/logger.hrl"). -include_lib("emqx/include/emqx_mqtt.hrl"). -%% same as default in-flight limit for emqtt --define(DEFAULT_INFLIGHT_SIZE, 32). 
--define(DEFAULT_RECONNECT_DELAY_MS, timer:seconds(5)). --define(DEFAULT_SEG_BYTES, (1 bsl 20)). --define(DEFAULT_MAX_TOTAL_SIZE, (1 bsl 31)). +-define(REF(Name), {via, gproc, ?NAME(Name)}). +-define(NAME(Name), {n, l, Name}). %% @doc Start a bridge worker. Supported configs: -%% start_type: 'manual' (default) or 'auto', when manual, bridge will stay -%% at 'idle' state until a manual call to start it. -%% connect_module: The module which implements emqx_bridge_connect behaviour -%% and work as message batch transport layer -%% reconnect_interval: Delay in milli-seconds for the bridge worker to retry -%% in case of transportation failure. -%% max_inflight: Max number of batches allowed to send-ahead before receiving -%% confirmation from remote node/cluster %% mountpoint: The topic mount point for messages sent to remote node/cluster %% `undefined', `<<>>' or `""' to disable %% forwards: Local topics to subscribe. %% %% Find more connection specific configs in the callback modules %% of emqx_bridge_connect behaviour. -start_link(Opts) when is_list(Opts) -> - start_link(maps:from_list(Opts)); -start_link(Opts) -> - case maps:get(name, Opts, undefined) of - undefined -> - gen_statem:start_link(?MODULE, Opts, []); - Name -> - Name1 = name(Name), - gen_statem:start_link({local, Name1}, ?MODULE, Opts#{name => Name1}, []) +-spec start_link(name(), map()) -> + {ok, pid()} | {error, _Reason}. +start_link(Name, BridgeOpts) -> + ?SLOG(debug, #{ + msg => "client_starting", + name => Name, + options => BridgeOpts + }), + Conf = init_config(BridgeOpts), + Options = mk_client_options(Conf, BridgeOpts), + case emqtt:start_link(Options) of + {ok, Pid} -> + true = gproc:reg_other(?NAME(Name), Pid, Conf), + {ok, Pid}; + {error, Reason} = Error -> + ?SLOG(error, #{ + msg => "client_start_failed", + config => emqx_misc:redact(BridgeOpts), + reason => Reason + }), + Error end. -ensure_started(Name) -> - gen_statem:call(name(Name), ensure_started). 
- -%% @doc Manually stop bridge worker. State idempotency ensured. -ensure_stopped(Name) -> - gen_statem:call(name(Name), ensure_stopped, 5000). - -stop(Pid) -> gen_statem:stop(Pid). - -status(Pid) when is_pid(Pid) -> - gen_statem:call(Pid, status); -status(Name) -> - gen_statem:call(name(Name), status). - -ping(Pid) when is_pid(Pid) -> - gen_statem:call(Pid, ping); -ping(Name) -> - gen_statem:call(name(Name), ping). - -send_to_remote(Pid, Msg) when is_pid(Pid) -> - gen_statem:call(Pid, {send_to_remote, Msg}); -send_to_remote(Name, Msg) -> - gen_statem:call(name(Name), {send_to_remote, Msg}). - -send_to_remote_async(Pid, Msg, Callback) when is_pid(Pid) -> - gen_statem:cast(Pid, {send_to_remote_async, Msg, Callback}); -send_to_remote_async(Name, Msg, Callback) -> - gen_statem:cast(name(Name), {send_to_remote_async, Msg, Callback}). - -%% @doc Return all forwards (local subscriptions). --spec get_forwards(id()) -> [topic()]. -get_forwards(Name) -> gen_statem:call(name(Name), get_forwards, timer:seconds(1000)). - -%% @doc Return all subscriptions (subscription over mqtt connection to remote broker). --spec get_subscriptions(id()) -> [{emqx_types:topic(), qos()}]. -get_subscriptions(Name) -> gen_statem:call(name(Name), get_subscriptions). - -callback_mode() -> [state_functions]. - -%% @doc Config should be a map(). -init(#{name := Name} = ConnectOpts) -> - ?SLOG(debug, #{ - msg => "starting_bridge_worker", - name => Name - }), - erlang:process_flag(trap_exit, true), - State = init_state(ConnectOpts), - self() ! idle, - {ok, idle, State#{ - connect_opts => pre_process_opts(ConnectOpts) - }}. 
- -init_state(Opts) -> - ReconnDelayMs = maps:get(reconnect_interval, Opts, ?DEFAULT_RECONNECT_DELAY_MS), - StartType = maps:get(start_type, Opts, manual), +init_config(Opts) -> Mountpoint = maps:get(forward_mountpoint, Opts, undefined), - MaxInflightSize = maps:get(max_inflight, Opts, ?DEFAULT_INFLIGHT_SIZE), - Name = maps:get(name, Opts, undefined), + Subscriptions = maps:get(subscriptions, Opts, undefined), + Forwards = maps:get(forwards, Opts, undefined), #{ - start_type => StartType, - reconnect_interval => ReconnDelayMs, mountpoint => format_mountpoint(Mountpoint), - max_inflight => MaxInflightSize, - connection => undefined, - name => Name + subscriptions => pre_process_subscriptions(Subscriptions), + forwards => pre_process_forwards(Forwards) }. -pre_process_opts(#{subscriptions := InConf, forwards := OutConf} = ConnectOpts) -> - ConnectOpts#{ - subscriptions => pre_process_in_out(in, InConf), - forwards => pre_process_in_out(out, OutConf) +mk_client_options(Conf, BridgeOpts) -> + Server = iolist_to_binary(maps:get(server, BridgeOpts)), + HostPort = emqx_connector_mqtt_schema:parse_server(Server), + Mountpoint = maps:get(receive_mountpoint, BridgeOpts, undefined), + Subscriptions = maps:get(subscriptions, Conf), + Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Subscriptions), + Opts = maps:without( + [ + address, + auto_reconnect, + conn_type, + mountpoint, + forwards, + receive_mountpoint, + subscriptions + ], + BridgeOpts + ), + Opts#{ + msg_handler => mk_client_event_handler(Vars, #{server => Server}), + hosts => [HostPort], + force_ping => true, + proto_ver => maps:get(proto_ver, BridgeOpts, v4) }. -pre_process_in_out(_, undefined) -> +mk_client_event_handler(Vars, Opts) when Vars /= undefined -> + #{ + publish => {fun ?MODULE:handle_publish/3, [Vars, Opts]}, + disconnected => {fun ?MODULE:handle_disconnect/1, []} + }; +mk_client_event_handler(undefined, _Opts) -> + undefined. 
+ +connect(Name) -> + #{subscriptions := Subscriptions} = get_config(Name), + case emqtt:connect(get_pid(Name)) of + {ok, Properties} -> + case subscribe_remote_topics(Name, Subscriptions) of + ok -> + {ok, Properties}; + {ok, _, _RCs} -> + {ok, Properties}; + {error, Reason} = Error -> + ?SLOG(error, #{ + msg => "client_subscribe_failed", + subscriptions => Subscriptions, + reason => Reason + }), + Error + end; + {error, Reason} = Error -> + ?SLOG(error, #{ + msg => "client_connect_failed", + reason => Reason + }), + Error + end. + +subscribe_remote_topics(Ref, #{remote := #{topic := FromTopic, qos := QoS}}) -> + emqtt:subscribe(ref(Ref), FromTopic, QoS); +subscribe_remote_topics(_Ref, undefined) -> + ok. + +stop(Ref) -> + emqtt:stop(ref(Ref)). + +status(Ref) -> + try + Info = emqtt:info(ref(Ref)), + case proplists:get_value(socket, Info) of + Socket when Socket /= undefined -> + connected; + undefined -> + connecting + end + catch + exit:{noproc, _} -> + disconnected + end. + +ping(Ref) -> + emqtt:ping(ref(Ref)). + +send_to_remote(Name, MsgIn) -> + trycall(fun() -> do_send(Name, export_msg(Name, MsgIn)) end). + +do_send(Name, {true, Msg}) -> + case emqtt:publish(get_pid(Name), Msg) of + ok -> + ok; + {ok, #{reason_code := RC}} when + RC =:= ?RC_SUCCESS; + RC =:= ?RC_NO_MATCHING_SUBSCRIBERS + -> + ok; + {ok, #{reason_code := RC, reason_code_name := Reason}} -> + ?SLOG(warning, #{ + msg => "remote_publish_failed", + message => Msg, + reason_code => RC, + reason_code_name => Reason + }), + {error, Reason}; + {error, Reason} -> + ?SLOG(info, #{ + msg => "client_failed", + reason => Reason + }), + {error, Reason} + end; +do_send(_Name, false) -> + ok. + +send_to_remote_async(Name, MsgIn, Callback) -> + trycall(fun() -> do_send_async(Name, export_msg(Name, MsgIn), Callback) end). 
+ +do_send_async(Name, {true, Msg}, Callback) -> + Pid = get_pid(Name), + ok = emqtt:publish_async(Pid, Msg, _Timeout = infinity, Callback), + {ok, Pid}; +do_send_async(_Name, false, _Callback) -> + ok. + +ref(Pid) when is_pid(Pid) -> + Pid; +ref(Term) -> + ?REF(Term). + +trycall(Fun) -> + try + Fun() + catch + throw:noproc -> + {error, disconnected}; + exit:{noproc, _} -> + {error, disconnected} + end. + +format_mountpoint(undefined) -> undefined; -pre_process_in_out(in, #{local := LC} = Conf) when is_map(Conf) -> +format_mountpoint(Prefix) -> + binary:replace(iolist_to_binary(Prefix), <<"${node}">>, atom_to_binary(node(), utf8)). + +pre_process_subscriptions(undefined) -> + undefined; +pre_process_subscriptions(#{local := LC} = Conf) when is_map(Conf) -> Conf#{local => pre_process_in_out_common(LC)}; -pre_process_in_out(in, Conf) when is_map(Conf) -> +pre_process_subscriptions(Conf) when is_map(Conf) -> %% have no 'local' field in the config + undefined. + +pre_process_forwards(undefined) -> undefined; -pre_process_in_out(out, #{remote := RC} = Conf) when is_map(Conf) -> +pre_process_forwards(#{remote := RC} = Conf) when is_map(Conf) -> Conf#{remote => pre_process_in_out_common(RC)}; -pre_process_in_out(out, Conf) when is_map(Conf) -> +pre_process_forwards(Conf) when is_map(Conf) -> %% have no 'remote' field in the config undefined. @@ -247,238 +314,110 @@ pre_process_conf(Key, Conf) -> Conf#{Key => Val} end. -code_change(_Vsn, State, Data, _Extra) -> - {ok, State, Data}. +get_pid(Name) -> + case gproc:where(?NAME(Name)) of + Pid when is_pid(Pid) -> + Pid; + undefined -> + throw(noproc) + end. -terminate(_Reason, _StateName, State) -> - _ = disconnect(State), - maybe_destroy_session(State). - -maybe_destroy_session(#{connect_opts := ConnectOpts = #{clean_start := false}} = State) -> +get_config(Name) -> try - %% Destroy session if clean_start is not set. - %% Ignore any crashes, just refresh the clean_start = true. 
- _ = do_connect(State#{connect_opts => ConnectOpts#{clean_start => true}}), - _ = disconnect(State), - ok + gproc:lookup_value(?NAME(Name)) catch - _:_ -> + error:badarg -> + throw(noproc) + end. + +export_msg(Name, Msg) -> + case get_config(Name) of + #{forwards := Forwards = #{}, mountpoint := Mountpoint} -> + {true, export_msg(Mountpoint, Forwards, Msg)}; + #{forwards := undefined} -> + ?SLOG(error, #{ + msg => "forwarding_unavailable", + message => Msg, + reason => "egress is not configured" + }), + false + end. + +export_msg(Mountpoint, Forwards, Msg) -> + Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Forwards), + emqx_connector_mqtt_msg:to_remote_msg(Msg, Vars). + +%% + +handle_publish(#{properties := Props} = MsgIn, Vars, Opts) -> + Msg = import_msg(MsgIn, Opts), + ?SLOG(debug, #{ + msg => "publish_local", + message => Msg, + vars => Vars + }), + case Vars of + #{on_message_received := {Mod, Func, Args}} -> + _ = erlang:apply(Mod, Func, [Msg | Args]); + _ -> ok - end; -maybe_destroy_session(_State) -> + end, + maybe_publish_local(Msg, Vars, Props). + +handle_disconnect(_Reason) -> ok. -%% ensure_started will be deprecated in the future -idle({call, From}, ensure_started, State) -> - case do_connect(State) of - {ok, State1} -> - {next_state, connected, State1, [{reply, From, ok}, {state_timeout, 0, connected}]}; - {error, Reason, _State} -> - {keep_state_and_data, [{reply, From, {error, Reason}}]} - end; -idle({call, From}, {send_to_remote, _}, _State) -> - {keep_state_and_data, [{reply, From, {error, {recoverable_error, not_connected}}}]}; -%% @doc Standing by for manual start. -idle(info, idle, #{start_type := manual}) -> - keep_state_and_data; -%% @doc Standing by for auto start. -idle(info, idle, #{start_type := auto} = State) -> - connecting(State); -idle(state_timeout, reconnect, State) -> - connecting(State); -idle(Type, Content, State) -> - common(idle, Type, Content, State). 
- -connecting(#{reconnect_interval := ReconnectDelayMs} = State) -> - case do_connect(State) of - {ok, State1} -> - {next_state, connected, State1, {state_timeout, 0, connected}}; +maybe_publish_local(Msg, Vars, Props) -> + case emqx_map_lib:deep_get([local, topic], Vars, undefined) of + %% local topic is not set, discard it + undefined -> + ok; _ -> - {keep_state_and_data, {state_timeout, ReconnectDelayMs, reconnect}} + emqx_broker:publish(emqx_connector_mqtt_msg:to_broker_msg(Msg, Vars, Props)) end. -connected(state_timeout, connected, State) -> - %% nothing to do - {keep_state, State}; -connected({call, From}, {send_to_remote, Msg}, State) -> - case do_send(State, Msg) of - {ok, NState} -> - {keep_state, NState, [{reply, From, ok}]}; - {error, Reason} -> - {keep_state_and_data, [[reply, From, {error, Reason}]]} - end; -connected(cast, {send_to_remote_async, Msg, Callback}, State) -> - _ = do_send_async(State, Msg, Callback), - {keep_state, State}; -connected( - info, - {disconnected, Conn, Reason}, - #{connection := Connection, name := Name, reconnect_interval := ReconnectDelayMs} = State -) -> - ?tp(info, disconnected, #{name => Name, reason => Reason}), - case Conn =:= maps:get(client_pid, Connection, undefined) of - true -> - {next_state, idle, State#{connection => undefined}, - {state_timeout, ReconnectDelayMs, reconnect}}; - false -> - keep_state_and_data - end; -connected(Type, Content, State) -> - common(connected, Type, Content, State). 
- -%% Common handlers -common(StateName, {call, From}, status, _State) -> - {keep_state_and_data, [{reply, From, StateName}]}; -common(_StateName, {call, From}, ping, #{connection := Conn} = _State) -> - Reply = emqx_connector_mqtt_mod:ping(Conn), - {keep_state_and_data, [{reply, From, Reply}]}; -common(_StateName, {call, From}, ensure_stopped, #{connection := undefined} = _State) -> - {keep_state_and_data, [{reply, From, ok}]}; -common(_StateName, {call, From}, ensure_stopped, #{connection := Conn} = State) -> - Reply = emqx_connector_mqtt_mod:stop(Conn), - {next_state, idle, State#{connection => undefined}, [{reply, From, Reply}]}; -common(_StateName, {call, From}, get_forwards, #{connect_opts := #{forwards := Forwards}}) -> - {keep_state_and_data, [{reply, From, Forwards}]}; -common(_StateName, {call, From}, get_subscriptions, #{connection := Connection}) -> - {keep_state_and_data, [{reply, From, maps:get(subscriptions, Connection, #{})}]}; -common(_StateName, {call, From}, Req, _State) -> - {keep_state_and_data, [{reply, From, {error, {unsupported_request, Req}}}]}; -common(_StateName, info, {'EXIT', _, _}, State) -> - {keep_state, State}; -common(StateName, Type, Content, #{name := Name} = State) -> - ?SLOG(error, #{ - msg => "bridge_discarded_event", - name => Name, - type => Type, - state_name => StateName, - content => Content - }), - {keep_state, State}. - -do_connect( +import_msg( #{ - connect_opts := ConnectOpts, - name := Name - } = State -) -> - case emqx_connector_mqtt_mod:start(ConnectOpts) of - {ok, Conn} -> - ?tp(info, connected, #{name => Name}), - {ok, State#{connection => Conn}}; - {error, Reason} -> - ConnectOpts1 = obfuscate(ConnectOpts), - ?SLOG(error, #{ - msg => "failed_to_connect", - config => ConnectOpts1, - reason => Reason - }), - {error, Reason, State} - end. 
- -do_send(#{connect_opts := #{forwards := undefined}}, Msg) -> - ?SLOG(error, #{ - msg => - "cannot_forward_messages_to_remote_broker" - "_as_'egress'_is_not_configured", - messages => Msg - }); -do_send( - #{ - connection := Connection, - mountpoint := Mountpoint, - connect_opts := #{forwards := Forwards} - } = State, - Msg -) -> - Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Forwards), - ExportMsg = emqx_connector_mqtt_msg:to_remote_msg(Msg, Vars), - ?SLOG(debug, #{ - msg => "publish_to_remote_broker", - message => Msg, - vars => Vars - }), - case emqx_connector_mqtt_mod:send(Connection, ExportMsg) of - ok -> - {ok, State}; - {ok, #{reason_code := RC}} when - RC =:= ?RC_SUCCESS; - RC =:= ?RC_NO_MATCHING_SUBSCRIBERS - -> - {ok, State}; - {ok, #{reason_code := RC, reason_code_name := RCN}} -> - ?SLOG(warning, #{ - msg => "publish_to_remote_node_falied", - message => Msg, - reason_code => RC, - reason_code_name => RCN - }), - {error, RCN}; - {error, Reason} -> - ?SLOG(info, #{ - msg => "mqtt_bridge_produce_failed", - reason => Reason - }), - {error, Reason} - end. - -do_send_async(#{connect_opts := #{forwards := undefined}}, Msg, _Callback) -> - %% TODO: eval callback with undefined error - ?SLOG(error, #{ - msg => - "cannot_forward_messages_to_remote_broker" - "_as_'egress'_is_not_configured", - messages => Msg - }); -do_send_async( - #{ - connection := Connection, - mountpoint := Mountpoint, - connect_opts := #{forwards := Forwards} + dup := Dup, + payload := Payload, + properties := Props, + qos := QoS, + retain := Retain, + topic := Topic }, - Msg, - Callback + #{server := Server} ) -> - Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Forwards), - ExportMsg = emqx_connector_mqtt_msg:to_remote_msg(Msg, Vars), - ?SLOG(debug, #{ - msg => "publish_to_remote_broker", - message => Msg, - vars => Vars - }), - emqx_connector_mqtt_mod:send_async(Connection, ExportMsg, Callback). 
+ #{ + id => emqx_guid:to_hexstr(emqx_guid:gen()), + server => Server, + payload => Payload, + topic => Topic, + qos => QoS, + dup => Dup, + retain => Retain, + pub_props => printable_maps(Props), + message_received_at => erlang:system_time(millisecond) + }. -disconnect(#{connection := Conn} = State) when Conn =/= undefined -> - emqx_connector_mqtt_mod:stop(Conn), - State#{connection => undefined}; -disconnect(State) -> - State. - -format_mountpoint(undefined) -> - undefined; -format_mountpoint(Prefix) -> - binary:replace(iolist_to_binary(Prefix), <<"${node}">>, atom_to_binary(node(), utf8)). - -name(Id) -> list_to_atom(str(Id)). - -obfuscate(Map) -> +printable_maps(undefined) -> + #{}; +printable_maps(Headers) -> maps:fold( - fun(K, V, Acc) -> - case is_sensitive(K) of - true -> [{K, '***'} | Acc]; - false -> [{K, V} | Acc] - end + fun + ('User-Property', V0, AccIn) when is_list(V0) -> + AccIn#{ + 'User-Property' => maps:from_list(V0), + 'User-Property-Pairs' => [ + #{ + key => Key, + value => Value + } + || {Key, Value} <- V0 + ] + }; + (K, V0, AccIn) -> + AccIn#{K => V0} end, - [], - Map + #{}, + Headers ). - -is_sensitive(password) -> true; -is_sensitive(ssl_opts) -> true; -is_sensitive(_) -> false. - -str(A) when is_atom(A) -> - atom_to_list(A); -str(B) when is_binary(B) -> - binary_to_list(B); -str(S) when is_list(S) -> - S. diff --git a/apps/emqx_connector/test/emqx_connector_mqtt_tests.erl b/apps/emqx_connector/test/emqx_connector_mqtt_tests.erl deleted file mode 100644 index 88c8b5218..000000000 --- a/apps/emqx_connector/test/emqx_connector_mqtt_tests.erl +++ /dev/null @@ -1,60 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - --module(emqx_connector_mqtt_tests). - --include_lib("eunit/include/eunit.hrl"). --include_lib("emqx/include/emqx_mqtt.hrl"). - -send_and_ack_test() -> - %% delegate from gen_rpc to rpc for unit test - meck:new(emqtt, [passthrough, no_history]), - meck:expect( - emqtt, - start_link, - 1, - fun(_) -> - {ok, spawn_link(fun() -> ok end)} - end - ), - meck:expect(emqtt, connect, 1, {ok, dummy}), - meck:expect( - emqtt, - stop, - 1, - fun(Pid) -> Pid ! stop end - ), - meck:expect( - emqtt, - publish, - 2, - fun(Client, Msg) -> - Client ! {publish, Msg}, - %% as packet id - {ok, Msg} - end - ), - try - Max = 1, - Batch = lists:seq(1, Max), - {ok, Conn} = emqx_connector_mqtt_mod:start(#{server => "127.0.0.1:1883"}), - %% return last packet id as batch reference - {ok, _AckRef} = emqx_connector_mqtt_mod:send(Conn, Batch), - - ok = emqx_connector_mqtt_mod:stop(Conn) - after - meck:unload(emqtt) - end. diff --git a/apps/emqx_connector/test/emqx_connector_mqtt_worker_tests.erl b/apps/emqx_connector/test/emqx_connector_mqtt_worker_tests.erl deleted file mode 100644 index 49bff7bbc..000000000 --- a/apps/emqx_connector/test/emqx_connector_mqtt_worker_tests.erl +++ /dev/null @@ -1,101 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - --module(emqx_connector_mqtt_worker_tests). - --include_lib("eunit/include/eunit.hrl"). --include_lib("emqx/include/emqx.hrl"). --include_lib("emqx/include/emqx_mqtt.hrl"). - --define(BRIDGE_NAME, test). --define(BRIDGE_REG_NAME, emqx_connector_mqtt_worker_test). --define(WAIT(PATTERN, TIMEOUT), - receive - PATTERN -> - ok - after TIMEOUT -> - error(timeout) - end -). - --export([start/1, send/2, stop/1]). - -start(#{connect_result := Result, test_pid := Pid, test_ref := Ref}) -> - case is_pid(Pid) of - true -> Pid ! {connection_start_attempt, Ref}; - false -> ok - end, - Result. - -send(SendFun, Batch) when is_function(SendFun, 2) -> - SendFun(Batch). - -stop(_Pid) -> ok. - -%% connect first, disconnect, then connect again -disturbance_test() -> - meck:new(emqx_connector_mqtt_mod, [passthrough, no_history]), - meck:expect(emqx_connector_mqtt_mod, start, 1, fun(Conf) -> start(Conf) end), - meck:expect(emqx_connector_mqtt_mod, send, 2, fun(SendFun, Batch) -> send(SendFun, Batch) end), - meck:expect(emqx_connector_mqtt_mod, stop, 1, fun(Pid) -> stop(Pid) end), - try - emqx_metrics:start_link(), - Ref = make_ref(), - TestPid = self(), - Config = make_config(Ref, TestPid, {ok, #{client_pid => TestPid}}), - {ok, Pid} = emqx_connector_mqtt_worker:start_link(Config#{name => bridge_disturbance}), - ?assertEqual(Pid, whereis(bridge_disturbance)), - ?WAIT({connection_start_attempt, Ref}, 1000), - Pid ! 
{disconnected, TestPid, test}, - ?WAIT({connection_start_attempt, Ref}, 1000), - emqx_metrics:stop(), - ok = emqx_connector_mqtt_worker:stop(Pid) - after - meck:unload(emqx_connector_mqtt_mod) - end. - -manual_start_stop_test() -> - meck:new(emqx_connector_mqtt_mod, [passthrough, no_history]), - meck:expect(emqx_connector_mqtt_mod, start, 1, fun(Conf) -> start(Conf) end), - meck:expect(emqx_connector_mqtt_mod, send, 2, fun(SendFun, Batch) -> send(SendFun, Batch) end), - meck:expect(emqx_connector_mqtt_mod, stop, 1, fun(Pid) -> stop(Pid) end), - try - emqx_metrics:start_link(), - Ref = make_ref(), - TestPid = self(), - BridgeName = manual_start_stop, - Config0 = make_config(Ref, TestPid, {ok, #{client_pid => TestPid}}), - Config = Config0#{start_type := manual}, - {ok, Pid} = emqx_connector_mqtt_worker:start_link(Config#{name => BridgeName}), - %% call ensure_started again should yield the same result - ok = emqx_connector_mqtt_worker:ensure_started(BridgeName), - emqx_connector_mqtt_worker:ensure_stopped(BridgeName), - emqx_metrics:stop(), - ok = emqx_connector_mqtt_worker:stop(Pid) - after - meck:unload(emqx_connector_mqtt_mod) - end. - -make_config(Ref, TestPid, Result) -> - #{ - start_type => auto, - subscriptions => undefined, - forwards => undefined, - reconnect_interval => 50, - test_pid => TestPid, - test_ref => Ref, - connect_result => Result - }. diff --git a/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl b/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl index 87d2b8e21..3a134ad35 100644 --- a/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl +++ b/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl @@ -27,6 +27,8 @@ -define(REDIS_SINGLE_PORT, 6379). -define(REDIS_SENTINEL_HOST, "redis-sentinel"). -define(REDIS_SENTINEL_PORT, 26379). +-define(REDIS_CLUSTER_HOST, "redis-cluster-1"). +-define(REDIS_CLUSTER_PORT, 6379). -define(REDIS_RESOURCE_MOD, emqx_connector_redis). 
all() -> @@ -203,8 +205,8 @@ redis_config_base(Type, ServerKey) -> MaybeSentinel = "", MaybeDatabase = " database = 1\n"; "cluster" -> - Host = ?REDIS_SINGLE_HOST, - Port = ?REDIS_SINGLE_PORT, + Host = ?REDIS_CLUSTER_HOST, + Port = ?REDIS_CLUSTER_PORT, MaybeSentinel = "", MaybeDatabase = "" end, diff --git a/apps/emqx_dashboard/src/emqx_dashboard_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_api.erl index a4322c696..cc2a1337d 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_api.erl @@ -325,7 +325,7 @@ is_self_auth_token(Username, Token) -> end. change_pwd(post, #{bindings := #{username := Username}, body := Params}) -> - LogMeta = #{msg => "Dashboard change password", username => Username}, + LogMeta = #{msg => "Dashboard change password", username => binary_to_list(Username)}, OldPwd = maps:get(<<"old_pwd">>, Params), NewPwd = maps:get(<<"new_pwd">>, Params), case ?EMPTY(OldPwd) orelse ?EMPTY(NewPwd) of diff --git a/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf b/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf index 0ab09520e..ca8bf0769 100644 --- a/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf +++ b/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf @@ -62,7 +62,7 @@ The default is false.""" duration { desc { - en: """Indicates how long the alarm has lasted, in milliseconds.""" + en: """Indicates how long the alarm has been active in milliseconds.""" zh: """表明告警已经持续了多久,单位:毫秒。""" } } diff --git a/apps/emqx_management/src/emqx_management.app.src b/apps/emqx_management/src/emqx_management.app.src index ccb53dac4..158d65b6b 100644 --- a/apps/emqx_management/src/emqx_management.app.src +++ b/apps/emqx_management/src/emqx_management.app.src @@ -2,7 +2,7 @@ {application, emqx_management, [ {description, "EMQX Management API and CLI"}, % strict semver, bump manually! 
- {vsn, "5.0.12"}, + {vsn, "5.0.13"}, {modules, []}, {registered, [emqx_management_sup]}, {applications, [kernel, stdlib, emqx_plugins, minirest, emqx]}, diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 6b38e8ca0..814b39cdc 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -126,7 +126,7 @@ lookup_node(Node) -> node_info() -> {UsedRatio, Total} = get_sys_memory(), - Info = maps:from_list([{K, list_to_binary(V)} || {K, V} <- emqx_vm:loads()]), + Info = maps:from_list(emqx_vm:loads()), BrokerInfo = emqx_sys:info(), Info#{ node => node(), @@ -150,7 +150,7 @@ node_info() -> get_sys_memory() -> case os:type() of {unix, linux} -> - load_ctl:get_sys_memory(); + emqx_mgmt_cache:get_sys_memory(); _ -> {0, 0} end. diff --git a/apps/emqx_management/src/emqx_mgmt_api_nodes.erl b/apps/emqx_management/src/emqx_mgmt_api_nodes.erl index 64ef3c1ef..cb8d37609 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_nodes.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_nodes.erl @@ -159,18 +159,18 @@ fields(node_info) -> )}, {load1, mk( - string(), - #{desc => <<"CPU average load in 1 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 1 minute">>, example => 2.66} )}, {load5, mk( - string(), - #{desc => <<"CPU average load in 5 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 5 minute">>, example => 2.66} )}, {load15, mk( - string(), - #{desc => <<"CPU average load in 15 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 15 minute">>, example => 2.66} )}, {max_fds, mk( diff --git a/apps/emqx_management/src/emqx_mgmt_api_topics.erl b/apps/emqx_management/src/emqx_mgmt_api_topics.erl index a64badd3a..4100269e5 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_topics.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_topics.erl @@ -75,7 +75,7 @@ schema("/topics/:topic") -> tags => ?TAGS, parameters => 
[topic_param(path)], responses => #{ - 200 => hoconsc:mk(hoconsc:ref(topic), #{}), + 200 => hoconsc:mk(hoconsc:array(hoconsc:ref(topic)), #{}), 404 => emqx_dashboard_swagger:error_codes(['TOPIC_NOT_FOUND'], <<"Topic not found">>) } @@ -130,8 +130,9 @@ lookup(#{topic := Topic}) -> case emqx_router:lookup_routes(Topic) of [] -> {404, #{code => ?TOPIC_NOT_FOUND, message => <<"Topic not found">>}}; - [Route] -> - {200, format(Route)} + Routes when is_list(Routes) -> + Formatted = [format(Route) || Route <- Routes], + {200, Formatted} end. %%%============================================================================================== diff --git a/apps/emqx_management/src/emqx_mgmt_cache.erl b/apps/emqx_management/src/emqx_mgmt_cache.erl new file mode 100644 index 000000000..e7f9ac0b1 --- /dev/null +++ b/apps/emqx_management/src/emqx_mgmt_cache.erl @@ -0,0 +1,108 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_mgmt_cache). + +-behaviour(gen_server). + +-define(SYS_MEMORY_KEY, sys_memory). +-define(EXPIRED_MS, 3000). +%% -100ms to early update cache +-define(REFRESH_MS, ?EXPIRED_MS - 100). +-define(DEFAULT_BAD_MEMORY, {0, 0}). + +-export([start_link/0, get_sys_memory/0]). 
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). + +get_sys_memory() -> + case get_memory_from_cache() of + {ok, CacheMem} -> + erlang:send(?MODULE, refresh_sys_memory), + CacheMem; + stale -> + get_sys_memory_sync() + end. + +get_sys_memory_sync() -> + try + gen_server:call(?MODULE, get_sys_memory, ?EXPIRED_MS) + catch + exit:{timeout, _} -> + ?DEFAULT_BAD_MEMORY + end. + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + _ = ets:new(?MODULE, [set, named_table, public, {keypos, 1}]), + {ok, #{latest_refresh => 0}}. + +handle_call(get_sys_memory, _From, State) -> + {Mem, NewState} = refresh_sys_memory(State), + {reply, Mem, NewState}; +handle_call(_Request, _From, State) -> + {reply, ok, State}. + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(refresh_sys_memory, State) -> + {_, NewState} = refresh_sys_memory(State), + {noreply, NewState}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +refresh_sys_memory(State = #{latest_refresh := LatestRefresh}) -> + Now = now_millisecond(), + case Now - LatestRefresh >= ?REFRESH_MS of + true -> + do_refresh_sys_memory(Now, State); + false -> + case get_memory_from_cache() of + stale -> do_refresh_sys_memory(Now, State); + {ok, Mem} -> {Mem, State} + end + end. + +do_refresh_sys_memory(RefreshAt, State) -> + NewMem = load_ctl:get_sys_memory(), + NewExpiredAt = now_millisecond() + ?EXPIRED_MS, + ets:insert(?MODULE, {?SYS_MEMORY_KEY, {NewMem, NewExpiredAt}}), + {NewMem, State#{latest_refresh => RefreshAt}}. 
+ +get_memory_from_cache() -> + case ets:lookup(?MODULE, ?SYS_MEMORY_KEY) of + [] -> + stale; + [{_, {Mem, ExpiredAt}}] -> + case now_millisecond() < ExpiredAt of + true -> {ok, Mem}; + false -> stale + end + end. + +now_millisecond() -> + erlang:system_time(millisecond). diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 0e7506a0b..442d5c7de 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -315,7 +315,7 @@ vm([]) -> vm(["all"]) -> [vm([Name]) || Name <- ["load", "memory", "process", "io", "ports"]]; vm(["load"]) -> - [emqx_ctl:print("cpu/~-20s: ~ts~n", [L, V]) || {L, V} <- emqx_vm:loads()]; + [emqx_ctl:print("cpu/~-20s: ~w~n", [L, V]) || {L, V} <- emqx_vm:loads()]; vm(["memory"]) -> [emqx_ctl:print("memory/~-17s: ~w~n", [Cat, Val]) || {Cat, Val} <- erlang:memory()]; vm(["process"]) -> diff --git a/apps/emqx_management/src/emqx_mgmt_sup.erl b/apps/emqx_management/src/emqx_mgmt_sup.erl index 329532fa1..713ff87dc 100644 --- a/apps/emqx_management/src/emqx_mgmt_sup.erl +++ b/apps/emqx_management/src/emqx_mgmt_sup.erl @@ -26,4 +26,21 @@ start_link() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). init([]) -> - {ok, {{one_for_one, 1, 5}, []}}. + Workers = + case os:type() of + {unix, linux} -> + [child_spec(emqx_mgmt_cache, 5000, worker)]; + _ -> + [] + end, + {ok, {{one_for_one, 1, 5}, Workers}}. + +child_spec(Mod, Shutdown, Type) -> + #{ + id => Mod, + start => {Mod, start_link, []}, + restart => permanent, + shutdown => Shutdown, + type => Type, + modules => [Mod] + }. diff --git a/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl index adff41214..2c61651bf 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl @@ -40,6 +40,9 @@ t_alarms_api(_) -> get_alarms(1, true), get_alarms(1, false). 
+t_alarm_cpu(_) -> + ok. + t_delete_alarms_api(_) -> Path = emqx_mgmt_api_test_util:api_path(["alarms"]), {ok, _} = emqx_mgmt_api_test_util:request_api(delete, Path), diff --git a/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl index 9a8824e7d..0e212d52f 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl @@ -20,6 +20,8 @@ -include_lib("eunit/include/eunit.hrl"). +-define(PORT, (20000 + ?LINE)). + all() -> emqx_common_test_helpers:all(?MODULE). @@ -32,13 +34,38 @@ end_per_suite(_) -> emqx_conf:remove([listeners, tcp, new1], #{override_to => local}), emqx_mgmt_api_test_util:end_suite([emqx_conf]). -t_max_connection_default(_Config) -> +init_per_testcase(Case, Config) -> + try + ?MODULE:Case({init, Config}) + catch + error:function_clause -> + Config + end. + +end_per_testcase(Case, Config) -> + try + ?MODULE:Case({'end', Config}) + catch + error:function_clause -> + ok + end. 
+ +t_max_connection_default({init, Config}) -> emqx_mgmt_api_test_util:end_suite([emqx_conf]), Etc = filename:join(["etc", "emqx.conf.all"]), + TmpConfName = atom_to_list(?FUNCTION_NAME) ++ ".conf", + Inc = filename:join(["etc", TmpConfName]), ConfFile = emqx_common_test_helpers:app_path(emqx_conf, Etc), - Bin = <<"listeners.tcp.max_connection_test {bind = \"0.0.0.0:3883\"}">>, - ok = file:write_file(ConfFile, Bin, [append]), + IncFile = emqx_common_test_helpers:app_path(emqx_conf, Inc), + Port = integer_to_binary(?PORT), + Bin = <<"listeners.tcp.max_connection_test {bind = \"0.0.0.0:", Port/binary, "\"}">>, + ok = file:write_file(IncFile, Bin), + ok = file:write_file(ConfFile, ["include \"", TmpConfName, "\""], [append]), emqx_mgmt_api_test_util:init_suite([emqx_conf]), + [{tmp_config_file, IncFile} | Config]; +t_max_connection_default({'end', Config}) -> + ok = file:delete(proplists:get_value(tmp_config_file, Config)); +t_max_connection_default(Config) when is_list(Config) -> %% Check infinity is binary not atom. #{<<"listeners">> := Listeners} = emqx_mgmt_api_listeners:do_list_listeners(), Target = lists:filter( @@ -51,7 +78,7 @@ t_max_connection_default(_Config) -> emqx_conf:remove([listeners, tcp, max_connection_test], #{override_to => cluster}), ok. 
-t_list_listeners(_) -> +t_list_listeners(Config) when is_list(Config) -> Path = emqx_mgmt_api_test_util:api_path(["listeners"]), Res = request(get, Path, [], []), #{<<"listeners">> := Expect} = emqx_mgmt_api_listeners:do_list_listeners(), @@ -71,9 +98,10 @@ t_list_listeners(_) -> ?assertMatch({error, {"HTTP/1.1", 404, _}}, request(get, NewPath, [], [])), OriginListener2 = maps:remove(<<"id">>, OriginListener), + Port = integer_to_binary(?PORT), NewConf = OriginListener2#{ <<"name">> => <<"new">>, - <<"bind">> => <<"0.0.0.0:2883">>, + <<"bind">> => <<"0.0.0.0:", Port/binary>>, <<"max_connections">> := <<"infinity">> }, Create = request(post, Path, [], NewConf), @@ -89,7 +117,7 @@ t_list_listeners(_) -> ?assertMatch({error, {"HTTP/1.1", 404, _}}, request(get, NewPath, [], [])), ok. -t_tcp_crud_listeners_by_id(_) -> +t_tcp_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"tcp:default">>, NewListenerId = <<"tcp:new">>, MinListenerId = <<"tcp:min">>, @@ -97,7 +125,7 @@ t_tcp_crud_listeners_by_id(_) -> Type = <<"tcp">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). -t_ssl_crud_listeners_by_id(_) -> +t_ssl_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"ssl:default">>, NewListenerId = <<"ssl:new">>, MinListenerId = <<"ssl:min">>, @@ -105,7 +133,7 @@ t_ssl_crud_listeners_by_id(_) -> Type = <<"ssl">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). -t_ws_crud_listeners_by_id(_) -> +t_ws_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"ws:default">>, NewListenerId = <<"ws:new">>, MinListenerId = <<"ws:min">>, @@ -113,7 +141,7 @@ t_ws_crud_listeners_by_id(_) -> Type = <<"ws">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). 
-t_wss_crud_listeners_by_id(_) -> +t_wss_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"wss:default">>, NewListenerId = <<"wss:new">>, MinListenerId = <<"wss:min">>, @@ -121,7 +149,7 @@ t_wss_crud_listeners_by_id(_) -> Type = <<"wss">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). -t_api_listeners_list_not_ready(_Config) -> +t_api_listeners_list_not_ready(Config) when is_list(Config) -> net_kernel:start(['listeners@127.0.0.1', longnames]), ct:timetrap({seconds, 120}), snabbkaffe:fix_ct_logging(), @@ -151,16 +179,17 @@ t_api_listeners_list_not_ready(_Config) -> emqx_common_test_helpers:stop_slave(Node2) end. -t_clear_certs(_) -> +t_clear_certs(Config) when is_list(Config) -> ListenerId = <<"ssl:default">>, NewListenerId = <<"ssl:clear">>, OriginPath = emqx_mgmt_api_test_util:api_path(["listeners", ListenerId]), NewPath = emqx_mgmt_api_test_util:api_path(["listeners", NewListenerId]), ConfTempT = request(get, OriginPath, [], []), + Port = integer_to_binary(?PORT), ConfTemp = ConfTempT#{ <<"id">> => NewListenerId, - <<"bind">> => <<"0.0.0.0:2883">> + <<"bind">> => <<"0.0.0.0:", Port/binary>> }, %% create, make sure the cert files are created @@ -245,9 +274,11 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> %% create with full options ?assertEqual({error, not_found}, is_running(NewListenerId)), ?assertMatch({error, {"HTTP/1.1", 404, _}}, request(get, NewPath, [], [])), + Port1 = integer_to_binary(?PORT), + Port2 = integer_to_binary(?PORT), NewConf = OriginListener#{ <<"id">> => NewListenerId, - <<"bind">> => <<"0.0.0.0:2883">> + <<"bind">> => <<"0.0.0.0:", Port1/binary>> }, Create = request(post, NewPath, [], NewConf), ?assertEqual(lists:sort(maps:keys(OriginListener)), lists:sort(maps:keys(Create))), @@ -271,7 +302,7 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> } -> #{ <<"id">> => MinListenerId, - <<"bind">> => <<"0.0.0.0:3883">>, + <<"bind">> 
=> <<"0.0.0.0:", Port2/binary>>, <<"type">> => Type, <<"ssl_options">> => #{ <<"cacertfile">> => CaCertFile, @@ -282,7 +313,7 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> _ -> #{ <<"id">> => MinListenerId, - <<"bind">> => <<"0.0.0.0:3883">>, + <<"bind">> => <<"0.0.0.0:", Port2/binary>>, <<"type">> => Type } end, @@ -296,7 +327,7 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> BadPath = emqx_mgmt_api_test_util:api_path(["listeners", BadId]), BadConf = OriginListener#{ <<"id">> => BadId, - <<"bind">> => <<"0.0.0.0:2883">> + <<"bind">> => <<"0.0.0.0:", Port1/binary>> }, ?assertMatch({error, {"HTTP/1.1", 400, _}}, request(post, BadPath, [], BadConf)), @@ -332,12 +363,12 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> ?assertEqual([], delete(NewPath)), ok. -t_delete_nonexistent_listener(_) -> +t_delete_nonexistent_listener(Config) when is_list(Config) -> NonExist = emqx_mgmt_api_test_util:api_path(["listeners", "tcp:nonexistent"]), ?assertEqual([], delete(NonExist)), ok. -t_action_listeners(_) -> +t_action_listeners(Config) when is_list(Config) -> ID = "tcp:default", action_listener(ID, "stop", false), action_listener(ID, "start", true), diff --git a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl index 2bbdf938d..03b0ea2d9 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl @@ -24,11 +24,11 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - emqx_mgmt_api_test_util:init_suite([emqx_conf]), + emqx_mgmt_api_test_util:init_suite([emqx_conf, emqx_management]), Config. end_per_suite(_) -> - emqx_mgmt_api_test_util:end_suite([emqx_conf]). + emqx_mgmt_api_test_util:end_suite([emqx_management, emqx_conf]). 
init_per_testcase(t_log_path, Config) -> emqx_config_logger:add_handler(), @@ -152,7 +152,7 @@ cluster(Specs) -> Env = [{emqx, boot_modules, []}], emqx_common_test_helpers:emqx_cluster(Specs, [ {env, Env}, - {apps, [emqx_conf]}, + {apps, [emqx_conf, emqx_management]}, {load_schema, false}, {join_to, true}, {env_handler, fun diff --git a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl index dcea88d59..8f9b224ef 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl @@ -19,18 +19,25 @@ -compile(nowarn_export_all). -include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). + +-define(ROUTE_TAB, emqx_route). all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> emqx_mgmt_api_test_util:init_suite(), - Config. + Slave = emqx_common_test_helpers:start_slave(some_node, []), + [{slave, Slave} | Config]. -end_per_suite(_) -> +end_per_suite(Config) -> + Slave = ?config(slave, Config), + emqx_common_test_helpers:stop_slave(Slave), + mria:clear_table(?ROUTE_TAB), emqx_mgmt_api_test_util:end_suite(). -t_nodes_api(_) -> +t_nodes_api(Config) -> Node = atom_to_binary(node(), utf8), Topic = <<"test_topic">>, {ok, Client} = emqtt:start_link(#{ @@ -72,8 +79,17 @@ t_nodes_api(_) -> ), %% get topics/:topic + %% We add another route here to ensure that the response handles + %% multiple routes for a single topic + Slave = ?config(slave, Config), + ok = emqx_router:add_route(Topic, Slave), RoutePath = emqx_mgmt_api_test_util:api_path(["topics", Topic]), {ok, RouteResponse} = emqx_mgmt_api_test_util:request_api(get, RoutePath), - RouteData = emqx_json:decode(RouteResponse, [return_maps]), - ?assertEqual(Topic, maps:get(<<"topic">>, RouteData)), - ?assertEqual(Node, maps:get(<<"node">>, RouteData)). 
+ ok = emqx_router:delete_route(Topic, Slave), + + [ + #{<<"topic">> := Topic, <<"node">> := Node1}, + #{<<"topic">> := Topic, <<"node">> := Node2} + ] = emqx_json:decode(RouteResponse, [return_maps]), + + ?assertEqual(lists:usort([Node, atom_to_binary(Slave)]), lists:usort([Node1, Node2])). diff --git a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf index 7e9fcd9e7..f3ac2fd97 100644 --- a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf +++ b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf @@ -58,8 +58,8 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise start_timeout { desc { - en: """If 'start_after_created' enabled, how long time do we wait for the resource get started, in milliseconds.""" - zh: """如果选择了创建后立即启动资源,此选项用来设置等待资源启动的超时时间,单位毫秒。""" + en: """Time interval to wait for an auto-started resource to become healthy before responding resource creation requests.""" + zh: """在回复资源创建请求前等待资源进入健康状态的时间。""" } label { en: """Start Timeout""" @@ -80,8 +80,19 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise query_mode { desc { - en: """Query mode. Optional 'sync/async', default 'sync'.""" - zh: """请求模式。可选 '同步/异步',默认为'同步'模式。""" + en: """Query mode. Optional 'sync/async', default 'async'.""" + zh: """请求模式。可选 '同步/异步',默认为'异步'模式。""" + } + label { + en: """Query mode""" + zh: """请求模式""" + } + } + + query_mode_sync_only { + desc { + en: """Query mode. 
Only support 'sync'.""" + zh: """请求模式。目前只支持同步模式。""" } label { en: """Query mode""" diff --git a/apps/emqx_resource/include/emqx_resource.hrl b/apps/emqx_resource/include/emqx_resource.hrl index 7464eb4f8..ce4e02c2a 100644 --- a/apps/emqx_resource/include/emqx_resource.hrl +++ b/apps/emqx_resource/include/emqx_resource.hrl @@ -31,7 +31,9 @@ pick_key => term(), timeout => timeout(), expire_at => infinity | integer(), - async_reply_fun => reply_fun() + async_reply_fun => reply_fun(), + simple_query => boolean(), + is_buffer_supported => boolean() }. -type resource_data() :: #{ id := resource_id(), diff --git a/apps/emqx_resource/src/emqx_resource.erl b/apps/emqx_resource/src/emqx_resource.erl index ad7f30b47..1c5eecfbb 100644 --- a/apps/emqx_resource/src/emqx_resource.erl +++ b/apps/emqx_resource/src/emqx_resource.erl @@ -264,7 +264,8 @@ query(ResId, Request, Opts) -> case {IsBufferSupported, QM} of {true, _} -> %% only Kafka so far - emqx_resource_buffer_worker:simple_async_query(ResId, Request); + Opts1 = Opts#{is_buffer_supported => true}, + emqx_resource_buffer_worker:simple_async_query(ResId, Request, Opts1); {false, sync} -> emqx_resource_buffer_worker:sync_query(ResId, Request, Opts); {false, async} -> diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 11d3753f0..29fe79d09 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -20,7 +20,6 @@ -module(emqx_resource_buffer_worker). -include("emqx_resource.hrl"). --include("emqx_resource_utils.hrl"). -include("emqx_resource_errors.hrl"). -include_lib("emqx/include/logger.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). @@ -39,7 +38,7 @@ -export([ simple_sync_query/2, - simple_async_query/2 + simple_async_query/3 ]). -export([ @@ -53,7 +52,7 @@ -export([queue_item_marshaller/1, estimate_size/1]). --export([reply_after_query/8, batch_reply_after_query/8]). 
+-export([handle_async_reply/2, handle_async_batch_reply/2]). -export([clear_disk_queue_dir/2]). @@ -63,11 +62,7 @@ -define(SEND_REQ(FROM, REQUEST), {'$send_req', FROM, REQUEST}). -define(QUERY(FROM, REQUEST, SENT, EXPIRE_AT), {query, FROM, REQUEST, SENT, EXPIRE_AT}). -define(SIMPLE_QUERY(REQUEST), ?QUERY(undefined, REQUEST, false, infinity)). --define(REPLY(FROM, REQUEST, SENT, RESULT), {reply, FROM, REQUEST, SENT, RESULT}). --define(EXPAND(RESULT, BATCH), [ - ?REPLY(FROM, REQUEST, SENT, RESULT) - || ?QUERY(FROM, REQUEST, SENT, _EXPIRE_AT) <- BATCH -]). +-define(REPLY(FROM, SENT, RESULT), {reply, FROM, SENT, RESULT}). -define(INFLIGHT_ITEM(Ref, BatchOrQuery, IsRetriable, WorkerMRef), {Ref, BatchOrQuery, IsRetriable, WorkerMRef} ). @@ -78,9 +73,8 @@ -type id() :: binary(). -type index() :: pos_integer(). -type expire_at() :: infinity | integer(). --type queue_query() :: ?QUERY(from(), request(), HasBeenSent :: boolean(), expire_at()). +-type queue_query() :: ?QUERY(reply_fun(), request(), HasBeenSent :: boolean(), expire_at()). -type request() :: term(). --type from() :: pid() | reply_fun() | request_from(). -type request_from() :: undefined | gen_statem:from(). -type state() :: blocked | running. -type inflight_key() :: integer(). @@ -130,18 +124,18 @@ simple_sync_query(Id, Request) -> Index = undefined, QueryOpts = simple_query_opts(), emqx_resource_metrics:matched_inc(Id), - Ref = make_message_ref(), + Ref = make_request_ref(), Result = call_query(sync, Id, Index, Ref, ?SIMPLE_QUERY(Request), QueryOpts), _ = handle_query_result(Id, Result, _HasBeenSent = false), Result. %% simple async-query the resource without batching and queuing. --spec simple_async_query(id(), request()) -> term(). -simple_async_query(Id, Request) -> +-spec simple_async_query(id(), request(), query_opts()) -> term(). 
+simple_async_query(Id, Request, QueryOpts0) -> Index = undefined, - QueryOpts = simple_query_opts(), + QueryOpts = maps:merge(simple_query_opts(), QueryOpts0), emqx_resource_metrics:matched_inc(Id), - Ref = make_message_ref(), + Ref = make_request_ref(), Result = call_query(async, Id, Index, Ref, ?SIMPLE_QUERY(Request), QueryOpts), _ = handle_query_result(Id, Result, _HasBeenSent = false), Result. @@ -201,7 +195,7 @@ init({Id, Index, Opts}) -> {ok, running, Data}. running(enter, _, Data) -> - ?tp(buffer_worker_enter_running, #{}), + ?tp(buffer_worker_enter_running, #{id => maps:get(id, Data)}), %% According to `gen_statem' laws, we mustn't call `maybe_flush' %% directly because it may decide to return `{next_state, blocked, _}', %% and that's an invalid response for a state enter call. @@ -214,7 +208,7 @@ running(cast, flush, Data) -> flush(Data); running(cast, block, St) -> {next_state, blocked, St}; -running(info, ?SEND_REQ(_From, _Req) = Request0, Data) -> +running(info, ?SEND_REQ(_ReplyTo, _Req) = Request0, Data) -> handle_query_requests(Request0, Data); running(info, {flush, Ref}, St = #{tref := {_TRef, Ref}}) -> flush(St#{tref := undefined}); @@ -242,8 +236,8 @@ blocked(cast, flush, Data) -> resume_from_blocked(Data); blocked(state_timeout, unblock, St) -> resume_from_blocked(St); -blocked(info, ?SEND_REQ(_ReqFrom, {query, _Request, _Opts}) = Request0, Data0) -> - {_Queries, Data} = collect_and_enqueue_query_requests(Request0, Data0), +blocked(info, ?SEND_REQ(_ReplyTo, _Req) = Request0, Data0) -> + Data = collect_and_enqueue_query_requests(Request0, Data0), {keep_state, Data}; blocked(info, {flush, _Ref}, _Data) -> keep_state_and_data; @@ -270,15 +264,17 @@ code_change(_OldVsn, State, _Extra) -> %%============================================================================== -define(PICK(ID, KEY, PID, EXPR), - try gproc_pool:pick_worker(ID, KEY) of - PID when is_pid(PID) -> - EXPR; - _ -> - ?RESOURCE_ERROR(worker_not_created, "resource not created") + try + 
case gproc_pool:pick_worker(ID, KEY) of + PID when is_pid(PID) -> + EXPR; + _ -> + ?RESOURCE_ERROR(worker_not_created, "resource not created") + end catch error:badarg -> ?RESOURCE_ERROR(worker_not_created, "resource not created"); - exit:{timeout, _} -> + error:timeout -> ?RESOURCE_ERROR(timeout, "call resource timeout") end ). @@ -288,7 +284,8 @@ pick_call(Id, Key, Query, Timeout) -> Caller = self(), MRef = erlang:monitor(process, Pid, [{alias, reply_demonitor}]), From = {Caller, MRef}, - erlang:send(Pid, ?SEND_REQ(From, Query)), + ReplyTo = {fun gen_statem:reply/2, [From]}, + erlang:send(Pid, ?SEND_REQ(ReplyTo, Query)), receive {MRef, Response} -> erlang:demonitor(MRef, [flush]), @@ -308,8 +305,8 @@ pick_call(Id, Key, Query, Timeout) -> pick_cast(Id, Key, Query) -> ?PICK(Id, Key, Pid, begin - From = undefined, - erlang:send(Pid, ?SEND_REQ(From, Query)), + ReplyTo = undefined, + erlang:send(Pid, ?SEND_REQ(ReplyTo, Query)), ok end). @@ -370,8 +367,8 @@ retry_inflight_sync(Ref, QueryOrBatch, Data0) -> Result = call_query(sync, Id, Index, Ref, QueryOrBatch, QueryOpts), ReplyResult = case QueryOrBatch of - ?QUERY(From, CoreReq, HasBeenSent, _ExpireAt) -> - Reply = ?REPLY(From, CoreReq, HasBeenSent, Result), + ?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt) -> + Reply = ?REPLY(ReplyTo, HasBeenSent, Result), reply_caller_defer_metrics(Id, Reply, QueryOpts); [?QUERY(_, _, _, _) | _] = Batch -> batch_reply_caller_defer_metrics(Id, Result, Batch, QueryOpts) @@ -412,7 +409,7 @@ retry_inflight_sync(Ref, QueryOrBatch, Data0) -> -spec handle_query_requests(?SEND_REQ(request_from(), request()), data()) -> gen_statem:event_handler_result(state(), data()). handle_query_requests(Request0, Data0) -> - {_Queries, Data} = collect_and_enqueue_query_requests(Request0, Data0), + Data = collect_and_enqueue_query_requests(Request0, Data0), maybe_flush(Data). 
collect_and_enqueue_query_requests(Request0, Data0) -> @@ -425,21 +422,37 @@ collect_and_enqueue_query_requests(Request0, Data0) -> Queries = lists:map( fun - (?SEND_REQ(undefined = _From, {query, Req, Opts})) -> + (?SEND_REQ(undefined = _ReplyTo, {query, Req, Opts})) -> ReplyFun = maps:get(async_reply_fun, Opts, undefined), HasBeenSent = false, ExpireAt = maps:get(expire_at, Opts), ?QUERY(ReplyFun, Req, HasBeenSent, ExpireAt); - (?SEND_REQ(From, {query, Req, Opts})) -> + (?SEND_REQ(ReplyTo, {query, Req, Opts})) -> HasBeenSent = false, ExpireAt = maps:get(expire_at, Opts), - ?QUERY(From, Req, HasBeenSent, ExpireAt) + ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt) end, Requests ), - NewQ = append_queue(Id, Index, Q, Queries), - Data = Data0#{queue := NewQ}, - {Queries, Data}. + {Overflown, NewQ} = append_queue(Id, Index, Q, Queries), + ok = reply_overflown(Overflown), + Data0#{queue := NewQ}. + +reply_overflown([]) -> + ok; +reply_overflown([?QUERY(ReplyTo, _Req, _HasBeenSent, _ExpireAt) | More]) -> + do_reply_caller(ReplyTo, {error, buffer_overflow}), + reply_overflown(More). + +do_reply_caller(undefined, _Result) -> + ok; +do_reply_caller({F, Args}, {async_return, Result}) -> + %% this is an early return to async caller, the retry + %% decision has to be made by the caller + do_reply_caller({F, Args}, Result); +do_reply_caller({F, Args}, Result) when is_function(F) -> + _ = erlang:apply(F, Args ++ [Result]), + ok. 
maybe_flush(Data0) -> #{ @@ -498,7 +511,7 @@ flush(Data0) -> buffer_worker_flush_potentially_partial, #{expired => Expired, not_expired => NotExpired} ), - Ref = make_message_ref(), + Ref = make_request_ref(), do_flush(Data2, #{ new_queue => Q1, is_batch => IsBatch, @@ -533,10 +546,10 @@ do_flush( inflight_tid := InflightTID } = Data0, %% unwrap when not batching (i.e., batch size == 1) - [?QUERY(From, CoreReq, HasBeenSent, _ExpireAt) = Request] = Batch, + [?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt) = Request] = Batch, QueryOpts = #{inflight_tid => InflightTID, simple_query => false}, Result = call_query(configured, Id, Index, Ref, Request, QueryOpts), - Reply = ?REPLY(From, CoreReq, HasBeenSent, Result), + Reply = ?REPLY(ReplyTo, HasBeenSent, Result), case reply_caller(Id, Reply, QueryOpts) of %% Failed; remove the request from the queue, as we cannot pop %% from it again, but we'll retry it using the inflight table. @@ -690,6 +703,14 @@ batch_reply_caller(Id, BatchResult, Batch, QueryOpts) -> ShouldBlock. batch_reply_caller_defer_metrics(Id, BatchResult, Batch, QueryOpts) -> + %% the `Mod:on_batch_query/3` returns a single result for a batch, + %% so we need to expand + Replies = lists:map( + fun(?QUERY(FROM, _REQUEST, SENT, _EXPIRE_AT)) -> + ?REPLY(FROM, SENT, BatchResult) + end, + Batch + ), {ShouldAck, PostFns} = lists:foldl( fun(Reply, {_ShouldAck, PostFns}) -> @@ -697,9 +718,7 @@ batch_reply_caller_defer_metrics(Id, BatchResult, Batch, QueryOpts) -> {ShouldAck, [PostFn | PostFns]} end, {ack, []}, - %% the `Mod:on_batch_query/3` returns a single result for a batch, - %% so we need to expand - ?EXPAND(BatchResult, Batch) + Replies ), PostFn = fun() -> lists:foreach(fun(F) -> F() end, PostFns) end, {ShouldAck, PostFn}. @@ -711,48 +730,23 @@ reply_caller(Id, Reply, QueryOpts) -> %% Should only reply to the caller when the decision is final (not %% retriable). See comment on `handle_query_result_pure'. 
-reply_caller_defer_metrics(Id, ?REPLY(undefined, _, HasBeenSent, Result), _QueryOpts) -> +reply_caller_defer_metrics(Id, ?REPLY(undefined, HasBeenSent, Result), _QueryOpts) -> handle_query_result_pure(Id, Result, HasBeenSent); -reply_caller_defer_metrics(Id, ?REPLY({ReplyFun, Args}, _, HasBeenSent, Result), QueryOpts) when - is_function(ReplyFun) --> +reply_caller_defer_metrics(Id, ?REPLY(ReplyTo, HasBeenSent, Result), QueryOpts) -> IsSimpleQuery = maps:get(simple_query, QueryOpts, false), IsUnrecoverableError = is_unrecoverable_error(Result), {ShouldAck, PostFn} = handle_query_result_pure(Id, Result, HasBeenSent), case {ShouldAck, Result, IsUnrecoverableError, IsSimpleQuery} of {ack, {async_return, _}, true, _} -> - apply(ReplyFun, Args ++ [Result]), - ok; + ok = do_reply_caller(ReplyTo, Result); {ack, {async_return, _}, false, _} -> ok; {_, _, _, true} -> - apply(ReplyFun, Args ++ [Result]), - ok; + ok = do_reply_caller(ReplyTo, Result); {nack, _, _, _} -> ok; {ack, _, _, _} -> - apply(ReplyFun, Args ++ [Result]), - ok - end, - {ShouldAck, PostFn}; -reply_caller_defer_metrics(Id, ?REPLY(From, _, HasBeenSent, Result), QueryOpts) -> - IsSimpleQuery = maps:get(simple_query, QueryOpts, false), - IsUnrecoverableError = is_unrecoverable_error(Result), - {ShouldAck, PostFn} = handle_query_result_pure(Id, Result, HasBeenSent), - case {ShouldAck, Result, IsUnrecoverableError, IsSimpleQuery} of - {ack, {async_return, _}, true, _} -> - gen_statem:reply(From, Result), - ok; - {ack, {async_return, _}, false, _} -> - ok; - {_, _, _, true} -> - gen_statem:reply(From, Result), - ok; - {nack, _, _, _} -> - ok; - {ack, _, _, _} -> - gen_statem:reply(From, Result), - ok + ok = do_reply_caller(ReplyTo, Result) end, {ShouldAck, PostFn}. 
@@ -857,23 +851,33 @@ handle_async_worker_down(Data0, Pid) -> call_query(QM0, Id, Index, Ref, Query, QueryOpts) -> ?tp(call_query_enter, #{id => Id, query => Query}), case emqx_resource_manager:ets_lookup(Id) of - {ok, _Group, #{mod := Mod, state := ResSt, status := connected} = Data} -> - QM = - case QM0 =:= configured of - true -> maps:get(query_mode, Data); - false -> QM0 - end, - CBM = maps:get(callback_mode, Data), - CallMode = call_mode(QM, CBM), - apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, QueryOpts); {ok, _Group, #{status := stopped}} -> ?RESOURCE_ERROR(stopped, "resource stopped or disabled"); - {ok, _Group, #{status := S}} when S == connecting; S == disconnected -> - ?RESOURCE_ERROR(not_connected, "resource not connected"); + {ok, _Group, Resource} -> + QM = + case QM0 =:= configured of + true -> maps:get(query_mode, Resource); + false -> QM0 + end, + do_call_query(QM, Id, Index, Ref, Query, QueryOpts, Resource); {error, not_found} -> ?RESOURCE_ERROR(not_found, "resource not found") end. +do_call_query(QM, Id, Index, Ref, Query, #{is_buffer_supported := true} = QueryOpts, Resource) -> + %% The connector supports buffer, send even in disconnected state + #{mod := Mod, state := ResSt, callback_mode := CBM} = Resource, + CallMode = call_mode(QM, CBM), + apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, QueryOpts); +do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{status := connected} = Resource) -> + %% when calling from the buffer worker or other simple queries, + %% only apply the query fun when it's at connected status + #{mod := Mod, state := ResSt, callback_mode := CBM} = Resource, + CallMode = call_mode(QM, CBM), + apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, QueryOpts); +do_call_query(_QM, _Id, _Index, _Ref, _Query, _QueryOpts, _Data) -> + ?RESOURCE_ERROR(not_connected, "resource not connected"). 
+ -define(APPLY_RESOURCE(NAME, EXPR, REQ), try %% if the callback module (connector) wants to return an error that @@ -903,13 +907,21 @@ apply_query_fun(async, Mod, Id, Index, Ref, ?QUERY(_, Request, _, _) = Query, Re ?APPLY_RESOURCE( call_query_async, begin - ReplyFun = fun ?MODULE:reply_after_query/8, - Args = [self(), Id, Index, InflightTID, Ref, Query, QueryOpts], + ReplyFun = fun ?MODULE:handle_async_reply/2, + ReplyContext = #{ + buffer_worker => self(), + resource_id => Id, + worker_index => Index, + inflight_tid => InflightTID, + request_ref => Ref, + query_opts => QueryOpts, + query => minimize(Query) + }, IsRetriable = false, WorkerMRef = undefined, InflightItem = ?INFLIGHT_ITEM(Ref, Query, IsRetriable, WorkerMRef), ok = inflight_append(InflightTID, InflightItem, Id, Index), - Result = Mod:on_query_async(Id, Request, {ReplyFun, Args}, ResSt), + Result = Mod:on_query_async(Id, Request, {ReplyFun, [ReplyContext]}, ResSt), {async_return, Result} end, Request @@ -918,7 +930,7 @@ apply_query_fun(sync, Mod, Id, _Index, _Ref, [?QUERY(_, _, _, _) | _] = Batch, R ?tp(call_batch_query, #{ id => Id, mod => Mod, batch => Batch, res_st => ResSt, call_mode => sync }), - Requests = [Request || ?QUERY(_From, Request, _, _ExpireAt) <- Batch], + Requests = lists:map(fun(?QUERY(_ReplyTo, Request, _, _ExpireAt)) -> Request end, Batch), ?APPLY_RESOURCE(call_batch_query, Mod:on_batch_query(Id, Requests, ResSt), Batch); apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, ResSt, QueryOpts) -> ?tp(call_batch_query_async, #{ @@ -928,32 +940,43 @@ apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, Re ?APPLY_RESOURCE( call_batch_query_async, begin - ReplyFun = fun ?MODULE:batch_reply_after_query/8, - ReplyFunAndArgs = {ReplyFun, [self(), Id, Index, InflightTID, Ref, Batch, QueryOpts]}, - Requests = [Request || ?QUERY(_From, Request, _, _ExpireAt) <- Batch], + ReplyFun = fun ?MODULE:handle_async_batch_reply/2, + ReplyContext = #{ + 
buffer_worker => self(), + resource_id => Id, + worker_index => Index, + inflight_tid => InflightTID, + request_ref => Ref, + query_opts => QueryOpts, + batch => minimize(Batch) + }, + Requests = lists:map( + fun(?QUERY(_ReplyTo, Request, _, _ExpireAt)) -> Request end, Batch + ), IsRetriable = false, WorkerMRef = undefined, InflightItem = ?INFLIGHT_ITEM(Ref, Batch, IsRetriable, WorkerMRef), ok = inflight_append(InflightTID, InflightItem, Id, Index), - Result = Mod:on_batch_query_async(Id, Requests, ReplyFunAndArgs, ResSt), + Result = Mod:on_batch_query_async(Id, Requests, {ReplyFun, [ReplyContext]}, ResSt), {async_return, Result} end, Batch ). -reply_after_query( - Pid, - Id, - Index, - InflightTID, - Ref, - ?QUERY(_From, _Request, _HasBeenSent, ExpireAt) = Query, - QueryOpts, +handle_async_reply( + #{ + request_ref := Ref, + inflight_tid := InflightTID, + resource_id := Id, + worker_index := Index, + buffer_worker := Pid, + query := ?QUERY(_, _, _, ExpireAt) = _Query + } = ReplyContext, Result ) -> ?tp( - buffer_worker_reply_after_query_enter, - #{batch_or_query => [Query], ref => Ref} + handle_async_reply_enter, + #{batch_or_query => [_Query], ref => Ref} ), Now = now_(), case is_expired(ExpireAt, Now) of @@ -962,52 +985,60 @@ reply_after_query( IsAcked = ack_inflight(InflightTID, Ref, Id, Index), IsAcked andalso emqx_resource_metrics:late_reply_inc(Id), IsFullBefore andalso ?MODULE:flush_worker(Pid), - ?tp(buffer_worker_reply_after_query_expired, #{expired => [Query]}), + ?tp(handle_async_reply_expired, #{expired => [_Query]}), ok; false -> - do_reply_after_query(Pid, Id, Index, InflightTID, Ref, Query, QueryOpts, Result) + do_handle_async_reply(ReplyContext, Result) end. 
-do_reply_after_query( - Pid, - Id, - Index, - InflightTID, - Ref, - ?QUERY(From, Request, HasBeenSent, _ExpireAt), - QueryOpts, +do_handle_async_reply( + #{ + query_opts := QueryOpts, + resource_id := Id, + request_ref := Ref, + worker_index := Index, + buffer_worker := Pid, + inflight_tid := InflightTID, + query := ?QUERY(ReplyTo, _, Sent, _ExpireAt) = _Query + }, Result ) -> %% NOTE: 'inflight' is the count of messages that were sent async %% but received no ACK, NOT the number of messages queued in the %% inflight window. {Action, PostFn} = reply_caller_defer_metrics( - Id, ?REPLY(From, Request, HasBeenSent, Result), QueryOpts + Id, ?REPLY(ReplyTo, Sent, Result), QueryOpts ), + + ?tp(handle_async_reply, #{ + action => Action, + batch_or_query => [_Query], + ref => Ref, + result => Result + }), + case Action of nack -> %% Keep retrying. - ?tp(buffer_worker_reply_after_query, #{ - action => Action, - batch_or_query => ?QUERY(From, Request, HasBeenSent, _ExpireAt), - ref => Ref, - result => Result - }), mark_inflight_as_retriable(InflightTID, Ref), ?MODULE:block(Pid); ack -> - ?tp(buffer_worker_reply_after_query, #{ - action => Action, - batch_or_query => ?QUERY(From, Request, HasBeenSent, _ExpireAt), - ref => Ref, - result => Result - }), do_ack(InflightTID, Ref, Id, Index, PostFn, Pid, QueryOpts) end. 
-batch_reply_after_query(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Result) -> +handle_async_batch_reply( + #{ + buffer_worker := Pid, + resource_id := Id, + worker_index := Index, + inflight_tid := InflightTID, + request_ref := Ref, + batch := Batch + } = ReplyContext, + Result +) -> ?tp( - buffer_worker_reply_after_query_enter, + handle_async_reply_enter, #{batch_or_query => Batch, ref => Ref} ), Now = now_(), @@ -1017,45 +1048,41 @@ batch_reply_after_query(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Resu IsAcked = ack_inflight(InflightTID, Ref, Id, Index), IsAcked andalso emqx_resource_metrics:late_reply_inc(Id), IsFullBefore andalso ?MODULE:flush_worker(Pid), - ?tp(buffer_worker_reply_after_query_expired, #{expired => Batch}), + ?tp(handle_async_reply_expired, #{expired => Batch}), ok; {NotExpired, Expired} -> NumExpired = length(Expired), emqx_resource_metrics:late_reply_inc(Id, NumExpired), NumExpired > 0 andalso - ?tp(buffer_worker_reply_after_query_expired, #{expired => Expired}), - do_batch_reply_after_query( - Pid, Id, Index, InflightTID, Ref, NotExpired, QueryOpts, Result - ) + ?tp(handle_async_reply_expired, #{expired => Expired}), + do_handle_async_batch_reply(ReplyContext#{batch := NotExpired}, Result) end. -do_batch_reply_after_query(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Result) -> - ?tp( - buffer_worker_reply_after_query_enter, - #{batch_or_query => Batch, ref => Ref} - ), - %% NOTE: 'inflight' is the count of messages that were sent async - %% but received no ACK, NOT the number of messages queued in the - %% inflight window. 
+do_handle_async_batch_reply( + #{ + buffer_worker := Pid, + resource_id := Id, + worker_index := Index, + inflight_tid := InflightTID, + request_ref := Ref, + batch := Batch, + query_opts := QueryOpts + }, + Result +) -> {Action, PostFn} = batch_reply_caller_defer_metrics(Id, Result, Batch, QueryOpts), + ?tp(handle_async_reply, #{ + action => Action, + batch_or_query => Batch, + ref => Ref, + result => Result + }), case Action of nack -> %% Keep retrying. - ?tp(buffer_worker_reply_after_query, #{ - action => nack, - batch_or_query => Batch, - ref => Ref, - result => Result - }), mark_inflight_as_retriable(InflightTID, Ref), ?MODULE:block(Pid); ack -> - ?tp(buffer_worker_reply_after_query, #{ - action => ack, - batch_or_query => Batch, - ref => Ref, - result => Result - }), do_ack(InflightTID, Ref, Id, Index, PostFn, Pid, QueryOpts) end. @@ -1083,23 +1110,30 @@ queue_item_marshaller(Item) -> estimate_size(QItem) -> erlang:external_size(QItem). --spec append_queue(id(), index(), replayq:q(), [queue_query()]) -> replayq:q(). -append_queue(Id, Index, Q, Queries) when not is_binary(Q) -> - %% we must not append a raw binary because the marshaller will get - %% lost. +-spec append_queue(id(), index(), replayq:q(), [queue_query()]) -> + {[queue_query()], replayq:q()}. +append_queue(Id, Index, Q, Queries) -> + %% this assertion is to ensure that we never append a raw binary + %% because the marshaller will get lost. 
+ false = is_binary(hd(Queries)), Q0 = replayq:append(Q, Queries), - Q2 = + {Overflown, Q2} = case replayq:overflow(Q0) of - Overflow when Overflow =< 0 -> - Q0; - Overflow -> - PopOpts = #{bytes_limit => Overflow, count_limit => 999999999}, + OverflownBytes when OverflownBytes =< 0 -> + {[], Q0}; + OverflownBytes -> + PopOpts = #{bytes_limit => OverflownBytes, count_limit => 999999999}, {Q1, QAckRef, Items2} = replayq:pop(Q0, PopOpts), ok = replayq:ack(Q1, QAckRef), Dropped = length(Items2), - emqx_resource_metrics:dropped_queue_full_inc(Id), - ?SLOG(error, #{msg => drop_query, reason => queue_full, dropped => Dropped}), - Q1 + emqx_resource_metrics:dropped_queue_full_inc(Id, Dropped), + ?SLOG(info, #{ + msg => buffer_worker_overflow, + resource_id => Id, + worker_index => Index, + dropped => Dropped + }), + {Items2, Q1} end, emqx_resource_metrics:queuing_set(Id, Index, queue_count(Q2)), ?tp( @@ -1107,10 +1141,11 @@ append_queue(Id, Index, Q, Queries) when not is_binary(Q) -> #{ id => Id, items => Queries, - queue_count => queue_count(Q2) + queue_count => queue_count(Q2), + overflown => length(Overflown) } ), - Q2. + {Overflown, Q2}. %%============================================================================== %% the inflight queue for async query @@ -1119,6 +1154,10 @@ append_queue(Id, Index, Q, Queries) when not is_binary(Q) -> -define(INITIAL_TIME_REF, initial_time). -define(INITIAL_MONOTONIC_TIME_REF, initial_monotonic_time). +%% NOTE +%% There are 4 metadata rows in an inflight table, keyed by atoms declared above. ☝ +-define(INFLIGHT_META_ROWS, 4). 
+ inflight_new(InfltWinSZ, Id, Index) -> TableId = ets:new( emqx_resource_buffer_worker_inflight_tab, @@ -1130,7 +1169,7 @@ inflight_new(InfltWinSZ, Id, Index) -> inflight_append(TableId, {?SIZE_REF, 0}, Id, Index), inflight_append(TableId, {?INITIAL_TIME_REF, erlang:system_time()}, Id, Index), inflight_append( - TableId, {?INITIAL_MONOTONIC_TIME_REF, make_message_ref()}, Id, Index + TableId, {?INITIAL_MONOTONIC_TIME_REF, make_request_ref()}, Id, Index ), TableId. @@ -1151,7 +1190,7 @@ inflight_get_first_retriable(InflightTID, Now) -> case ets:select(InflightTID, MatchSpec, _Limit = 1) of '$end_of_table' -> none; - {[{Ref, Query = ?QUERY(_From, _CoreReq, _HasBeenSent, ExpireAt)}], _Continuation} -> + {[{Ref, Query = ?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt)}], _Continuation} -> case is_expired(ExpireAt, Now) of true -> {expired, Ref, [Query]}; @@ -1179,12 +1218,9 @@ is_inflight_full(InflightTID) -> Size >= MaxSize. inflight_num_batches(InflightTID) -> - %% Note: we subtract 2 because there're 2 metadata rows that hold - %% the maximum size value and the number of messages. - MetadataRowCount = 2, case ets:info(InflightTID, size) of undefined -> 0; - Size -> max(0, Size - MetadataRowCount) + Size -> max(0, Size - ?INFLIGHT_META_ROWS) end. inflight_num_msgs(InflightTID) -> @@ -1210,7 +1246,7 @@ inflight_append( inflight_append( InflightTID, ?INFLIGHT_ITEM( - Ref, ?QUERY(_From, _Req, _HasBeenSent, _ExpireAt) = Query0, IsRetriable, WorkerMRef + Ref, ?QUERY(_ReplyTo, _Req, _HasBeenSent, _ExpireAt) = Query0, IsRetriable, WorkerMRef ), Id, Index @@ -1369,8 +1405,8 @@ cancel_flush_timer(St = #{tref := {TRef, _Ref}}) -> _ = erlang:cancel_timer(TRef), St#{tref => undefined}. --spec make_message_ref() -> inflight_key(). -make_message_ref() -> +-spec make_request_ref() -> inflight_key(). +make_request_ref() -> now_(). 
collect_requests(Acc, Limit) -> @@ -1381,7 +1417,7 @@ do_collect_requests(Acc, Count, Limit) when Count >= Limit -> lists:reverse(Acc); do_collect_requests(Acc, Count, Limit) -> receive - ?SEND_REQ(_From, _Req) = Request -> + ?SEND_REQ(_ReplyTo, _Req) = Request -> do_collect_requests([Request | Acc], Count + 1, Limit) after 0 -> lists:reverse(Acc) @@ -1389,9 +1425,9 @@ do_collect_requests(Acc, Count, Limit) -> mark_as_sent(Batch) when is_list(Batch) -> lists:map(fun mark_as_sent/1, Batch); -mark_as_sent(?QUERY(From, Req, _HasBeenSent, ExpireAt)) -> +mark_as_sent(?QUERY(ReplyTo, Req, _HasBeenSent, ExpireAt)) -> HasBeenSent = true, - ?QUERY(From, Req, HasBeenSent, ExpireAt). + ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt). is_unrecoverable_error({error, {unrecoverable_error, _}}) -> true; @@ -1415,7 +1451,7 @@ is_async_return(_) -> sieve_expired_requests(Batch, Now) -> {Expired, NotExpired} = lists:partition( - fun(?QUERY(_From, _CoreReq, _HasBeenSent, ExpireAt)) -> + fun(?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt)) -> is_expired(ExpireAt, Now) end, Batch @@ -1456,3 +1492,15 @@ ensure_expire_at(#{timeout := TimeoutMS} = Opts) -> TimeoutNS = erlang:convert_time_unit(TimeoutMS, millisecond, nanosecond), ExpireAt = now_() + TimeoutNS, Opts#{expire_at => ExpireAt}. + +%% no need to keep the request for async reply handler +minimize(?QUERY(_, _, _, _) = Q) -> + do_minimize(Q); +minimize(L) when is_list(L) -> + lists:map(fun do_minimize/1, L). + +-ifdef(TEST). +do_minimize(?QUERY(_ReplyTo, _Req, _Sent, _ExpireAt) = Query) -> Query. +-else. +do_minimize(?QUERY(ReplyTo, _Req, Sent, ExpireAt)) -> ?QUERY(ReplyTo, [], Sent, ExpireAt). +-endif. 
diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl index 4987946c9..a00dcdcd2 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl @@ -23,7 +23,7 @@ %% External API -export([start_link/0]). --export([start_workers/2, stop_workers/2]). +-export([start_workers/2, stop_workers/2, worker_pids/1]). %% Callbacks -export([init/1]). @@ -75,6 +75,14 @@ stop_workers(ResId, Opts) -> ensure_worker_pool_removed(ResId), ok. +worker_pids(ResId) -> + lists:map( + fun({_Name, Pid}) -> + Pid + end, + gproc_pool:active_workers(ResId) + ). + %%%============================================================================= %%% Internal %%%============================================================================= diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 8098dbe42..db4441d88 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -194,7 +194,7 @@ remove(ResId, ClearMetrics) when is_binary(ResId) -> restart(ResId, Opts) when is_binary(ResId) -> case safe_call(ResId, restart, ?T_OPERATION) of ok -> - wait_for_ready(ResId, maps:get(start_timeout, Opts, 5000)), + _ = wait_for_ready(ResId, maps:get(start_timeout, Opts, 5000)), ok; {error, _Reason} = Error -> Error @@ -205,7 +205,7 @@ restart(ResId, Opts) when is_binary(ResId) -> start(ResId, Opts) -> case safe_call(ResId, start, ?T_OPERATION) of ok -> - wait_for_ready(ResId, maps:get(start_timeout, Opts, 5000)), + _ = wait_for_ready(ResId, maps:get(start_timeout, Opts, 5000)), ok; {error, _Reason} = Error -> Error @@ -309,6 +309,7 @@ init({Data, Opts}) -> end. terminate(_Reason, _State, Data) -> + _ = stop_resource(Data), _ = maybe_clear_alarm(Data#data.id), delete_cache(Data#data.id, Data#data.manager_id), ok. 
@@ -334,8 +335,7 @@ handle_event({call, From}, start, _State, _Data) -> % Called when the resource received a `quit` message handle_event(info, quit, stopped, _Data) -> {stop, {shutdown, quit}}; -handle_event(info, quit, _State, Data) -> - _ = stop_resource(Data), +handle_event(info, quit, _State, _Data) -> {stop, {shutdown, quit}}; % Called when the resource is to be stopped handle_event({call, From}, stop, stopped, _Data) -> @@ -487,7 +487,7 @@ start_resource(Data, From) -> Actions = maybe_reply([{state_timeout, 0, health_check}], From, ok), {next_state, connecting, UpdatedData, Actions}; {error, Reason} = Err -> - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => start_resource_failed, id => Data#data.id, reason => Reason @@ -546,7 +546,7 @@ handle_connected_health_check(Data) -> Actions = [{state_timeout, health_check_interval(Data#data.opts), health_check}], {keep_state, UpdatedData, Actions}; (Status, UpdatedData) -> - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => health_check_failed, id => Data#data.id, status => Status @@ -555,12 +555,14 @@ handle_connected_health_check(Data) -> end ). +with_health_check(#data{state = undefined} = Data, Func) -> + Func(disconnected, Data); with_health_check(Data, Func) -> ResId = Data#data.id, HCRes = emqx_resource:call_health_check(Data#data.manager_id, Data#data.mod, Data#data.state), {Status, NewState, Err} = parse_health_check_result(HCRes, Data), _ = maybe_alarm(Status, ResId), - ok = maybe_resume_resource_workers(Status), + ok = maybe_resume_resource_workers(ResId, Status), UpdatedData = Data#data{ state = NewState, status = Status, error = Err }, @@ -581,14 +583,12 @@ maybe_alarm(_Status, ResId) -> <<"resource down: ", ResId/binary>> ). 
-maybe_resume_resource_workers(connected) -> +maybe_resume_resource_workers(ResId, connected) -> lists:foreach( - fun({_, Pid, _, _}) -> - emqx_resource_buffer_worker:resume(Pid) - end, - supervisor:which_children(emqx_resource_buffer_worker_sup) + fun emqx_resource_buffer_worker:resume/1, + emqx_resource_buffer_worker_sup:worker_pids(ResId) ); -maybe_resume_resource_workers(_) -> +maybe_resume_resource_workers(_, _) -> ok. maybe_clear_alarm(<>) -> diff --git a/apps/emqx_resource/src/schema/emqx_resource_schema.erl b/apps/emqx_resource/src/schema/emqx_resource_schema.erl index 39513e28c..fdd65bc3c 100644 --- a/apps/emqx_resource/src/schema/emqx_resource_schema.erl +++ b/apps/emqx_resource/src/schema/emqx_resource_schema.erl @@ -30,16 +30,25 @@ namespace() -> "resource_schema". roots() -> []. +fields("resource_opts_sync_only") -> + [ + {resource_opts, + mk( + ref(?MODULE, "creation_opts_sync_only"), + resource_opts_meta() + )} + ]; +fields("creation_opts_sync_only") -> + Fields0 = fields("creation_opts"), + Fields1 = lists:keydelete(async_inflight_window, 1, Fields0), + QueryMod = {query_mode, fun query_mode_sync_only/1}, + lists:keyreplace(query_mode, 1, Fields1, QueryMod); fields("resource_opts") -> [ {resource_opts, mk( ref(?MODULE, "creation_opts"), - #{ - required => false, - default => #{}, - desc => ?DESC(<<"resource_opts">>) - } + resource_opts_meta() )} ]; fields("creation_opts") -> @@ -59,6 +68,13 @@ fields("creation_opts") -> {max_queue_bytes, fun max_queue_bytes/1} ]. +resource_opts_meta() -> + #{ + required => false, + default => #{}, + desc => ?DESC(<<"resource_opts">>) + }. + worker_pool_size(type) -> non_neg_integer(); worker_pool_size(desc) -> ?DESC("worker_pool_size"); worker_pool_size(default) -> ?WORKER_POOL_SIZE; @@ -95,6 +111,12 @@ query_mode(default) -> async; query_mode(required) -> false; query_mode(_) -> undefined. 
+query_mode_sync_only(type) -> enum([sync]); +query_mode_sync_only(desc) -> ?DESC("query_mode_sync_only"); +query_mode_sync_only(default) -> sync; +query_mode_sync_only(required) -> false; +query_mode_sync_only(_) -> undefined. + request_timeout(type) -> hoconsc:union([infinity, emqx_schema:duration_ms()]); request_timeout(desc) -> ?DESC("request_timeout"); request_timeout(default) -> <<"15s">>; @@ -139,4 +161,6 @@ max_queue_bytes(required) -> false; max_queue_bytes(_) -> undefined. desc("creation_opts") -> + ?DESC("creation_opts"); +desc("creation_opts_sync_only") -> ?DESC("creation_opts"). diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 9b2af74f6..bc0331d02 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -19,13 +19,12 @@ -compile(export_all). -include_lib("eunit/include/eunit.hrl"). --include_lib("common_test/include/ct.hrl"). --include("emqx_resource.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -define(TEST_RESOURCE, emqx_connector_demo). -define(ID, <<"id">>). +-define(ID1, <<"id1">>). -define(DEFAULT_RESOURCE_GROUP, <<"default">>). -define(RESOURCE_ERROR(REASON), {error, {resource_error, #{reason := REASON}}}). -define(TRACE_OPTS, #{timetrap => 10000, timeout => 1000}). @@ -413,7 +412,8 @@ t_query_counter_async_inflight(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(WindowSize, ReqOpts), + %% one more so that inflight would be already full upon last query + inc_counter_in_parallel(WindowSize + 1, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 1_000 ), @@ -447,9 +447,9 @@ t_query_counter_async_inflight(_) -> %% all responses should be received after the resource is resumed. 
{ok, SRef0} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - %% +1 because the tmp_query above will be retried and succeed + %% +2 because the tmp_query above will be retried and succeed %% this time. - WindowSize + 1, + WindowSize + 2, _Timeout0 = 10_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), @@ -477,7 +477,7 @@ t_query_counter_async_inflight(_) -> fun(Trace) -> QueryTrace = ?of_kind(call_query_async, Trace), ?assertMatch([#{query := {query, _, {inc_counter, _}, _, _}} | _], QueryTrace), - ?assertEqual(WindowSize + Num, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), + ?assertEqual(WindowSize + Num + 1, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), ok end @@ -489,7 +489,8 @@ t_query_counter_async_inflight(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(WindowSize, ReqOpts), + %% one more so that inflight would be already full upon last query + inc_counter_in_parallel(WindowSize + 1, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 1_000 ), @@ -502,10 +503,10 @@ t_query_counter_async_inflight(_) -> %% this will block the resource_worker ok = emqx_resource:query(?ID, {inc_counter, 4}), - Sent = WindowSize + Num + WindowSize, + Sent = WindowSize + 1 + Num + WindowSize + 1, {ok, SRef1} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - WindowSize, + WindowSize + 1, _Timeout0 = 10_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), @@ -595,7 +596,8 @@ t_query_counter_async_inflight_batch(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(NumMsgs, ReqOpts), + %% a batch more so that inflight would be already full upon last query + inc_counter_in_parallel(NumMsgs + BatchSize, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 5_000 ), @@ -617,12 +619,14 @@ t_query_counter_async_inflight_batch(_) -> ), tap_metrics(?LINE), + 
Sent1 = NumMsgs + BatchSize, + ?check_trace( begin %% this will block the resource_worker as the inflight window is full now {ok, {ok, _}} = ?wait_async_action( - emqx_resource:query(?ID, {inc_counter, 2}), + emqx_resource:query(?ID, {inc_counter, 2}, ReqOpts()), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 5_000 ), @@ -632,6 +636,8 @@ t_query_counter_async_inflight_batch(_) -> [] ), + Sent2 = Sent1 + 1, + tap_metrics(?LINE), %% send query now will fail because the resource is blocked. Insert = fun(Tab, Ref, Result) -> @@ -654,10 +660,10 @@ t_query_counter_async_inflight_batch(_) -> %% all responses should be received after the resource is resumed. {ok, SRef0} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - %% +1 because the tmp_query above will be retried and succeed + %% +2 because the tmp_query above will be retried and succeed %% this time. - WindowSize + 1, - 10_000 + WindowSize + 2, + 5_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), tap_metrics(?LINE), @@ -665,8 +671,8 @@ t_query_counter_async_inflight_batch(_) -> %% since the previous tmp_query was enqueued to be retried, we %% take it again from the table; this time, it should have %% succeeded. - ?assertMatch([{tmp_query, ok}], ets:take(Tab0, tmp_query)), - ?assertEqual(NumMsgs, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), + ?assertEqual([{tmp_query, ok}], ets:take(Tab0, tmp_query)), + ?assertEqual(Sent2, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), %% send async query, this time everything should be ok. 
@@ -678,7 +684,7 @@ t_query_counter_async_inflight_batch(_) -> {ok, SRef} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), NumBatches1, - 10_000 + 5_000 ), inc_counter_in_parallel(NumMsgs1, ReqOpts), {ok, _} = snabbkaffe:receive_events(SRef), @@ -692,11 +698,10 @@ t_query_counter_async_inflight_batch(_) -> ) end ), - ?assertEqual( - NumMsgs + NumMsgs1, - ets:info(Tab0, size), - #{tab => ets:tab2list(Tab0)} - ), + + Sent3 = Sent2 + NumMsgs1, + + ?assertEqual(Sent3, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), %% block the resource @@ -705,7 +710,8 @@ t_query_counter_async_inflight_batch(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(NumMsgs, ReqOpts), + %% a batch more so that inflight would be already full upon last query + inc_counter_in_parallel(NumMsgs + BatchSize, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 5_000 ), @@ -718,22 +724,23 @@ t_query_counter_async_inflight_batch(_) -> end ), + Sent4 = Sent3 + NumMsgs + BatchSize, + %% this will block the resource_worker ok = emqx_resource:query(?ID, {inc_counter, 1}), - Sent = NumMsgs + NumMsgs1 + NumMsgs, {ok, SRef1} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - WindowSize, - 10_000 + WindowSize + 1, + 5_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), {ok, _} = snabbkaffe:receive_events(SRef1), - ?assertEqual(Sent, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), + ?assertEqual(Sent4, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), {ok, Counter} = emqx_resource:simple_sync_query(?ID, get_counter), - ct:pal("get_counter: ~p, sent: ~p", [Counter, Sent]), - ?assert(Sent =< Counter), + ct:pal("get_counter: ~p, sent: ~p", [Counter, Sent4]), + ?assert(Sent4 =< Counter), %% give the metrics some time to stabilize. 
ct:sleep(1000), @@ -772,7 +779,10 @@ t_healthy_timeout(_) -> %% the ?TEST_RESOURCE always returns the `Mod:on_get_status/2` 300ms later. #{health_check_interval => 200} ), - ?assertError(timeout, emqx_resource:query(?ID, get_state, #{timeout => 1_000})), + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, + emqx_resource:query(?ID, get_state, #{timeout => 1_000}) + ), ?assertMatch({ok, _Group, #{status := disconnected}}, emqx_resource_manager:ets_lookup(?ID)), ok = emqx_resource:remove_local(?ID). @@ -1020,6 +1030,63 @@ t_auto_retry(_) -> ), ?assertEqual(ok, Res). +t_health_check_disconnected(_) -> + _ = emqx_resource:create_local( + ?ID, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource, create_error => true}, + #{auto_retry_interval => 100} + ), + ?assertEqual( + {ok, disconnected}, + emqx_resource:health_check(?ID) + ). + +t_unblock_only_required_buffer_workers(_) -> + {ok, _} = emqx_resource:create( + ?ID, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource}, + #{ + query_mode => async, + batch_size => 5 + } + ), + lists:foreach( + fun emqx_resource_buffer_worker:block/1, + emqx_resource_buffer_worker_sup:worker_pids(?ID) + ), + emqx_resource:create( + ?ID1, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource}, + #{ + query_mode => async, + batch_size => 5 + } + ), + %% creation of `?ID1` should not have unblocked `?ID`'s buffer workers + %% so we should see resumes now (`buffer_worker_enter_running`). + ?check_trace( + ?wait_async_action( + lists:foreach( + fun emqx_resource_buffer_worker:resume/1, + emqx_resource_buffer_worker_sup:worker_pids(?ID) + ), + #{?snk_kind := buffer_worker_enter_running}, + 5000 + ), + fun(Trace) -> + ?assertMatch( + [#{id := ?ID} | _], + ?of_kind(buffer_worker_enter_running, Trace) + ) + end + ). 
+ t_retry_batch(_Config) -> {ok, _} = emqx_resource:create( ?ID, @@ -1226,8 +1293,8 @@ t_always_overflow(_Config) -> Payload = binary:copy(<<"a">>, 100), %% since it's sync and it should never send a request, this %% errors with `timeout'. - ?assertError( - timeout, + ?assertEqual( + {error, buffer_overflow}, emqx_resource:query( ?ID, {big_payload, Payload}, @@ -1583,8 +1650,8 @@ do_t_expiration_before_sending(QueryMode) -> spawn_link(fun() -> case QueryMode of sync -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query(?ID, {inc_counter, 99}, #{timeout => TimeoutMS}) ); async -> @@ -1690,8 +1757,8 @@ do_t_expiration_before_sending_partial_batch(QueryMode) -> spawn_link(fun() -> case QueryMode of sync -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query(?ID, {inc_counter, 199}, #{timeout => TimeoutMS}) ); async -> @@ -1717,7 +1784,7 @@ do_t_expiration_before_sending_partial_batch(QueryMode) -> async -> {ok, _} = ?block_until( #{ - ?snk_kind := buffer_worker_reply_after_query, + ?snk_kind := handle_async_reply, action := ack, batch_or_query := [{query, _, {inc_counter, 99}, _, _}] }, @@ -1848,7 +1915,7 @@ do_t_expiration_async_after_reply(IsBatch) -> ?force_ordering( #{?snk_kind := delay}, #{ - ?snk_kind := buffer_worker_reply_after_query_enter, + ?snk_kind := handle_async_reply_enter, batch_or_query := [{query, _, {inc_counter, 199}, _, _} | _] } ), @@ -1873,7 +1940,7 @@ do_t_expiration_async_after_reply(IsBatch) -> #{?snk_kind := buffer_worker_flush_potentially_partial}, 4 * TimeoutMS ), {ok, _} = ?block_until( - #{?snk_kind := buffer_worker_reply_after_query_expired}, 10 * TimeoutMS + #{?snk_kind := handle_async_reply_expired}, 10 * TimeoutMS ), unlink(Pid0), @@ -1887,7 +1954,7 @@ do_t_expiration_async_after_reply(IsBatch) -> expired := [{query, _, {inc_counter, 199}, _, _}] } ], - ?of_kind(buffer_worker_reply_after_query_expired, Trace) + 
?of_kind(handle_async_reply_expired, Trace) ), wait_telemetry_event(success, #{n_events => 1, timeout => 4_000}), Metrics = tap_metrics(?LINE), @@ -1935,7 +2002,7 @@ t_expiration_batch_all_expired_after_reply(_Config) -> ?force_ordering( #{?snk_kind := delay}, #{ - ?snk_kind := buffer_worker_reply_after_query_enter, + ?snk_kind := handle_async_reply_enter, batch_or_query := [{query, _, {inc_counter, 199}, _, _} | _] } ), @@ -1954,7 +2021,7 @@ t_expiration_batch_all_expired_after_reply(_Config) -> end), {ok, _} = ?block_until( - #{?snk_kind := buffer_worker_reply_after_query_expired}, 10 * TimeoutMS + #{?snk_kind := handle_async_reply_expired}, 10 * TimeoutMS ), unlink(Pid0), @@ -1968,7 +2035,7 @@ t_expiration_batch_all_expired_after_reply(_Config) -> expired := [{query, _, {inc_counter, 199}, _, _}] } ], - ?of_kind(buffer_worker_reply_after_query_expired, Trace) + ?of_kind(handle_async_reply_expired, Trace) ), Metrics = tap_metrics(?LINE), ?assertMatch( @@ -2043,8 +2110,8 @@ do_t_expiration_retry(IsBatch) -> ResumeInterval * 2 ), spawn_link(fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query( ?ID, {inc_counter, 1}, @@ -2127,8 +2194,8 @@ t_expiration_retry_batch_multiple_times(_Config) -> ), TimeoutMS = 100, spawn_link(fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query( ?ID, {inc_counter, 1}, @@ -2137,8 +2204,8 @@ t_expiration_retry_batch_multiple_times(_Config) -> ) end), spawn_link(fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query( ?ID, {inc_counter, 2}, @@ -2334,7 +2401,7 @@ assert_async_retry_fail_then_succeed_inflight(Trace) -> ct:pal(" ~p", [Trace]), ?assert( ?strict_causality( - #{?snk_kind := buffer_worker_reply_after_query, action := nack, ref := _Ref}, + #{?snk_kind := handle_async_reply, action := nack}, #{?snk_kind := 
buffer_worker_retry_inflight_failed, ref := _Ref}, Trace ) diff --git a/bin/emqx b/bin/emqx index ebf10bada..132d8cba7 100755 --- a/bin/emqx +++ b/bin/emqx @@ -159,7 +159,7 @@ usage() { echo "Evaluate an Erlang expression in the EMQX node, even on Elixir node" ;; versions) - echo "List installed EMQX versions and their status" + echo "List installed EMQX release versions and their status" ;; unpack) echo "Usage: $REL_NAME unpack [VERSION]" @@ -217,12 +217,12 @@ usage() { echo " ctl: Administration commands, execute '$REL_NAME ctl help' for more details" echo '' echo "More:" - echo " Shell attach: remote_console | attach" - echo " Up/Down-grade: upgrade | downgrade | install | uninstall" - echo " Install info: ertspath | root_dir" - echo " Runtime info: pid | ping | versions" + echo " Shell attach: remote_console | attach" +# echo " Up/Down-grade: upgrade | downgrade | install | uninstall | versions" # TODO enable when supported + echo " Install Info: ertspath | root_dir" + echo " Runtime Status: pid | ping" echo " Validate Config: check_config" - echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl" + echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl" echo '' echo "Execute '$REL_NAME COMMAND help' for more information" ;; @@ -361,7 +361,7 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then logerr "$COMPATIBILITY_INFO" exit 2 fi - logerr "Using libs from '${DYNLIBS_DIR}' due to missing from the OS." + logwarn "Using libs from '${DYNLIBS_DIR}' due to missing from the OS." 
fi [ "$DEBUG" -eq 1 ] && set -x fi diff --git a/bin/install_upgrade.escript b/bin/install_upgrade.escript index f7f340f31..3e39c787b 100755 --- a/bin/install_upgrade.escript +++ b/bin/install_upgrade.escript @@ -18,27 +18,18 @@ main([Command0, DistInfoStr | CommandArgs]) -> Opts = parse_arguments(CommandArgs), %% invoke the command passed as argument F = case Command0 of - %% "install" -> fun(A, B) -> install(A, B) end; - %% "unpack" -> fun(A, B) -> unpack(A, B) end; - %% "upgrade" -> fun(A, B) -> upgrade(A, B) end; - %% "downgrade" -> fun(A, B) -> downgrade(A, B) end; - %% "uninstall" -> fun(A, B) -> uninstall(A, B) end; - "versions" -> fun(A, B) -> versions(A, B) end; - _ -> fun fail_upgrade/2 + "install" -> fun(A, B) -> install(A, B) end; + "unpack" -> fun(A, B) -> unpack(A, B) end; + "upgrade" -> fun(A, B) -> upgrade(A, B) end; + "downgrade" -> fun(A, B) -> downgrade(A, B) end; + "uninstall" -> fun(A, B) -> uninstall(A, B) end; + "versions" -> fun(A, B) -> versions(A, B) end end, F(DistInfo, Opts); main(Args) -> ?INFO("unknown args: ~p", [Args]), erlang:halt(1). -%% temporary block for hot-upgrades; next release will just remove -%% this and the new script version shall be used instead of this -%% current version. -%% TODO: always deny relup for macos (unsupported) -fail_upgrade(_DistInfo, _Opts) -> - ?ERROR("Unsupported upgrade path", []), - erlang:halt(1). - unpack({RelName, NameTypeArg, NodeName, Cookie}, Opts) -> TargetNode = start_distribution(NodeName, NameTypeArg, Cookie), Version = proplists:get_value(version, Opts), diff --git a/changes/v5.0.15/feat-9569.en.md b/changes/v5.0.15/feat-9569.en.md deleted file mode 100644 index f3b70ec41..000000000 --- a/changes/v5.0.15/feat-9569.en.md +++ /dev/null @@ -1 +0,0 @@ -Refactor `/authorization/sources/built_in_database/` by adding `rules/` to the path. 
diff --git a/changes/v5.0.15/feat-9569.zh.md b/changes/v5.0.15/feat-9569.zh.md deleted file mode 100644 index dd2e19c11..000000000 --- a/changes/v5.0.15/feat-9569.zh.md +++ /dev/null @@ -1 +0,0 @@ -重构 `/authorization/sources/built_in_database/` 接口,将 `rules/` 添加到了其路径中。 diff --git a/changes/v5.0.15/feat-9585.en.md b/changes/v5.0.15/feat-9585.en.md deleted file mode 100644 index 986cbb0c3..000000000 --- a/changes/v5.0.15/feat-9585.en.md +++ /dev/null @@ -1 +0,0 @@ -`/bridges_probe` API endpoint to test params for creating a new data bridge. diff --git a/changes/v5.0.15/feat-9585.zh.md b/changes/v5.0.15/feat-9585.zh.md deleted file mode 100644 index 82dd307ae..000000000 --- a/changes/v5.0.15/feat-9585.zh.md +++ /dev/null @@ -1 +0,0 @@ -添加新 API 接口 `/bridges_probe` 用于测试创建桥接的参数是否可用。 diff --git a/changes/v5.0.15/feat-9586.en.md b/changes/v5.0.15/feat-9586.en.md deleted file mode 100644 index 777fb81df..000000000 --- a/changes/v5.0.15/feat-9586.en.md +++ /dev/null @@ -1 +0,0 @@ -Basic auth is no longer allowed for API calls, must use API key instead. diff --git a/changes/v5.0.15/feat-9586.zh.md b/changes/v5.0.15/feat-9586.zh.md deleted file mode 100644 index 102266a46..000000000 --- a/changes/v5.0.15/feat-9586.zh.md +++ /dev/null @@ -1 +0,0 @@ -API 调用不再支持基于 `username:password` 的 `baisc` 认证, 现在 API 必须通过 API Key 才能进行调用。 diff --git a/changes/v5.0.15/feat-9628.en.md b/changes/v5.0.15/feat-9628.en.md deleted file mode 100644 index 6f814dd21..000000000 --- a/changes/v5.0.15/feat-9628.en.md +++ /dev/null @@ -1 +0,0 @@ -Expose additional resource configuration parameters: `start_after_created` and `start_timeout`. 
diff --git a/changes/v5.0.15/feat-9628.zh.md b/changes/v5.0.15/feat-9628.zh.md deleted file mode 100644 index fee14181b..000000000 --- a/changes/v5.0.15/feat-9628.zh.md +++ /dev/null @@ -1 +0,0 @@ -为桥接资源增加了配置参数:`start_after_created` 和 `start_timeout`。 diff --git a/changes/v5.0.15/feat-9722.en.md b/changes/v5.0.15/feat-9722.en.md deleted file mode 100644 index b86f37b83..000000000 --- a/changes/v5.0.15/feat-9722.en.md +++ /dev/null @@ -1,3 +0,0 @@ -Add the following configuration options for Pushing metrics to Prometheus Push Gateway: -- `headers`: Allows custom HTTP request headers. -- `job_name`: allows to customize the name of the Job pushed to Push Gateway. diff --git a/changes/v5.0.15/feat-9722.zh.md b/changes/v5.0.15/feat-9722.zh.md deleted file mode 100644 index a806cb1de..000000000 --- a/changes/v5.0.15/feat-9722.zh.md +++ /dev/null @@ -1,3 +0,0 @@ -为 Prometheus 推送到 Push Gateway 新增以下配置项: -- `headers`:允许自定义 HTTP 请求头。 -- `job_name`:允许自定义推送到 Push Gateway 的 Job 名称。 diff --git a/changes/v5.0.15/feat-9725.en.md b/changes/v5.0.15/feat-9725.en.md deleted file mode 100644 index 832aa6bf9..000000000 --- a/changes/v5.0.15/feat-9725.en.md +++ /dev/null @@ -1,11 +0,0 @@ -Remove the config `auto_reconnect` from the emqx_authz, emqx_authn and data-bridge componets. -This is because we have another config with similar functions: `resource_opts.auto_restart_interval`。 - -The functions of these two config are difficult to distinguish, which will lead to confusion. -After this change, `auto_reconnect` will not be configurable (always be true), and the underlying -drivers that support this config will automatically reconnect the abnormally disconnected -connection every `2s`. - -And the config `resource_opts.auto_restart_interval` is still available for user. -It is the time interval that emqx restarts the resource when the connection cannot be -established for some reason. 
diff --git a/changes/v5.0.15/feat-9725.zh.md b/changes/v5.0.15/feat-9725.zh.md deleted file mode 100644 index e7a2412d4..000000000 --- a/changes/v5.0.15/feat-9725.zh.md +++ /dev/null @@ -1,8 +0,0 @@ -从认证、鉴权和数据桥接功能中,删除 `auto_reconnect` 配置项,因为我们还有另一个功能类似的配置项: -`resource_opts.auto_restart_interval`。 - -这两个配置项的功能难以区分,会导致困惑。此修改之后,`auto_reconnect` 将不可配置(永远为 true), -支持此配置的底层驱动将以 `2s` 为周期自动重连异常断开的连接。 - -而 `resource_opts.auto_restart_interval` 配置项仍然开放给用户配置,它是资源因为某些原因 -无法建立连接的时候,emqx 重新启动该资源的时间间隔。 diff --git a/changes/v5.0.15/feat-9736.en.md b/changes/v5.0.15/feat-9736.en.md deleted file mode 100644 index 59d7bd558..000000000 --- a/changes/v5.0.15/feat-9736.en.md +++ /dev/null @@ -1,5 +0,0 @@ -Refactor of /bridges API to make it more consistent with other APIs: -- bridge enable/disable is now done via the endpoint `/bridges/{id}/enable/[true,false]` -- `/bridges/{id}/operation/{operation}` endpoints are now `/bridges/{id}/{operation}` -- metrics are moved out from the GET `/bridges/{id}` response and can now be fetched via `/bridges/{id}/metrics` -- the `bridges/{id}/reset_metrics` endpoint is now `/bridges/{id}/metrics/reset` diff --git a/changes/v5.0.15/feat-9736.zh.md b/changes/v5.0.15/feat-9736.zh.md deleted file mode 100644 index 0107c8ab6..000000000 --- a/changes/v5.0.15/feat-9736.zh.md +++ /dev/null @@ -1,5 +0,0 @@ -重构部分 /bridges 的API 使得其和其他 API 能够更加一致: -- 桥接的启用和禁用现在是通过 `/bridges/{id}/enable/[true,false]` API 来实现的 -- 使用 `/bridges/{id}/{operation}` 替换了旧的 `/bridges/{id}/operation/{operation}` API -- 指标数据从 `/bridges/{id}` 的响应消息中移除,现在可以使用新的 API `/bridges/{id}/metrics` 进行访问 -- 使用 `/bridges/{id}/metrics/reset` 替换了旧的 `bridges/{id}/reset_metrics` API diff --git a/changes/v5.0.15/feat-9774.en.md b/changes/v5.0.15/feat-9774.en.md deleted file mode 100644 index 722c4db6b..000000000 --- a/changes/v5.0.15/feat-9774.en.md +++ /dev/null @@ -1,3 +0,0 @@ -Add a password complexity requirement when adding or modifying Dashboard users via the API. 
-Now password must contain at least 2 of alphabetic, numeric and special characters, -and must be 8 to 64 characters long. diff --git a/changes/v5.0.15/feat-9774.zh.md b/changes/v5.0.15/feat-9774.zh.md deleted file mode 100644 index 21bfddfaf..000000000 --- a/changes/v5.0.15/feat-9774.zh.md +++ /dev/null @@ -1,2 +0,0 @@ -通过 API 添加、修改 Dashboard 用户时,增加对密码复杂度的要求。 -现在密码必须包含字母、数字以及特殊字符中的至少 2 种,并且长度范围必须是 8~64 个字符。 diff --git a/changes/v5.0.15/fix-9626.en.md b/changes/v5.0.15/fix-9626.en.md deleted file mode 100644 index cc1c86d3e..000000000 --- a/changes/v5.0.15/fix-9626.en.md +++ /dev/null @@ -1,2 +0,0 @@ -Return authorization settings with default values. -The authorization cache is enabled by default, but due to the missing default value in `GET` response of `/authorization/settings`, it seemed to be disabled from the dashboard. diff --git a/changes/v5.0.15/fix-9626.zh.md b/changes/v5.0.15/fix-9626.zh.md deleted file mode 100644 index bc2391f48..000000000 --- a/changes/v5.0.15/fix-9626.zh.md +++ /dev/null @@ -1,3 +0,0 @@ -为授权设置 API 返回默认值。 -授权缓存默认为开启,但是在此修复前,因为默认值在 `/authorization/settings` 这个 API 的返回值中缺失, -使得在仪表盘配置页面中看起来是关闭了。 diff --git a/changes/v5.0.15/fix-9680.en.md b/changes/v5.0.15/fix-9680.en.md deleted file mode 100644 index 2ee3caaa5..000000000 --- a/changes/v5.0.15/fix-9680.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix the problem that username and password authentication is mandatory in Influxdb v1 write API. 
diff --git a/changes/v5.0.15/fix-9680.zh.md b/changes/v5.0.15/fix-9680.zh.md deleted file mode 100644 index bd1ace306..000000000 --- a/changes/v5.0.15/fix-9680.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复 InfluxDB v1 桥接写入 API 配置中强制需要用户名密码认证的问题。 diff --git a/changes/v5.0.15/fix-9726.en.md b/changes/v5.0.15/fix-9726.en.md deleted file mode 100644 index 9aa522690..000000000 --- a/changes/v5.0.15/fix-9726.en.md +++ /dev/null @@ -1 +0,0 @@ -Client fuzzy search API results were missing information which could tell if more results are available in the next pages, this is now fixed by providing `hasnext` flag in the response. diff --git a/changes/v5.0.15/fix-9726.zh.md b/changes/v5.0.15/fix-9726.zh.md deleted file mode 100644 index 3554d2db7..000000000 --- a/changes/v5.0.15/fix-9726.zh.md +++ /dev/null @@ -1 +0,0 @@ -在此修复前,客户端模糊搜索 API 缺少一些可以用于判断是否可以继续翻页的信息,现在通过在响应中提供 `hasnext` 标志来解决这个问题。 diff --git a/changes/v5.0.15/fix-9735.en.md b/changes/v5.0.15/fix-9735.en.md deleted file mode 100644 index 6085adecd..000000000 --- a/changes/v5.0.15/fix-9735.en.md +++ /dev/null @@ -1 +0,0 @@ -Password information has been removed from information log messages for http, ldap, mongo, mqtt, mysql, pgsql and redis. diff --git a/changes/v5.0.15/fix-9735.zh.md b/changes/v5.0.15/fix-9735.zh.md deleted file mode 100644 index d8aa81fd1..000000000 --- a/changes/v5.0.15/fix-9735.zh.md +++ /dev/null @@ -1 +0,0 @@ -密码信息已从http、ldap、mongo、mqtt、mysql、pgsql和redis的信息日志消息中删除。 diff --git a/changes/v5.0.15/fix-9748.en.md b/changes/v5.0.15/fix-9748.en.md deleted file mode 100644 index 85f5896b2..000000000 --- a/changes/v5.0.15/fix-9748.en.md +++ /dev/null @@ -1 +0,0 @@ -Listeners not configured with `max_connections` will cause the cluster `/listeners` API to return 500 error. 
diff --git a/changes/v5.0.15/fix-9748.zh.md b/changes/v5.0.15/fix-9748.zh.md deleted file mode 100644 index cab352e79..000000000 --- a/changes/v5.0.15/fix-9748.zh.md +++ /dev/null @@ -1 +0,0 @@ -监听器不配置 `max_connections` 时会导致集群 `/listeners` 接口返回 500 错误。 diff --git a/changes/v5.0.15/fix-9749.en.md b/changes/v5.0.15/fix-9749.en.md deleted file mode 100644 index f079385ce..000000000 --- a/changes/v5.0.15/fix-9749.en.md +++ /dev/null @@ -1 +0,0 @@ -In some cases search APIs could respond with an incorrect `count` value in the metadata, that is usually much bigger than expected, this is now fixed. diff --git a/changes/v5.0.15/fix-9749.zh.md b/changes/v5.0.15/fix-9749.zh.md deleted file mode 100644 index 356cf9475..000000000 --- a/changes/v5.0.15/fix-9749.zh.md +++ /dev/null @@ -1 +0,0 @@ -在某些情况下,搜索 API 可能会在元数据中响应不正确的 `count` 值,这通常比预期的要大得多,现在已经修复了。 diff --git a/changes/v5.0.15/fix-9750.en.md b/changes/v5.0.15/fix-9750.en.md deleted file mode 100644 index 98c07dfb6..000000000 --- a/changes/v5.0.15/fix-9750.en.md +++ /dev/null @@ -1,5 +0,0 @@ -Reload overriding configs after boot. 
-Prior to this change, two configs were allow to change from dashboard, but will not take effect after reboot: - * Logging (such as level) - * Prometheus configs - diff --git a/changes/v5.0.15/fix-9750.zh.md b/changes/v5.0.15/fix-9750.zh.md deleted file mode 100644 index 605d4c225..000000000 --- a/changes/v5.0.15/fix-9750.zh.md +++ /dev/null @@ -1,4 +0,0 @@ -启动后重新加载一些重载配置项。 -在此修复前,下面两个配置项允许从 Dashboard 控制台修改,但是在重启后无法生效: - * 日志 (例如日志级别) - * Prometheus 配置 diff --git a/changes/v5.0.15/fix-9751.en.md b/changes/v5.0.15/fix-9751.en.md deleted file mode 100644 index f45b99129..000000000 --- a/changes/v5.0.15/fix-9751.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix that obsoleted cert file will not be deleted after the listener is updated/deleted diff --git a/changes/v5.0.15/fix-9751.zh.md b/changes/v5.0.15/fix-9751.zh.md deleted file mode 100644 index 3908e5c20..000000000 --- a/changes/v5.0.15/fix-9751.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复在更新或者删除监听器后,过时的证书文件没有被删除的问题。 diff --git a/changes/v5.0.15/fix-9763.en.md b/changes/v5.0.15/fix-9763.en.md deleted file mode 100644 index 8c07a3d5d..000000000 --- a/changes/v5.0.15/fix-9763.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix an authentication exception when password is not provided diff --git a/changes/v5.0.15/fix-9763.zh.md b/changes/v5.0.15/fix-9763.zh.md deleted file mode 100644 index 8548a363e..000000000 --- a/changes/v5.0.15/fix-9763.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复客户端没有提供密码时的一个异常 diff --git a/changes/v5.0.15/fix-9765.en.md b/changes/v5.0.15/fix-9765.en.md deleted file mode 100644 index 7de7e55f3..000000000 --- a/changes/v5.0.15/fix-9765.en.md +++ /dev/null @@ -1,6 +0,0 @@ -Parse decimals as password from environment variable overrides correctly. -Prior to this change, config values for passwords are not allowed to be decimals. -e.g. `EMQX_FOOBAR__PASSWORD=12344` or `emqx.foobar.password=1234` -would result in a type check error, unless quoted as: -`EMQX_FOOBAR__PASSWORD='"12344"'` or `emqx.foobar.password="1234"`. 
-After this fix, the value does not have to be quoted. diff --git a/changes/v5.0.15/fix-9765.zh.md b/changes/v5.0.15/fix-9765.zh.md deleted file mode 100644 index dd0b6a79c..000000000 --- a/changes/v5.0.15/fix-9765.zh.md +++ /dev/null @@ -1,7 +0,0 @@ -允许使用纯数字作为密码配置。 -在此修复前,密码的配置必须是字符串,使用纯数字时,会报类型检查错误。 -例如,`EMQX_FOOBAR__PASSWORD=12344` 或 `emqx.foobar.password=1234` 会出错, -必须用引把值括起来才行: -`EMQX_FOOBAR__PASSWORD='"12344"'` 或 `emqx.foobar.password="1234"`。 -修复后可以不使用引号。在环境变量重载中使用更加方便。 - diff --git a/changes/v5.0.15/fix-9769.en.md b/changes/v5.0.15/fix-9769.en.md deleted file mode 100644 index e07397363..000000000 --- a/changes/v5.0.15/fix-9769.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix Erlang shell prompt version prefix. e5.0.15 -> v5.0.15 diff --git a/changes/v5.0.15/fix-9769.zh.md b/changes/v5.0.15/fix-9769.zh.md deleted file mode 100644 index c7e63b862..000000000 --- a/changes/v5.0.15/fix-9769.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复 Eralng 控制台版本号前缀的打印错误 e5.0.15 -> v5.0.15 diff --git a/changes/v5.0.15/fix-9780.en.md b/changes/v5.0.15/fix-9780.en.md deleted file mode 100644 index cf777e6dc..000000000 --- a/changes/v5.0.15/fix-9780.en.md +++ /dev/null @@ -1 +0,0 @@ -When creating disk queue directory for resource worker, substitute ':' with '-' in worker id. diff --git a/changes/v5.0.15/fix-9780.zh.md b/changes/v5.0.15/fix-9780.zh.md deleted file mode 100644 index bc5079e1d..000000000 --- a/changes/v5.0.15/fix-9780.zh.md +++ /dev/null @@ -1 +0,0 @@ -在为资源缓存进程创建磁盘队列目录时,在ID中用 '-' 代替 ':'。 diff --git a/changes/v5.0.15/fix-9781.en.md b/changes/v5.0.15/fix-9781.en.md deleted file mode 100644 index 2b34ddc24..000000000 --- a/changes/v5.0.15/fix-9781.en.md +++ /dev/null @@ -1 +0,0 @@ -Trace files were left on a node when creating a zip file for download. They are now removed when the file is sent. Also, concurrent downloads will no longer interfere with each other. 
diff --git a/changes/v5.0.15/fix-9781.zh.md b/changes/v5.0.15/fix-9781.zh.md deleted file mode 100644 index 5c4cee0f5..000000000 --- a/changes/v5.0.15/fix-9781.zh.md +++ /dev/null @@ -1 +0,0 @@ -当下载 日志追踪 的日志时,一些中间文件将存留在处理节点上,现在这个问题得到了修复。同时,并发下载日志将不再相互干扰。 diff --git a/changes/v5.0.15/fix-9785.en.md b/changes/v5.0.15/fix-9785.en.md deleted file mode 100644 index 8af14b1ed..000000000 --- a/changes/v5.0.15/fix-9785.en.md +++ /dev/null @@ -1 +0,0 @@ -Stop authentication hook chain if `emqx_authentication` provides a definitive result. diff --git a/changes/v5.0.15/fix-9785.zh.md b/changes/v5.0.15/fix-9785.zh.md deleted file mode 100644 index 6aa84f755..000000000 --- a/changes/v5.0.15/fix-9785.zh.md +++ /dev/null @@ -1 +0,0 @@ -如果 `emqx_authentication` 提供了确定的结果,则停止认证钩子链。 diff --git a/changes/v5.0.15/fix-9787.en.md b/changes/v5.0.15/fix-9787.en.md deleted file mode 100644 index b41180368..000000000 --- a/changes/v5.0.15/fix-9787.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix a compatible problem for the `webhook` bridge configuration which was created before the v5.0.12. diff --git a/changes/v5.0.15/fix-9787.zh.md b/changes/v5.0.15/fix-9787.zh.md deleted file mode 100644 index a9d758de0..000000000 --- a/changes/v5.0.15/fix-9787.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复对在 v5.0.12 之前创建的 `webhook` 桥接配置的兼容问题。 diff --git a/changes/v5.0.16-en.md b/changes/v5.0.16-en.md new file mode 100644 index 000000000..8995d6d10 --- /dev/null +++ b/changes/v5.0.16-en.md @@ -0,0 +1,18 @@ +# v5.0.16 + +## Enhancements + + + +## Bug fixes + +- [#9824](https://github.com/emqx/emqx/pull/9824) The `topics/{topic}` API endpoint would return `500 - Internal Error` if a topic had multiple routes. This is fixed by returning a list of routes. + +- [#9832](https://github.com/emqx/emqx/pull/9832) Improve the error log emitted when a bridge in 'sync' mode times out waiting for a response. 
+ +- [#9834](https://github.com/emqx/emqx/pull/9834) Allow `mqtt.idle_timeout` to be set to `infinity`. + +- [#9839](https://github.com/emqx/emqx/pull/9839) Make sure that the content of an Authorization header that users have specified for a webhook bridge is not printed to log files. + +- [#9884](https://github.com/emqx/emqx/pull/9884) Do not resume all buffer workers on successful health check of any individual resource. + Previously, after any successful health check, all buffer workers (for all resources) were resumed. diff --git a/changes/v5.0.16-zh.md b/changes/v5.0.16-zh.md new file mode 100644 index 000000000..e8912efbb --- /dev/null +++ b/changes/v5.0.16-zh.md @@ -0,0 +1,18 @@ +# v5.0.16 + +## 增强 + + + +## 修复 + +- [#9824](https://github.com/emqx/emqx/pull/9824) 修复:当存在多个路由信息时,topics/{topic} 将会返回 500 - Internal Error 的问题,现在将会正确地返回路由信息列表。 + +- [#9832](https://github.com/emqx/emqx/pull/9832) 优化桥接同步资源调用超时情况下的一个错误日志。 + +- [#9834](https://github.com/emqx/emqx/pull/9834) 允许配置项 `mqtt.idle_timeout` 设置成 `infinity` + +- [#9839](https://github.com/emqx/emqx/pull/9839) 确保用户为webhook-bridge指定的Authorization-HTTP-header的内容不会被打印到日志文件。 + +- [#9884](https://github.com/emqx/emqx/pull/9884) 不在任意一个资源健康检查成功时恢复所有资源发送缓存。 + 在此修复之前,在任意一个资源成功进行健康检查后,所有资源的缓存都会尝试恢复。 diff --git a/changes/v5.0.16/fix-9834.en.md b/changes/v5.0.16/fix-9834.en.md deleted file mode 100644 index d5ad1f67a..000000000 --- a/changes/v5.0.16/fix-9834.en.md +++ /dev/null @@ -1 +0,0 @@ -Allow `mqtt.idle_timeout` to be set to `infinity` diff --git a/changes/v5.0.16/fix-9834.zh.md b/changes/v5.0.16/fix-9834.zh.md deleted file mode 100644 index 06eafc1a0..000000000 --- a/changes/v5.0.16/fix-9834.zh.md +++ /dev/null @@ -1 +0,0 @@ -允许配置项 `mqtt.idle_timeout` 设置成 `infinity` diff --git a/changes/v5.0.16/feat-9802.en.md b/changes/v5.0.17/feat-9802.en.md similarity index 100% rename from changes/v5.0.16/feat-9802.en.md rename to changes/v5.0.17/feat-9802.en.md diff --git a/changes/v5.0.16/feat-9802.zh.md 
b/changes/v5.0.17/feat-9802.zh.md similarity index 100% rename from changes/v5.0.16/feat-9802.zh.md rename to changes/v5.0.17/feat-9802.zh.md diff --git a/changes/v5.0.16/feat-9871.en.md b/changes/v5.0.17/feat-9871.en.md similarity index 100% rename from changes/v5.0.16/feat-9871.en.md rename to changes/v5.0.17/feat-9871.en.md diff --git a/changes/v5.0.16/feat-9871.zh.md b/changes/v5.0.17/feat-9871.zh.md similarity index 100% rename from changes/v5.0.16/feat-9871.zh.md rename to changes/v5.0.17/feat-9871.zh.md diff --git a/changes/v5.0.16/fix-9864.en.md b/changes/v5.0.17/fix-9864.en.md similarity index 100% rename from changes/v5.0.16/fix-9864.en.md rename to changes/v5.0.17/fix-9864.en.md diff --git a/changes/v5.0.16/fix-9864.zh.md b/changes/v5.0.17/fix-9864.zh.md similarity index 100% rename from changes/v5.0.16/fix-9864.zh.md rename to changes/v5.0.17/fix-9864.zh.md diff --git a/deploy/charts/emqx-enterprise/templates/configmap.yaml b/deploy/charts/emqx-enterprise/templates/configmap.yaml index 5086f85f6..6bd815ca0 100644 --- a/deploy/charts/emqx-enterprise/templates/configmap.yaml +++ b/deploy/charts/emqx-enterprise/templates/configmap.yaml @@ -16,9 +16,9 @@ data: EMQX_CLUSTER__K8S__SERVICE_NAME: {{ include "emqx.fullname" . }}-headless EMQX_CLUSTER__K8S__NAMESPACE: {{ .Release.Namespace }} EMQX_CLUSTER__K8S__ADDRESS_TYPE: "hostname" - EMQX_CLUSTER__K8S__SUFFIX: "svc.cluster.local" + EMQX_CLUSTER__K8S__SUFFIX: "svc.{{ .Values.clusterDomain }}" {{- else if eq (.Values.emqxConfig.EMQX_CLUSTER__DISCOVERY_STRATEGY) "dns" }} - EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . }}-headless.{{ .Release.Namespace }}.svc.cluster.local" + EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . 
}}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" EMQX_CLUSTER__DNS__RECORD_TYPE: "srv" {{- end -}} {{- range $index, $value := .Values.emqxConfig }} diff --git a/deploy/charts/emqx-enterprise/values.yaml b/deploy/charts/emqx-enterprise/values.yaml index b9507c5a0..3a607a71e 100644 --- a/deploy/charts/emqx-enterprise/values.yaml +++ b/deploy/charts/emqx-enterprise/values.yaml @@ -35,6 +35,8 @@ serviceAccount: ## Forces the recreation of pods during helm upgrades. This can be useful to update configuration values even if the container image did not change. recreatePods: false +clusterDomain: cluster.local + podAnnotations: {} # Pod deployment policy diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index fb689839c..2208f3019 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.0.15 +version: 5.0.16 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.0.15 +appVersion: 5.0.16 diff --git a/deploy/charts/emqx/templates/configmap.yaml b/deploy/charts/emqx/templates/configmap.yaml index 5086f85f6..6bd815ca0 100644 --- a/deploy/charts/emqx/templates/configmap.yaml +++ b/deploy/charts/emqx/templates/configmap.yaml @@ -16,9 +16,9 @@ data: EMQX_CLUSTER__K8S__SERVICE_NAME: {{ include "emqx.fullname" . }}-headless EMQX_CLUSTER__K8S__NAMESPACE: {{ .Release.Namespace }} EMQX_CLUSTER__K8S__ADDRESS_TYPE: "hostname" - EMQX_CLUSTER__K8S__SUFFIX: "svc.cluster.local" + EMQX_CLUSTER__K8S__SUFFIX: "svc.{{ .Values.clusterDomain }}" {{- else if eq (.Values.emqxConfig.EMQX_CLUSTER__DISCOVERY_STRATEGY) "dns" }} - EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . 
}}-headless.{{ .Release.Namespace }}.svc.cluster.local" + EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" EMQX_CLUSTER__DNS__RECORD_TYPE: "srv" {{- end -}} {{- range $index, $value := .Values.emqxConfig }} diff --git a/deploy/charts/emqx/values.yaml b/deploy/charts/emqx/values.yaml index 0423c8cdf..c737c8808 100644 --- a/deploy/charts/emqx/values.yaml +++ b/deploy/charts/emqx/values.yaml @@ -35,6 +35,8 @@ serviceAccount: ## Forces the recreation of pods during helm upgrades. This can be useful to update configuration values even if the container image did not change. recreatePods: false +clusterDomain: cluster.local + podAnnotations: {} # Pod deployment policy diff --git a/deploy/packages/emqx.service b/deploy/packages/emqx.service index d505cc519..d826e358b 100644 --- a/deploy/packages/emqx.service +++ b/deploy/packages/emqx.service @@ -13,7 +13,7 @@ Environment=HOME=/var/lib/emqx # Enable logging to file Environment=EMQX_LOG__TO=default -# Start 'foregroun' but not 'start' (daemon) mode. +# Start 'foreground' but not 'start' (daemon) mode. # Because systemd monitor/restarts 'simple' services ExecStart=/bin/bash /usr/bin/emqx foreground diff --git a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf index be1e581bd..fff798e19 100644 --- a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf +++ b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf @@ -220,7 +220,7 @@ emqx_ee_bridge_kafka { desc { en: "When set to 'true', TCP buffer sent as soon as possible. " "Otherwise, the OS kernel may buffer small TCP packets for a while (40 ms by default)." - zh: "设置 ‘true' 让系统内核立即发送。否则当需要发送当内容很少时,可能会有一定延迟(默认 40 毫秒)。" + zh: "设置‘true’让系统内核立即发送。否则当需要发送的内容很少时,可能会有一定延迟(默认 40 毫秒)。" } label { en: "No Delay" @@ -303,7 +303,7 @@ emqx_ee_bridge_kafka { "When a single message is over the limit, it is still sent (as a single element batch)." 
zh: "最大消息批量字节数。" "大多数 Kafka 环境的默认最低值是 1 MB,EMQX 的默认值比 1 MB 更小是因为需要" - "补偿 Kafka 消息编码索需要的额外字节(尤其是当每条消息都很小的情况下)。" + "补偿 Kafka 消息编码所需要的额外字节(尤其是当每条消息都很小的情况下)。" "当单个消息的大小超过该限制时,它仍然会被发送,(相当于该批量中只有单个消息)。" } label { @@ -371,7 +371,7 @@ emqx_ee_bridge_kafka { "Greater value typically means better throughput. However, there can be a risk of message reordering when this " "value is greater than 1." zh: "设置 Kafka 生产者(每个分区一个)在收到 Kafka 的确认前最多发送多少个请求(批量)。" - "调大这个值通常可以增加吞吐量,但是,当该值设置大于 1 是存在消息乱序的风险。" + "调大这个值通常可以增加吞吐量,但是,当该值设置大于 1 时存在消息乱序的风险。" } label { en: "Max Inflight" @@ -395,14 +395,14 @@ emqx_ee_bridge_kafka { desc { en: "Message buffer mode.\n\n" "memory: Buffer all messages in memory. The messages will be lost in case of EMQX node restart\n" - "disc: Buffer all messages on disk. The messages on disk are able to survive EMQX node restart.\n" + "disk: Buffer all messages on disk. The messages on disk are able to survive EMQX node restart.\n" "hybrid: Buffer message in memory first, when up to certain limit " "(see segment_bytes config for more information), then start offloading " "messages to disk, Like memory mode, the messages will be lost in case of " "EMQX node restart." zh: "消息缓存模式。\n" "memory: 所有的消息都缓存在内存里。如果 EMQX 服务重启,缓存的消息会丢失。\n" - "disc: 缓存到磁盘上。EMQX 重启后会继续发送重启前未发送完成的消息。\n" + "disk: 缓存到磁盘上。EMQX 重启后会继续发送重启前未发送完成的消息。\n" "hybrid: 先将消息缓存在内存中,当内存中的消息堆积超过一定限制" "(配置项 segment_bytes 描述了该限制)后,后续的消息会缓存到磁盘上。" "与 memory 模式一样,如果 EMQX 服务重启,缓存的消息会丢失。" diff --git a/lib-ee/emqx_ee_bridge/rebar.config b/lib-ee/emqx_ee_bridge/rebar.config index 6ca554c72..fa6dd560e 100644 --- a/lib-ee/emqx_ee_bridge/rebar.config +++ b/lib-ee/emqx_ee_bridge/rebar.config @@ -1,5 +1,5 @@ {erl_opts, [debug_info]}. 
-{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.4"}}} +{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.5"}}} , {kafka_protocol, {git, "https://github.com/kafka4beam/kafka_protocol.git", {tag, "4.1.2"}}} , {brod_gssapi, {git, "https://github.com/kafka4beam/brod_gssapi.git", {tag, "v0.1.0-rc1"}}} , {brod, {git, "https://github.com/kafka4beam/brod.git", {tag, "3.16.7"}}} diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl index b62871299..8312c081c 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl @@ -39,7 +39,7 @@ fields("config") -> {enable, mk(boolean(), #{desc => ?DESC("enable"), default => true})}, {collection, mk(binary(), #{desc => ?DESC("collection"), default => <<"mqtt">>})}, {payload_template, mk(binary(), #{required => false, desc => ?DESC("payload_template")})} - ] ++ fields("resource_opts"); + ] ++ emqx_resource_schema:fields("resource_opts_sync_only"); fields(mongodb_rs) -> emqx_connector_mongo:fields(rs) ++ fields("config"); fields(mongodb_sharded) -> @@ -69,32 +69,7 @@ fields("get_sharded") -> fields("get_single") -> emqx_bridge_schema:status_fields() ++ fields(mongodb_single) ++ - type_and_name_fields(mongodb_single); -fields("creation_opts") -> - lists:map( - fun - ({query_mode, _FieldSchema}) -> - {query_mode, - mk( - enum([sync, async]), - #{ - desc => ?DESC(emqx_resource_schema, "query_mode"), - default => sync - } - )}; - (Field) -> - Field - end, - emqx_resource_schema:fields("creation_opts") - ); -fields("resource_opts") -> - [ - {resource_opts, - mk( - ref(?MODULE, "creation_opts"), - #{default => #{}, desc => ?DESC(emqx_resource_schema, "resource_opts")} - )} - ]. + type_and_name_fields(mongodb_single). 
conn_bridge_examples(Method) -> [ diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl index fadf05848..fd4d9bdd9 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl @@ -98,8 +98,7 @@ fields("config") -> (emqx_connector_mysql:fields(config) -- emqx_connector_schema_lib:prepare_statement_fields()); fields("creation_opts") -> - Opts = emqx_resource_schema:fields("creation_opts"), - [O || {Field, _} = O <- Opts, not is_hidden_opts(Field)]; + emqx_resource_schema:fields("creation_opts_sync_only"); fields("post") -> [type_field(), name_field() | fields("config")]; fields("put") -> @@ -118,10 +117,6 @@ desc(_) -> %% ------------------------------------------------------------------------------------------------- %% internal -is_hidden_opts(Field) -> - lists:member(Field, [ - async_inflight_window - ]). type_field() -> {type, mk(enum([mysql]), #{required => true, desc => ?DESC("desc_type")})}. diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl index 8bf7b1969..b592197f9 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl @@ -100,8 +100,7 @@ fields("config") -> (emqx_connector_pgsql:fields(config) -- emqx_connector_schema_lib:prepare_statement_fields()); fields("creation_opts") -> - Opts = emqx_resource_schema:fields("creation_opts"), - [O || {Field, _} = O <- Opts, not is_hidden_opts(Field)]; + emqx_resource_schema:fields("creation_opts_sync_only"); fields("post") -> fields("post", pgsql); fields("put") -> @@ -122,11 +121,6 @@ desc(_) -> undefined. %% ------------------------------------------------------------------------------------------------- -%% internal -is_hidden_opts(Field) -> - lists:member(Field, [ - async_inflight_window - ]). 
type_field(Type) -> {type, mk(enum([Type]), #{required => true, desc => ?DESC("desc_type")})}. diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl index 3a3963786..18822ba11 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl @@ -7,7 +7,7 @@ -include_lib("typerefl/include/types.hrl"). -include_lib("hocon/include/hoconsc.hrl"). --import(hoconsc, [mk/2, enum/1, ref/2]). +-import(hoconsc, [mk/2, enum/1, ref/1, ref/2]). -export([ conn_bridge_examples/1 @@ -80,13 +80,20 @@ values(common, RedisType, SpecificOpts) -> pool_size => 8, password => <<"secret">>, command_template => [<<"LPUSH">>, <<"MSGS">>, <<"${payload}">>], - resource_opts => #{ + resource_opts => values(resource_opts, RedisType, #{}), + ssl => #{enable => false} + }, + maps:merge(Config, SpecificOpts); +values(resource_opts, "cluster", SpecificOpts) -> + SpecificOpts; +values(resource_opts, _RedisType, SpecificOpts) -> + maps:merge( + #{ batch_size => 1, batch_time => <<"20ms">> }, - ssl => #{enable => false} - }, - maps:merge(Config, SpecificOpts). + SpecificOpts + ). %% ------------------------------------------------------------------------------------------------- %% Hocon Schema Definitions @@ -115,29 +122,31 @@ fields("get_cluster") -> fields(Type) when Type == redis_single orelse Type == redis_sentinel orelse Type == redis_cluster -> - redis_bridge_common_fields() ++ - connector_fields(Type). + redis_bridge_common_fields(Type) ++ + connector_fields(Type); +fields("creation_opts_" ++ Type) -> + resource_creation_fields(Type). 
method_fileds(post, ConnectorType) -> - redis_bridge_common_fields() ++ + redis_bridge_common_fields(ConnectorType) ++ connector_fields(ConnectorType) ++ type_name_fields(ConnectorType); method_fileds(get, ConnectorType) -> - redis_bridge_common_fields() ++ + redis_bridge_common_fields(ConnectorType) ++ connector_fields(ConnectorType) ++ type_name_fields(ConnectorType) ++ emqx_bridge_schema:status_fields(); method_fileds(put, ConnectorType) -> - redis_bridge_common_fields() ++ + redis_bridge_common_fields(ConnectorType) ++ connector_fields(ConnectorType). -redis_bridge_common_fields() -> +redis_bridge_common_fields(Type) -> emqx_bridge_schema:common_bridge_fields() ++ [ {local_topic, mk(binary(), #{desc => ?DESC("local_topic")})}, {command_template, fun command_template/1} ] ++ - emqx_resource_schema:fields("resource_opts"). + resource_fields(Type). connector_fields(Type) -> RedisType = bridge_type_to_redis_conn_type(Type), @@ -156,6 +165,27 @@ type_name_fields(Type) -> {name, mk(binary(), #{required => true, desc => ?DESC("desc_name")})} ]. +resource_fields(Type) -> + [ + {resource_opts, + mk( + ref("creation_opts_" ++ atom_to_list(Type)), + #{ + required => false, + default => #{}, + desc => ?DESC(emqx_resource_schema, <<"resource_opts">>) + } + )} + ]. + +resource_creation_fields("redis_cluster") -> + % TODO + % Cluster bridge is currently incompatible with batching. + Fields = emqx_resource_schema:fields("creation_opts_sync_only"), + lists:foldl(fun proplists:delete/2, Fields, [batch_size, batch_time, enable_batch]); +resource_creation_fields(_) -> + emqx_resource_schema:fields("creation_opts_sync_only"). 
+ desc("config") -> ?DESC("desc_config"); desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> @@ -166,6 +196,8 @@ desc(redis_sentinel) -> ?DESC(emqx_connector_redis, "sentinel"); desc(redis_cluster) -> ?DESC(emqx_connector_redis, "cluster"); +desc("creation_opts_" ++ _Type) -> + ?DESC(emqx_resource_schema, "creation_opts"); desc(_) -> undefined. diff --git a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl index 18e27b775..ac98209ed 100644 --- a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl +++ b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl @@ -91,6 +91,7 @@ on_start(InstId, Config) -> {ok, #{ message_template => compile_message_template(MessageTemplate), client_id => ClientId, + kafka_topic => KafkaTopic, producers => Producers, resource_id => ResourceID }}; @@ -227,15 +228,42 @@ render_timestamp(Template, Message) -> %% Wolff producer never gives up retrying %% so there can only be 'ok' results. on_kafka_ack(_Partition, Offset, {ReplyFn, Args}) when is_integer(Offset) -> - %% the ReplyFn is emqx_resource_worker:reply_after_query/8 + %% the ReplyFn is emqx_resource_worker:handle_async_reply/2 apply(ReplyFn, Args ++ [ok]); on_kafka_ack(_Partition, buffer_overflow_discarded, _Callback) -> %% wolff should bump the dropped_queue_full counter %% do not apply the callback (which is basically to bump success or fail counter) ok. -on_get_status(_InstId, _State) -> - connected. +on_get_status(_InstId, #{client_id := ClientId, kafka_topic := KafkaTopic}) -> + case wolff_client_sup:find_client(ClientId) of + {ok, Pid} -> + do_get_status(Pid, KafkaTopic); + {error, _Reason} -> + disconnected + end. 
+ +do_get_status(Client, KafkaTopic) -> + %% TODO: add a wolff_producers:check_connectivity + case wolff_client:get_leader_connections(Client, KafkaTopic) of + {ok, Leaders} -> + %% Kafka is considered healthy as long as any of the partition leader is reachable + case + lists:any( + fun({_Partition, Pid}) -> + is_pid(Pid) andalso erlang:is_process_alive(Pid) + end, + Leaders + ) + of + true -> + connected; + false -> + disconnected + end; + {error, _} -> + disconnected + end. %% Parse comma separated host:port list into a [{Host,Port}] list hosts(Hosts) when is_binary(Hosts) -> diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl index 247b7799b..222acb77b 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl @@ -850,7 +850,6 @@ test_publish_success_batch(Config) -> t_not_a_json(Config) -> ?assertMatch( {error, #{ - discarded_errors_count := 0, kind := validation_error, reason := #{exception := {error, {badmap, "not a json"}}}, %% should be censored as it contains secrets @@ -868,7 +867,6 @@ t_not_a_json(Config) -> t_not_of_service_account_type(Config) -> ?assertMatch( {error, #{ - discarded_errors_count := 0, kind := validation_error, reason := {wrong_type, <<"not a service account">>}, %% should be censored as it contains secrets @@ -887,7 +885,6 @@ t_json_missing_fields(Config) -> GCPPubSubConfig0 = ?config(gcp_pubsub_config, Config), ?assertMatch( {error, #{ - discarded_errors_count := 0, kind := validation_error, reason := {missing_keys, [ diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl index e1899b1b2..bbde88cc7 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl @@ -910,19 +910,17 @@ t_write_failure(Config) 
-> sync -> {_, {ok, _}} = ?wait_async_action( - try + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, send_message(Config, SentData) - catch - error:timeout -> - {error, timeout} - end, + ), #{?snk_kind := buffer_worker_flush_nack}, 1_000 ); async -> ?wait_async_action( ?assertEqual(ok, send_message(Config, SentData)), - #{?snk_kind := buffer_worker_reply_after_query}, + #{?snk_kind := handle_async_reply}, 1_000 ) end @@ -940,14 +938,15 @@ t_write_failure(Config) -> #{got => Result} ); async -> - Trace = ?of_kind(buffer_worker_reply_after_query, Trace0), + Trace = ?of_kind(handle_async_reply, Trace0), ?assertMatch([#{action := nack} | _], Trace), [#{result := Result} | _] = Trace, ?assert( {error, {recoverable_error, {closed, "The connection was lost."}}} =:= Result orelse {error, {error, closed}} =:= Result orelse - {error, {recoverable_error, econnrefused}} =:= Result, + {error, {recoverable_error, econnrefused}} =:= Result orelse + {error, {recoverable_error, noproc}} =:= Result, #{got => Result} ) end, diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl index 57792b366..fec85c874 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl @@ -406,7 +406,10 @@ t_write_failure(Config) -> emqx_common_test_helpers:with_failure(down, ProxyName, ProxyHost, ProxyPort, fun() -> case QueryMode of sync -> - ?assertError(timeout, send_message(Config, SentData)); + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, + send_message(Config, SentData) + ); async -> send_message(Config, SentData) end @@ -439,8 +442,8 @@ t_write_timeout(Config) -> SentData = #{payload => Val, timestamp => 1668602148000}, Timeout = 1000, emqx_common_test_helpers:with_failure(timeout, ProxyName, ProxyHost, ProxyPort, fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := 
timeout}}}, query_resource(Config, {send_message, SentData, [], Timeout}) ) end), diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl index 25752f685..6fbb9689f 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl @@ -426,12 +426,7 @@ t_write_failure(Config) -> ?wait_async_action( case QueryMode of sync -> - try - send_message(Config, SentData) - catch - error:timeout -> - {error, timeout} - end; + ?assertMatch({error, _}, send_message(Config, SentData)); async -> send_message(Config, SentData) end, @@ -467,7 +462,10 @@ t_write_timeout(Config) -> SentData = #{payload => Val, timestamp => 1668602148000}, Timeout = 1000, emqx_common_test_helpers:with_failure(timeout, ProxyName, ProxyHost, ProxyPort, fun() -> - ?assertError(timeout, query_resource(Config, {send_message, SentData, [], Timeout})) + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, + query_resource(Config, {send_message, SentData, [], Timeout}) + ) end), ok. diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl index 2b67787b2..67a9b4a05 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl @@ -16,6 +16,9 @@ %% CT boilerplate %%------------------------------------------------------------------------------ +-define(KEYSHARDS, 3). +-define(KEYPREFIX, "MSGS"). + -define(REDIS_TOXYPROXY_CONNECT_CONFIG, #{ <<"server">> => <<"toxiproxy:6379">>, <<"redis_type">> => <<"single">> @@ -23,7 +26,7 @@ -define(COMMON_REDIS_OPTS, #{ <<"password">> => <<"public">>, - <<"command_template">> => [<<"RPUSH">>, <<"MSGS">>, <<"${payload}">>], + <<"command_template">> => [<<"RPUSH">>, <>, <<"${payload}">>], <<"local_topic">> => <<"local_topic/#">> }). @@ -47,7 +50,7 @@ ) ). 
-all() -> [{group, transport_types}, {group, rest}]. +all() -> [{group, transports}, {group, rest}]. groups() -> ResourceSpecificTCs = [t_create_delete_bridge], @@ -63,7 +66,7 @@ groups() -> ], [ {rest, TCs}, - {transport_types, [ + {transports, [ {group, tcp}, {group, tls} ]}, @@ -79,7 +82,7 @@ groups() -> init_per_group(Group, Config) when Group =:= redis_single; Group =:= redis_sentinel; Group =:= redis_cluster -> - [{transport_type, Group} | Config]; + [{connector_type, Group} | Config]; init_per_group(Group, Config) when Group =:= tcp; Group =:= tls -> @@ -139,12 +142,13 @@ end_per_suite(_Config) -> init_per_testcase(_Testcase, Config) -> ok = delete_all_rules(), ok = delete_all_bridges(), - case ?config(transport_type, Config) of - undefined -> + case {?config(connector_type, Config), ?config(batch_mode, Config)} of + {undefined, _} -> Config; - RedisType -> + {redis_cluster, batch_on} -> + {skip, "Batching is not supported by 'redis_cluster' bridge type"}; + {RedisType, BatchMode} -> Transport = ?config(transport, Config), - BatchMode = ?config(batch_mode, Config), #{RedisType := #{Transport := RedisConnConfig}} = redis_connect_configs(), #{BatchMode := ResourceConfig} = resource_configs(), IsBatch = (BatchMode =:= batch_on), @@ -162,7 +166,7 @@ end_per_testcase(_Testcase, Config) -> t_create_delete_bridge(Config) -> Name = <<"mybridge">>, - Type = ?config(transport_type, Config), + Type = ?config(connector_type, Config), BridgeConfig = ?config(bridge_config, Config), IsBatch = ?config(is_batch, Config), ?assertMatch( @@ -350,9 +354,7 @@ check_resource_queries(ResourceId, BaseTopic, IsBatch) -> ?wait_async_action( lists:foreach( fun(I) -> - IBin = integer_to_binary(I), - Topic = <>, - _ = publish_message(Topic, RandomPayload) + _ = publish_message(format_topic(BaseTopic, I), RandomPayload) end, lists:seq(1, N) ), @@ -360,7 +362,7 @@ check_resource_queries(ResourceId, BaseTopic, IsBatch) -> 5000 ), fun(Trace) -> - AddedMsgCount = length(added_msgs(ResourceId, 
RandomPayload)), + AddedMsgCount = length(added_msgs(ResourceId, BaseTopic, RandomPayload)), case IsBatch of true -> ?assertMatch( @@ -378,11 +380,23 @@ check_resource_queries(ResourceId, BaseTopic, IsBatch) -> end ). -added_msgs(ResourceId, Payload) -> - {ok, Results} = emqx_resource:simple_sync_query( - ResourceId, {cmd, [<<"LRANGE">>, <<"MSGS">>, <<"0">>, <<"-1">>]} - ), - [El || El <- Results, El =:= Payload]. +added_msgs(ResourceId, BaseTopic, Payload) -> + lists:flatmap( + fun(K) -> + {ok, Results} = emqx_resource:simple_sync_query( + ResourceId, + {cmd, [<<"LRANGE">>, K, <<"0">>, <<"-1">>]} + ), + [El || El <- Results, El =:= Payload] + end, + [format_redis_key(BaseTopic, S) || S <- lists:seq(0, ?KEYSHARDS - 1)] + ). + +format_topic(Base, I) -> + iolist_to_binary(io_lib:format("~s/~2..0B", [Base, I rem ?KEYSHARDS])). + +format_redis_key(Base, I) -> + iolist_to_binary([?KEYPREFIX, "/", format_topic(Base, I)]). conf_schema(StructName) -> #{ @@ -479,12 +493,13 @@ redis_connect_configs() -> }, redis_cluster => #{ tcp => #{ - <<"servers">> => <<"redis-cluster:7000,redis-cluster:7001,redis-cluster:7002">>, + <<"servers">> => + <<"redis-cluster-1:6379,redis-cluster-2:6379,redis-cluster-3:6379">>, <<"redis_type">> => <<"cluster">> }, tls => #{ <<"servers">> => - <<"redis-cluster-tls:8000,redis-cluster-tls:8001,redis-cluster-tls:8002">>, + <<"redis-cluster-tls-1:6389,redis-cluster-tls-2:6389,redis-cluster-tls-3:6389">>, <<"redis_type">> => <<"cluster">>, <<"ssl">> => redis_connect_ssl_opts(redis_cluster) } @@ -494,7 +509,7 @@ redis_connect_configs() -> toxiproxy_redis_bridge_config() -> Conf0 = ?REDIS_TOXYPROXY_CONNECT_CONFIG#{ <<"resource_opts">> => #{ - <<"query_mode">> => <<"async">>, + <<"query_mode">> => <<"sync">>, <<"worker_pool_size">> => <<"1">>, <<"batch_size">> => integer_to_binary(?BATCH_SIZE), <<"health_check_interval">> => <<"1s">>, @@ -509,7 +524,6 @@ invalid_command_bridge_config() -> Conf1#{ <<"resource_opts">> => #{ <<"query_mode">> => <<"sync">>, - 
<<"batch_size">> => <<"1">>, <<"worker_pool_size">> => <<"1">>, <<"start_timeout">> => <<"15s">> }, @@ -520,11 +534,10 @@ resource_configs() -> #{ batch_off => #{ <<"query_mode">> => <<"sync">>, - <<"batch_size">> => <<"1">>, <<"start_timeout">> => <<"15s">> }, batch_on => #{ - <<"query_mode">> => <<"async">>, + <<"query_mode">> => <<"sync">>, <<"worker_pool_size">> => <<"1">>, <<"batch_size">> => integer_to_binary(?BATCH_SIZE), <<"start_timeout">> => <<"15s">> diff --git a/lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf b/lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf new file mode 100644 index 000000000..8da63dad9 --- /dev/null +++ b/lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf @@ -0,0 +1 @@ +telemetry.enable = false diff --git a/lib-ee/emqx_ee_connector/rebar.config b/lib-ee/emqx_ee_connector/rebar.config index 3af1868c7..00421e4f6 100644 --- a/lib-ee/emqx_ee_connector/rebar.config +++ b/lib-ee/emqx_ee_connector/rebar.config @@ -1,7 +1,7 @@ {erl_opts, [debug_info]}. {deps, [ {hstreamdb_erl, {git, "https://github.com/hstreamdb/hstreamdb_erl.git", {tag, "0.2.5"}}}, - {influxdb, {git, "https://github.com/emqx/influxdb-client-erl", {tag, "1.1.7"}}}, + {influxdb, {git, "https://github.com/emqx/influxdb-client-erl", {tag, "1.1.8"}}}, {emqx, {path, "../../apps/emqx"}} ]}. 
diff --git a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl index 0ee27c5c4..d689f4bf3 100644 --- a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl +++ b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl @@ -200,15 +200,15 @@ start_client(InstId, Config) -> ?SLOG(info, #{ msg => "starting influxdb connector", connector => InstId, - config => Config, - client_config => ClientConfig + config => emqx_misc:redact(Config), + client_config => emqx_misc:redact(ClientConfig) }), try do_start_client(InstId, ClientConfig, Config) catch E:R:S -> ?tp(influxdb_connector_start_exception, #{error => {E, R}}), - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => "start influxdb connector error", connector => InstId, error => E, @@ -236,16 +236,16 @@ do_start_client( ?SLOG(info, #{ msg => "starting influxdb connector success", connector => InstId, - client => Client, - state => State + client => redact_auth(Client), + state => redact_auth(State) }), {ok, State}; false -> ?tp(influxdb_connector_start_failed, #{error => influxdb_client_not_alive}), - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => "starting influxdb connector failed", connector => InstId, - client => Client, + client => redact_auth(Client), reason => "client is not alive" }), %% no leak @@ -257,13 +257,13 @@ do_start_client( ?SLOG(info, #{ msg => "restarting influxdb connector, found already started client", connector => InstId, - old_client => Client0 + old_client => redact_auth(Client0) }), _ = influxdb:stop_client(Client0), do_start_client(InstId, ClientConfig, Config); {error, Reason} -> ?tp(influxdb_connector_start_failed, #{error => Reason}), - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => "starting influxdb connector failed", connector => InstId, reason => Reason @@ -282,7 +282,7 @@ client_config( {host, str(Host)}, {port, Port}, {pool_size, erlang:system_info(schedulers)}, - {pool, binary_to_atom(InstId, utf8)}, + {pool, InstId}, 
{precision, atom_to_binary(maps:get(precision, Config, ms), utf8)} ] ++ protocol_config(Config). @@ -340,6 +340,14 @@ password(#{password := Password}) -> password(_) -> []. +redact_auth(Term) -> + emqx_misc:redact(Term, fun is_auth_key/1). + +is_auth_key(Key) when is_binary(Key) -> + string:equal("authorization", Key, true); +is_auth_key(_) -> + false. + %% ------------------------------------------------------------------------------------------------- %% Query do_query(InstId, Client, Points) -> @@ -623,6 +631,13 @@ is_unrecoverable_error(_) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +is_auth_key_test_() -> + [ + ?_assert(is_auth_key(<<"Authorization">>)), + ?_assertNot(is_auth_key(<<"Something">>)), + ?_assertNot(is_auth_key(89)) + ]. + %% for coverage desc_test_() -> [ diff --git a/mix.exs b/mix.exs index 767e3af9b..18e11fdd7 100644 --- a/mix.exs +++ b/mix.exs @@ -47,7 +47,7 @@ defmodule EMQXUmbrella.MixProject do {:lc, github: "emqx/lc", tag: "0.3.2", override: true}, {:redbug, "2.0.8"}, {:typerefl, github: "ieQu1/typerefl", tag: "0.9.1", override: true}, - {:ehttpc, github: "emqx/ehttpc", tag: "0.4.4", override: true}, + {:ehttpc, github: "emqx/ehttpc", tag: "0.4.6", override: true}, {:gproc, github: "uwiger/gproc", tag: "0.8.0", override: true}, {:jiffy, github: "emqx/jiffy", tag: "1.0.5", override: true}, {:cowboy, github: "emqx/cowboy", tag: "2.9.0", override: true}, @@ -57,8 +57,8 @@ defmodule EMQXUmbrella.MixProject do {:gen_rpc, github: "emqx/gen_rpc", tag: "2.8.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.7", override: true}, {:minirest, github: "emqx/minirest", tag: "1.3.8", override: true}, - {:ecpool, github: "emqx/ecpool", tag: "0.5.2", override: true}, - {:replayq, github: "emqx/replayq", tag: "0.3.6", override: true}, + {:ecpool, github: "emqx/ecpool", tag: "0.5.3", override: true}, + {:replayq, github: "emqx/replayq", tag: "0.3.7", override: true}, {:pbkdf2, github: "emqx/erlang-pbkdf2", tag: "2.0.4", override: 
true}, {:emqtt, github: "emqx/emqtt", tag: "1.7.0-rc.2", override: true}, {:rulesql, github: "emqx/rulesql", tag: "0.1.4"}, @@ -68,7 +68,7 @@ defmodule EMQXUmbrella.MixProject do # in conflict by emqtt and hocon {:getopt, "1.0.2", override: true}, {:snabbkaffe, github: "kafka4beam/snabbkaffe", tag: "1.0.0", override: true}, - {:hocon, github: "emqx/hocon", tag: "0.35.0", override: true}, + {:hocon, github: "emqx/hocon", tag: "0.35.3", override: true}, {:emqx_http_lib, github: "emqx/emqx_http_lib", tag: "0.5.2", override: true}, {:esasl, github: "emqx/esasl", tag: "0.2.0"}, {:jose, github: "potatosalad/erlang-jose", tag: "1.11.2"}, @@ -76,8 +76,6 @@ defmodule EMQXUmbrella.MixProject do {:gun, github: "emqx/gun", tag: "1.3.9", override: true}, # in conflict by emqx_connector and system_monitor {:epgsql, github: "emqx/epgsql", tag: "4.7.0.1", override: true}, - # in conflict by mongodb and eredis_cluster - {:poolboy, github: "emqx/poolboy", tag: "1.5.2", override: true}, # in conflict by emqx and observer_cli {:recon, github: "ferd/recon", tag: "2.5.1", override: true}, {:jsx, github: "talentdeficit/jsx", tag: "v3.1.0", override: true}, @@ -132,7 +130,7 @@ defmodule EMQXUmbrella.MixProject do [ {:hstreamdb_erl, github: "hstreamdb/hstreamdb_erl", tag: "0.2.5"}, {:influxdb, github: "emqx/influxdb-client-erl", tag: "1.1.7", override: true}, - {:wolff, github: "kafka4beam/wolff", tag: "1.7.4"}, + {:wolff, github: "kafka4beam/wolff", tag: "1.7.5"}, {:kafka_protocol, github: "kafka4beam/kafka_protocol", tag: "4.1.2", override: true}, {:brod_gssapi, github: "kafka4beam/brod_gssapi", tag: "v0.1.0-rc1"}, {:brod, github: "kafka4beam/brod", tag: "3.16.7"}, diff --git a/rebar.config b/rebar.config index d9744631f..1a9d651dc 100644 --- a/rebar.config +++ b/rebar.config @@ -49,7 +49,7 @@ , {gpb, "4.19.5"} %% gpb only used to build, but not for release, pin it here to avoid fetching a wrong version due to rebar plugins scattered in all the deps , {typerefl, {git, 
"https://github.com/ieQu1/typerefl", {tag, "0.9.1"}}} , {gun, {git, "https://github.com/emqx/gun", {tag, "1.3.9"}}} - , {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.4"}}} + , {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.6"}}} , {gproc, {git, "https://github.com/uwiger/gproc", {tag, "0.8.0"}}} , {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}} , {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.0"}}} @@ -59,8 +59,8 @@ , {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}} , {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.7"}}} , {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.8"}}} - , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.2"}}} - , {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.6"}}} + , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.3"}}} + , {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.7"}}} , {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}} , {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.7.0-rc.2"}}} , {rulesql, {git, "https://github.com/emqx/rulesql", {tag, "0.1.4"}}} @@ -68,7 +68,7 @@ , {system_monitor, {git, "https://github.com/ieQu1/system_monitor", {tag, "3.0.3"}}} , {getopt, "1.0.2"} , {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.0"}}} - , {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.0"}}} + , {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.3"}}} , {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.2"}}} , {esasl, {git, "https://github.com/emqx/esasl", {tag, "0.2.0"}}} , {jose, {git, "https://github.com/potatosalad/erlang-jose", {tag, "1.11.2"}}} diff --git a/scripts/merge-config.escript b/scripts/merge-config.escript index 1b30dbd1d..d30a0ca68 100755 --- a/scripts/merge-config.escript +++ b/scripts/merge-config.escript @@ -30,7 +30,7 @@ main(_) -> case 
IsEnterprise of true -> EnterpriseCfgs = get_all_cfgs("lib-ee"), - EnterpriseConf = merge("", EnterpriseCfgs), + EnterpriseConf = merge(<<"">>, EnterpriseCfgs), ok = file:write_file("apps/emqx_conf/etc/emqx-enterprise.conf.all", EnterpriseConf); false -> ok @@ -41,22 +41,21 @@ is_enterprise() -> nomatch =/= string:find(Profile, "enterprise"). merge(BaseConf, Cfgs) -> - lists:foldl( - fun(CfgFile, Acc) -> - case filelib:is_regular(CfgFile) of - true -> - {ok, Bin1} = file:read_file(CfgFile), - case string:trim(Bin1, both) of - <<>> -> Acc; - Bin2 -> [Acc, io_lib:nl(), io_lib:nl(), Bin2] - end; - false -> - Acc - end - end, - BaseConf, - Cfgs - ). + Confs = [BaseConf | lists:map(fun read_conf/1, Cfgs)], + infix(lists:filter(fun(I) -> iolist_size(I) > 0 end, Confs), [io_lib:nl(), io_lib:nl()]). + +read_conf(CfgFile) -> + case filelib:is_regular(CfgFile) of + true -> + {ok, Bin1} = file:read_file(CfgFile), + string:trim(Bin1, both); + false -> + <<>> + end. + +infix([], _With) -> []; +infix([One], _With) -> [One]; +infix([H | T], With) -> [H, With, infix(T, With)]. get_all_cfgs(Root) -> Apps0 = filelib:wildcard("*", Root) -- ["emqx_machine", "emqx_conf"], diff --git a/scripts/relup-build/download-base-packages.sh b/scripts/relup-build/download-base-packages.sh index 1a03f7ef8..fc4511b58 100755 --- a/scripts/relup-build/download-base-packages.sh +++ b/scripts/relup-build/download-base-packages.sh @@ -14,8 +14,10 @@ export PROFILE case $PROFILE in "emqx-enterprise") - DIR='emqx-ee' - EDITION='enterprise' + #S3DIR='emqx-ee' + #EDITION='enterprise' + echo "No relup for now" + exit 0 ;; "emqx") echo "No relup for opensource edition" @@ -51,7 +53,7 @@ mkdir -p _upgrade_base pushd _upgrade_base >/dev/null for tag in ${BASE_VERSIONS}; do filename="$PROFILE-$(fullvsn "${tag#[e|v]}").tar.gz" - url="https://packages.emqx.io/$DIR/$tag/$filename" + url="https://packages.emqx.io/$S3DIR/$tag/$filename" echo "downloading ${filename} ..." 
## if the file does not exist (not downloaded yet) ## and there is such a package to downlaod diff --git a/scripts/test/influx/influx-bridge.conf b/scripts/test/influx/influx-bridge.conf index 31ddeaf79..3b5bb9f9f 100644 --- a/scripts/test/influx/influx-bridge.conf +++ b/scripts/test/influx/influx-bridge.conf @@ -34,16 +34,6 @@ bridges { } } } -log { - console_handler {enable = true, level = "warning"} - file_handlers { - default { - enable = false - file = "log/emqx.log" - level = "warning" - } - } -} rule_engine { ignore_sys_message = true jq_function_default_timeout = "10s"