diff --git a/.ci/docker-compose-file/docker-compose-kafka.yaml b/.ci/docker-compose-file/docker-compose-kafka.yaml index 63e74fa11..d4989bd0b 100644 --- a/.ci/docker-compose-file/docker-compose-kafka.yaml +++ b/.ci/docker-compose-file/docker-compose-kafka.yaml @@ -10,32 +10,34 @@ services: networks: emqx_bridge: ssl_cert_gen: - image: fredrikhgrelland/alpine-jdk11-openssl + # see https://github.com/emqx/docker-images + image: ghcr.io/emqx/certgen:latest container_name: ssl_cert_gen + user: "${DOCKER_USER:-root}" volumes: - - emqx-shared-secret:/var/lib/secret - - ./kafka/generate-certs.sh:/bin/generate-certs.sh - entrypoint: /bin/sh - command: /bin/generate-certs.sh + - /tmp/emqx-ci/emqx-shared-secret:/var/lib/secret kdc: hostname: kdc.emqx.net image: ghcr.io/emqx/emqx-builder/5.0-28:1.13.4-24.3.4.2-2-ubuntu20.04 container_name: kdc.emqx.net + expose: + - 88 # kdc + - 749 # admin server + # ports: + # - 88:88 + # - 749:749 networks: emqx_bridge: volumes: - - emqx-shared-secret:/var/lib/secret + - /tmp/emqx-ci/emqx-shared-secret:/var/lib/secret - ./kerberos/krb5.conf:/etc/kdc/krb5.conf - ./kerberos/krb5.conf:/etc/krb5.conf - ./kerberos/run.sh:/usr/bin/run.sh command: run.sh kafka_1: - image: wurstmeister/kafka:2.13-2.7.0 - ports: - - "9092:9092" - - "9093:9093" - - "9094:9094" - - "9095:9095" + image: wurstmeister/kafka:2.13-2.8.1 + # ports: + # - "9192-9195:9192-9195" container_name: kafka-1.emqx.net hostname: kafka-1.emqx.net depends_on: @@ -48,9 +50,9 @@ services: environment: KAFKA_BROKER_ID: 1 KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 - KAFKA_LISTENERS: PLAINTEXT://:9092,SASL_PLAINTEXT://:9093,SSL://:9094,SASL_SSL://:9095 - KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1.emqx.net:9092,SASL_PLAINTEXT://kafka-1.emqx.net:9093,SSL://kafka-1.emqx.net:9094,SASL_SSL://kafka-1.emqx.net:9095 - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,SASL_PLAINTEXT:SASL_PLAINTEXT,SSL:SSL,SASL_SSL:SASL_SSL + KAFKA_LISTENERS: PLAINTEXT://:9092,SASL_PLAINTEXT://:9093,SSL://:9094,SASL_SSL://:9095,LOCAL_PLAINTEXT://:9192,LOCAL_SASL_PLAINTEXT://:9193,LOCAL_SSL://:9194,LOCAL_SASL_SSL://:9195,TOXIPROXY_PLAINTEXT://:9292,TOXIPROXY_SASL_PLAINTEXT://:9293,TOXIPROXY_SSL://:9294,TOXIPROXY_SASL_SSL://:9295 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1.emqx.net:9092,SASL_PLAINTEXT://kafka-1.emqx.net:9093,SSL://kafka-1.emqx.net:9094,SASL_SSL://kafka-1.emqx.net:9095,LOCAL_PLAINTEXT://localhost:9192,LOCAL_SASL_PLAINTEXT://localhost:9193,LOCAL_SSL://localhost:9194,LOCAL_SASL_SSL://localhost:9195,TOXIPROXY_PLAINTEXT://toxiproxy.emqx.net:9292,TOXIPROXY_SASL_PLAINTEXT://toxiproxy.emqx.net:9293,TOXIPROXY_SSL://toxiproxy.emqx.net:9294,TOXIPROXY_SASL_SSL://toxiproxy.emqx.net:9295 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,SASL_PLAINTEXT:SASL_PLAINTEXT,SSL:SSL,SASL_SSL:SASL_SSL,LOCAL_PLAINTEXT:PLAINTEXT,LOCAL_SASL_PLAINTEXT:SASL_PLAINTEXT,LOCAL_SSL:SSL,LOCAL_SASL_SSL:SASL_SSL,TOXIPROXY_PLAINTEXT:PLAINTEXT,TOXIPROXY_SASL_PLAINTEXT:SASL_PLAINTEXT,TOXIPROXY_SSL:SSL,TOXIPROXY_SASL_SSL:SASL_SSL KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT KAFKA_SASL_ENABLED_MECHANISMS: PLAIN,SCRAM-SHA-256,SCRAM-SHA-512,GSSAPI KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka @@ -58,6 +60,7 @@ services: KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/jaas.conf" KAFKA_ALLOW_EVERYONE_IF_NO_ACL_FOUND: "true" KAFKA_CREATE_TOPICS_NG: test-topic-one-partition:1:1,test-topic-two-partitions:2:1,test-topic-three-partitions:3:1, + KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true" KAFKA_AUTHORIZER_CLASS_NAME: 
kafka.security.auth.SimpleAclAuthorizer KAFKA_SSL_TRUSTSTORE_LOCATION: /var/lib/secret/kafka.truststore.jks KAFKA_SSL_TRUSTSTORE_PASSWORD: password @@ -67,7 +70,7 @@ services: networks: emqx_bridge: volumes: - - emqx-shared-secret:/var/lib/secret + - /tmp/emqx-ci/emqx-shared-secret:/var/lib/secret - ./kafka/jaas.conf:/etc/kafka/jaas.conf - ./kafka/kafka-entrypoint.sh:/bin/kafka-entrypoint.sh - ./kerberos/krb5.conf:/etc/kdc/krb5.conf diff --git a/.ci/docker-compose-file/docker-compose-toxiproxy.yaml b/.ci/docker-compose-file/docker-compose-toxiproxy.yaml index 3dd30af52..16f18b6c2 100644 --- a/.ci/docker-compose-file/docker-compose-toxiproxy.yaml +++ b/.ci/docker-compose-file/docker-compose-toxiproxy.yaml @@ -6,7 +6,10 @@ services: image: ghcr.io/shopify/toxiproxy:2.5.0 restart: always networks: - - emqx_bridge + emqx_bridge: + aliases: + - toxiproxy + - toxiproxy.emqx.net volumes: - "./toxiproxy.json:/config/toxiproxy.json" ports: diff --git a/.ci/docker-compose-file/docker-compose.yaml b/.ci/docker-compose-file/docker-compose.yaml index ff330872d..5c37d971e 100644 --- a/.ci/docker-compose-file/docker-compose.yaml +++ b/.ci/docker-compose-file/docker-compose.yaml @@ -18,12 +18,12 @@ services: - emqx_bridge volumes: - ../..:/emqx - - emqx-shared-secret:/var/lib/secret + - /tmp/emqx-ci/emqx-shared-secret:/var/lib/secret - ./kerberos/krb5.conf:/etc/kdc/krb5.conf - ./kerberos/krb5.conf:/etc/krb5.conf working_dir: /emqx tty: true - user: "${UID_GID}" + user: "${DOCKER_USER:-root}" networks: emqx_bridge: @@ -37,6 +37,3 @@ networks: gateway: 172.100.239.1 - subnet: 2001:3200:3200::/64 gateway: 2001:3200:3200::1 - -volumes: # add this section - emqx-shared-secret: # does not need anything underneath this diff --git a/.ci/docker-compose-file/kafka/generate-certs.sh b/.ci/docker-compose-file/kafka/generate-certs.sh deleted file mode 100755 index 3f1c75550..000000000 --- a/.ci/docker-compose-file/kafka/generate-certs.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/bash - -set -euo pipefail - -set -x - -# Source https://github.com/zmstone/docker-kafka/blob/master/generate-certs.sh - -HOST="*." 
-DAYS=3650 -PASS="password" - -cd /var/lib/secret/ - -# Delete old files -(rm ca.key ca.crt server.key server.csr server.crt client.key client.csr client.crt server.p12 kafka.keystore.jks kafka.truststore.jks 2>/dev/null || true) - -ls - -echo '== Generate self-signed server and client certificates' -echo '= generate CA' -openssl req -new -x509 -keyout ca.key -out ca.crt -days $DAYS -nodes -subj "/C=SE/ST=Stockholm/L=Stockholm/O=brod/OU=test/CN=$HOST" - -echo '= generate server certificate request' -openssl req -newkey rsa:2048 -sha256 -keyout server.key -out server.csr -days "$DAYS" -nodes -subj "/C=SE/ST=Stockholm/L=Stockholm/O=brod/OU=test/CN=$HOST" - -echo '= sign server certificate' -openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days "$DAYS" -CAcreateserial - -echo '= generate client certificate request' -openssl req -newkey rsa:2048 -sha256 -keyout client.key -out client.csr -days "$DAYS" -nodes -subj "/C=SE/ST=Stockholm/L=Stockholm/O=brod/OU=test/CN=$HOST" - -echo '== sign client certificate' -openssl x509 -req -CA ca.crt -CAkey ca.key -in client.csr -out client.crt -days $DAYS -CAserial ca.srl - -echo '= Convert self-signed certificate to PKCS#12 format' -openssl pkcs12 -export -name "$HOST" -in server.crt -inkey server.key -out server.p12 -CAfile ca.crt -passout pass:"$PASS" - -echo '= Import PKCS#12 into a java keystore' - -echo $PASS | keytool -importkeystore -destkeystore kafka.keystore.jks -srckeystore server.p12 -srcstoretype pkcs12 -alias "$HOST" -storepass "$PASS" - - -echo '= Import CA into java truststore' - -echo yes | keytool -keystore kafka.truststore.jks -alias CARoot -import -file ca.crt -storepass "$PASS" diff --git a/.ci/docker-compose-file/kafka/kafka-entrypoint.sh b/.ci/docker-compose-file/kafka/kafka-entrypoint.sh index 445fd65c9..336a78e74 100755 --- a/.ci/docker-compose-file/kafka/kafka-entrypoint.sh +++ b/.ci/docker-compose-file/kafka/kafka-entrypoint.sh @@ -17,6 +17,7 @@ timeout $TIMEOUT bash -c 'until [ -f /var/lib/secret/kafka.keytab ]; do sleep 1; echo "+++++++ Wait until SSL certs are generated ++++++++" timeout $TIMEOUT bash -c 'until [ -f /var/lib/secret/kafka.truststore.jks ]; do sleep 1; done' +keytool -list -v -keystore /var/lib/secret/kafka.keystore.jks -storepass password sleep 3 diff --git a/.ci/docker-compose-file/toxiproxy.json b/.ci/docker-compose-file/toxiproxy.json index 6188eab17..2f8c4341b 100644 --- a/.ci/docker-compose-file/toxiproxy.json +++ b/.ci/docker-compose-file/toxiproxy.json @@ -53,5 +53,29 @@ "listen": "0.0.0.0:8000", "upstream": "dynamo:8000", "enabled": true + }, + { + "name": "kafka_plain", + "listen": "0.0.0.0:9292", + "upstream": "kafka-1.emqx.net:9292", + "enabled": true + }, + { + "name": "kafka_sasl_plain", + "listen": "0.0.0.0:9293", + "upstream": "kafka-1.emqx.net:9293", + "enabled": true + }, + { + "name": "kafka_ssl", + "listen": "0.0.0.0:9294", + "upstream": "kafka-1.emqx.net:9294", + "enabled": true + }, + { + "name": "kafka_sasl_ssl", + "listen": "0.0.0.0:9295", + "upstream": "kafka-1.emqx.net:9295", + "enabled": true } ] diff --git a/Makefile b/Makefile index 370c861d6..75c27d910 100644 --- a/Makefile +++ b/Makefile @@ -107,7 +107,7 @@ endef $(foreach app,$(APPS),$(eval $(call gen-app-prop-target,$(app)))) .PHONY: ct-suite -ct-suite: $(REBAR) +ct-suite: $(REBAR) merge-config ifneq ($(TESTCASE),) ifneq ($(GROUP),) $(REBAR) ct -v --readable=$(CT_READABLE) --name $(CT_NODE_NAME) --suite $(SUITE) --case $(TESTCASE) --group $(GROUP) diff --git a/apps/emqx/rebar.config 
b/apps/emqx/rebar.config index b62ca6b3c..229979f6c 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -33,7 +33,7 @@ {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.2"}}}, {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}}, {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.1"}}}, - {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.0"}}} + {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.7"}}} ]}. {plugins, [{rebar3_proper, "0.12.1"}]}. diff --git a/apps/emqx/test/emqx_common_test_helpers.erl b/apps/emqx/test/emqx_common_test_helpers.erl index c26e63a62..38f30b8c5 100644 --- a/apps/emqx/test/emqx_common_test_helpers.erl +++ b/apps/emqx/test/emqx_common_test_helpers.erl @@ -37,6 +37,7 @@ deps_path/2, flush/0, flush/1, + load/1, render_and_load_app_config/1, render_and_load_app_config/2 ]). @@ -66,14 +67,16 @@ emqx_cluster/2, start_epmd/0, start_slave/2, - stop_slave/1 + stop_slave/1, + listener_port/2 ]). -export([clear_screen/0]). -export([with_mock/4]). -export([ on_exit/1, - call_janitor/0 + call_janitor/0, + call_janitor/1 ]). %% Toxiproxy API @@ -587,6 +590,12 @@ ensure_quic_listener(Name, UdpPort, ExtraSettings) -> %% Whether to execute `emqx_config:init_load(SchemaMod)` %% default: true load_schema => boolean(), + %% If we want to exercise the scenario where a node joins an + %% existing cluster where there have already been some + %% configuration changes (via cluster rpc), then we need to enable + %% autocluster so that the joining node will restart the + %% `emqx_conf' app and correctly catch up the config. + start_autocluster => boolean(), %% Eval by emqx_config:put/2 conf => [{KeyPath :: list(), Val :: term()}], %% Fast option to config listener port @@ -637,25 +646,53 @@ emqx_cluster(Specs0, CommonOpts) -> %% Lower level starting API -spec start_slave(shortname(), node_opts()) -> nodename(). -start_slave(Name, Opts) -> - {ok, Node} = ct_slave:start( - list_to_atom(atom_to_list(Name) ++ "@" ++ host()), - [ - {kill_if_fail, true}, - {monitor_master, true}, - {init_timeout, 10000}, - {startup_timeout, 10000}, - {erl_flags, erl_flags()} - ] - ), - +start_slave(Name, Opts) when is_list(Opts) -> + start_slave(Name, maps:from_list(Opts)); +start_slave(Name, Opts) when is_map(Opts) -> + SlaveMod = maps:get(peer_mod, Opts, ct_slave), + Node = node_name(Name), + DoStart = + fun() -> + case SlaveMod of + ct_slave -> + ct_slave:start( + Node, + [ + {kill_if_fail, true}, + {monitor_master, true}, + {init_timeout, 10000}, + {startup_timeout, 10000}, + {erl_flags, erl_flags()} + ] + ); + slave -> + slave:start_link(host(), Name, ebin_path()) + end + end, + case DoStart() of + {ok, _} -> + ok; + {error, started_not_connected, _} -> + ok; + Other -> + throw(Other) + end, pong = net_adm:ping(Node), + put_peer_mod(Node, SlaveMod), setup_node(Node, Opts), + ok = snabbkaffe:forward_trace(Node), Node. %% Node stopping -stop_slave(Node) -> - ct_slave:stop(Node). +stop_slave(Node0) -> + Node = node_name(Node0), + SlaveMod = get_peer_mod(Node), + erase_peer_mod(Node), + case SlaveMod:stop(Node) of + ok -> ok; + {ok, _} -> ok; + {error, not_started, _} -> ok end. %% EPMD starting start_epmd() -> @@ -693,9 +730,27 @@ setup_node(Node, Opts) when is_map(Opts) -> {Type, listener_port(BasePort, Type)} || Type <- [tcp, ssl, ws, wss] ]), + %% we need a fresh data dir for each peer node to avoid unintended + %% successes due to sharing of data in the cluster.
+ PrivDataDir = maps:get(priv_data_dir, Opts, "/tmp"), + %% If we want to exercise the scenario where a node joins an + %% existing cluster where there have already been some + %% configuration changes (via cluster rpc), then we need to enable + %% autocluster so that the joining node will restart the + %% `emqx_conf' app and correctly catch up the config. + StartAutocluster = maps:get(start_autocluster, Opts, false), %% Load env before doing anything to avoid overriding - [ok = rpc:call(Node, application, load, [App]) || App <- LoadApps], + lists:foreach(fun(App) -> rpc:call(Node, ?MODULE, load, [App]) end, LoadApps), + %% Ensure a clean mnesia directory for each run to avoid + %% inter-test flakiness. + MnesiaDataDir = filename:join([ + PrivDataDir, + node(), + integer_to_list(erlang:unique_integer()), + "mnesia" + ]), + erpc:call(Node, application, set_env, [mnesia, dir, MnesiaDataDir]), %% Needs to be set explicitly because ekka:start() (which calls `gen`) is called without Handler %% in emqx_common_test_helpers:start_apps(...) @@ -721,7 +776,19 @@ setup_node(Node, Opts) when is_map(Opts) -> %% Otherwise, configuration gets loaded and all preset env in EnvHandler is lost LoadSchema andalso begin + %% to avoid sharing data between executions and/or + %% nodes. these variables might not be in the + %% config file (e.g.: emqx_ee_conf_schema). + NodeDataDir = filename:join([ + PrivDataDir, + node(), + integer_to_list(erlang:unique_integer()) + ]), + os:putenv("EMQX_NODE__DATA_DIR", NodeDataDir), + os:putenv("EMQX_NODE__COOKIE", atom_to_list(erlang:get_cookie())), emqx_config:init_load(SchemaMod), + os:unsetenv("EMQX_NODE__DATA_DIR"), + os:unsetenv("EMQX_NODE__COOKIE"), application:set_env(emqx, init_config_load_done, true) end, @@ -748,6 +815,8 @@ setup_node(Node, Opts) when is_map(Opts) -> undefined -> ok; _ -> + StartAutocluster andalso + (ok = rpc:call(Node, emqx_machine_boot, start_autocluster, [])), case rpc:call(Node, ekka, join, [JoinTo]) of ok -> ok; @@ -762,8 +831,27 @@ setup_node(Node, Opts) when is_map(Opts) -> %% Helpers +put_peer_mod(Node, SlaveMod) -> + put({?MODULE, Node}, SlaveMod), + ok. + +get_peer_mod(Node) -> + case get({?MODULE, Node}) of + undefined -> ct_slave; + SlaveMod -> SlaveMod + end. + +erase_peer_mod(Node) -> + erase({?MODULE, Node}). + node_name(Name) -> - list_to_atom(lists:concat([Name, "@", host()])). + case string:tokens(atom_to_list(Name), "@") of + [_Name, _Host] -> + %% the name already has an @ + Name; + _ -> + list_to_atom(atom_to_list(Name) ++ "@" ++ host()) + end. gen_node_name(Num) -> list_to_atom("autocluster_node" ++ integer_to_list(Num)). @@ -804,6 +892,9 @@ base_port(Number) -> gen_rpc_port(BasePort) -> BasePort - 1. +listener_port(Opts, Type) when is_map(Opts) -> + BasePort = maps:get(base_port, Opts), + listener_port(BasePort, Type); listener_port(BasePort, tcp) -> BasePort; listener_port(BasePort, ssl) -> @@ -988,8 +1079,11 @@ latency_up_proxy(off, Name, ProxyHost, ProxyPort) -> %% stop the janitor gracefully to ensure proper cleanup order and less %% noise in the logs. call_janitor() -> + call_janitor(15_000). + +call_janitor(Timeout) -> Janitor = get_or_spawn_janitor(), - exit(Janitor, normal), + ok = emqx_test_janitor:stop(Janitor, Timeout), ok.
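With `call_janitor/1` now taking an explicit timeout, test suites can bound how long the registered cleanup callbacks may run. A minimal usage sketch (the 30-second timeout and the teardown function are illustrative, not part of this patch):

    end_per_testcase(_TestCase, _Config) ->
        %% run the callbacks registered via on_exit/1, then stop the
        %% janitor gracefully (30 s here instead of the 15 s default)
        ok = emqx_common_test_helpers:call_janitor(30_000),
        ok.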
get_or_spawn_janitor() -> diff --git a/apps/emqx/test/emqx_test_janitor.erl b/apps/emqx/test/emqx_test_janitor.erl index 07d09aca1..c9b297dc7 100644 --- a/apps/emqx/test/emqx_test_janitor.erl +++ b/apps/emqx/test/emqx_test_janitor.erl @@ -30,6 +30,8 @@ %% API -export([ start_link/0, + stop/1, + stop/2, push_on_exit_callback/2 ]). @@ -40,6 +42,12 @@ start_link() -> gen_server:start_link(?MODULE, self(), []). +stop(Server) -> + stop(Server, 15_000). + +stop(Server, Timeout) -> + gen_server:call(Server, terminate, Timeout). + push_on_exit_callback(Server, Callback) when is_function(Callback, 0) -> gen_server:call(Server, {push, Callback}). @@ -52,10 +60,13 @@ init(Parent) -> {ok, #{callbacks => [], owner => Parent}}. terminate(_Reason, #{callbacks := Callbacks}) -> - lists:foreach(fun(Fun) -> Fun() end, Callbacks). + lists:foreach(fun(Fun) -> catch Fun() end, Callbacks). handle_call({push, Callback}, _From, State = #{callbacks := Callbacks}) -> {reply, ok, State#{callbacks := [Callback | Callbacks]}}; +handle_call(terminate, _From, State = #{callbacks := Callbacks}) -> + lists:foreach(fun(Fun) -> Fun() end, Callbacks), + {stop, normal, ok, State}; handle_call(_Req, _From, State) -> {reply, error, State}. diff --git a/apps/emqx_bridge/rebar.config b/apps/emqx_bridge/rebar.config index 0a1cbc29b..79f2caf50 100644 --- a/apps/emqx_bridge/rebar.config +++ b/apps/emqx_bridge/rebar.config @@ -1,5 +1,7 @@ {erl_opts, [debug_info]}. -{deps, [{emqx, {path, "../emqx"}}]}. +{deps, [ {emqx, {path, "../emqx"}} + , {emqx_resource, {path, "../../apps/emqx_resource"}} + ]}. {shell, [ % {config, "config/sys.config"}, diff --git a/apps/emqx_bridge/src/emqx_bridge.app.src b/apps/emqx_bridge/src/emqx_bridge.app.src index 37ec1266a..99a49f8fd 100644 --- a/apps/emqx_bridge/src/emqx_bridge.app.src +++ b/apps/emqx_bridge/src/emqx_bridge.app.src @@ -2,7 +2,7 @@ {application, emqx_bridge, [ {description, "EMQX bridges"}, {vsn, "0.1.13"}, - {registered, []}, + {registered, [emqx_bridge_sup]}, {mod, {emqx_bridge_app, []}}, {applications, [ kernel, diff --git a/apps/emqx_bridge/src/emqx_bridge.erl b/apps/emqx_bridge/src/emqx_bridge.erl index 292369d36..98ce6a8b0 100644 --- a/apps/emqx_bridge/src/emqx_bridge.erl +++ b/apps/emqx_bridge/src/emqx_bridge.erl @@ -55,6 +55,10 @@ T == gcp_pubsub; T == influxdb_api_v1; T == influxdb_api_v2; + %% TODO: rename this to `kafka_producer' after alias support is + %% added to hocon; keeping this as just `kafka' for backwards + %% compatibility. + T == kafka; T == redis_single; T == redis_sentinel; T == redis_cluster; @@ -137,12 +141,12 @@ load_hook(Bridges) -> maps:to_list(Bridges) ). -do_load_hook(Type, #{local_topic := _}) when ?EGRESS_DIR_BRIDGES(Type) -> +do_load_hook(Type, #{local_topic := LocalTopic}) when + ?EGRESS_DIR_BRIDGES(Type) andalso is_binary(LocalTopic) +-> emqx_hooks:put('message.publish', {?MODULE, on_message_publish, []}, ?HP_BRIDGE); do_load_hook(mqtt, #{egress := #{local := #{topic := _}}}) -> emqx_hooks:put('message.publish', {?MODULE, on_message_publish, []}, ?HP_BRIDGE); -do_load_hook(kafka, #{producer := #{mqtt := #{topic := _}}}) -> - emqx_hooks:put('message.publish', {?MODULE, on_message_publish, []}, ?HP_BRIDGE); do_load_hook(_Type, _Conf) -> ok. @@ -223,6 +227,7 @@ post_config_update(_, _Req, NewConf, OldConf, _AppEnv) -> ]), ok = unload_hook(), ok = load_hook(NewConf), + ?tp(bridge_post_config_update_done, #{}), Result. 
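The new `bridge_post_config_update_done` trace point gives tests a deterministic way to wait until `post_config_update/5` has finished. A sketch of blocking on it with snabbkaffe (assumes the snabbkaffe header is included and the trace collector is running; the timeout is illustrative):

    {ok, _Event} = ?block_until(#{?snk_kind := bridge_post_config_update_done}, 5_000).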
list() -> @@ -407,8 +412,8 @@ get_matched_bridge_id(BType, Conf, Topic, BName, Acc) when ?EGRESS_DIR_BRIDGES(B end; get_matched_bridge_id(mqtt, #{egress := #{local := #{topic := Filter}}}, Topic, BName, Acc) -> do_get_matched_bridge_id(Topic, Filter, mqtt, BName, Acc); -get_matched_bridge_id(kafka, #{producer := #{mqtt := #{topic := Filter}}}, Topic, BName, Acc) -> - do_get_matched_bridge_id(Topic, Filter, kafka, BName, Acc). +get_matched_bridge_id(_BType, _Conf, _Topic, _BName, Acc) -> + Acc. do_get_matched_bridge_id(Topic, Filter, BType, BName, Acc) -> case emqx_topic:match(Topic, Filter) of diff --git a/apps/emqx_bridge/src/emqx_bridge_resource.erl b/apps/emqx_bridge/src/emqx_bridge_resource.erl index 53fc7df4c..6426a46b7 100644 --- a/apps/emqx_bridge/src/emqx_bridge_resource.erl +++ b/apps/emqx_bridge/src/emqx_bridge_resource.erl @@ -16,6 +16,7 @@ -module(emqx_bridge_resource). -include_lib("emqx/include/logger.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). -export([ bridge_to_resource_type/1, @@ -45,7 +46,12 @@ ]). %% bi-directional bridge with producer/consumer or ingress/egress configs --define(IS_BI_DIR_BRIDGE(TYPE), TYPE =:= <<"mqtt">>; TYPE =:= <<"kafka">>). +-define(IS_BI_DIR_BRIDGE(TYPE), + (TYPE) =:= <<"mqtt">> +). +-define(IS_INGRESS_BRIDGE(TYPE), + (TYPE) =:= <<"kafka_consumer">> orelse ?IS_BI_DIR_BRIDGE(TYPE) +). -if(?EMQX_RELEASE_EDITION == ee). bridge_to_resource_type(<<"mqtt">>) -> emqx_connector_mqtt; @@ -219,7 +225,7 @@ recreate(Type, Name, Conf, Opts) -> ). create_dry_run(Type, Conf0) -> - TmpPath0 = iolist_to_binary(["bridges-create-dry-run:", emqx_misc:gen_id(8)]), + TmpPath0 = iolist_to_binary([?TEST_ID_PREFIX, emqx_misc:gen_id(8)]), TmpPath = emqx_misc:safe_filename(TmpPath0), Conf = emqx_map_lib:safe_atom_key_map(Conf0), case emqx_connector_ssl:convert_certs(TmpPath, Conf) of @@ -297,12 +303,16 @@ parse_confs( max_retries => Retry } }; -parse_confs(Type, Name, Conf) when ?IS_BI_DIR_BRIDGE(Type) -> +parse_confs(Type, Name, Conf) when ?IS_INGRESS_BRIDGE(Type) -> %% For some drivers that can be used as data-sources, we need to provide a %% hookpoint. The underlying driver will run `emqx_hooks:run/3` when it %% receives a message from the external database. BId = bridge_id(Type, Name), Conf#{hookpoint => <<"$bridges/", BId/binary>>, bridge_name => Name}; +%% TODO: rename this to `kafka_producer' after alias support is added +%% to hocon; keeping this as just `kafka' for backwards compatibility. +parse_confs(<<"kafka">> = _Type, Name, Conf) -> + Conf#{bridge_name => Name}; parse_confs(_Type, _Name, Conf) -> Conf. diff --git a/apps/emqx_bridge/src/emqx_bridge_sup.erl b/apps/emqx_bridge/src/emqx_bridge_sup.erl index a5e72a8c6..46a87b74f 100644 --- a/apps/emqx_bridge/src/emqx_bridge_sup.erl +++ b/apps/emqx_bridge/src/emqx_bridge_sup.erl @@ -34,5 +34,3 @@ init([]) -> }, ChildSpecs = [], {ok, {SupFlags, ChildSpecs}}. - -%% internal functions diff --git a/apps/emqx_bridge/test/emqx_bridge_webhook_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_webhook_SUITE.erl index 61df9bd29..f249aa95e 100644 --- a/apps/emqx_bridge/test/emqx_bridge_webhook_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_webhook_SUITE.erl @@ -38,7 +38,6 @@ init_per_suite(_Config) -> ok = emqx_common_test_helpers:start_apps([emqx_conf, emqx_bridge]), ok = emqx_connector_test_helpers:start_apps([emqx_resource]), {ok, _} = application:ensure_all_started(emqx_connector), - snabbkaffe:fix_ct_logging(), []. 
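Related to the `?IS_INGRESS_BRIDGE` macro above: for ingress bridges such as `kafka_consumer`, `parse_confs/3` injects a hookpoint derived from the bridge id. A sketch of the effect, assuming `bridge_id/2` yields `<<"kafka_consumer:my_bridge">>` (the bridge name and empty config are illustrative, and the call is in-module since `parse_confs` is internal):

    #{hookpoint := <<"$bridges/kafka_consumer:my_bridge">>, bridge_name := <<"my_bridge">>} =
        parse_confs(<<"kafka_consumer">>, <<"my_bridge">>, #{}).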
end_per_suite(_Config) -> diff --git a/apps/emqx_conf/src/emqx_cluster_rpc.erl b/apps/emqx_conf/src/emqx_cluster_rpc.erl index b2f06f35a..89f678554 100644 --- a/apps/emqx_conf/src/emqx_cluster_rpc.erl +++ b/apps/emqx_conf/src/emqx_cluster_rpc.erl @@ -204,7 +204,7 @@ do_multicall(M, F, A, RequiredSyncs, Timeout) -> query(TnxId) -> transaction(fun ?MODULE:trans_query/1, [TnxId]). --spec reset() -> reset. +-spec reset() -> ok. reset() -> gen_server:call(?MODULE, reset). -spec status() -> {'atomic', [map()]} | {'aborted', Reason :: term()}. diff --git a/apps/emqx_conf/test/emqx_conf_app_SUITE.erl b/apps/emqx_conf/test/emqx_conf_app_SUITE.erl index 84ced5362..c34eb9dc3 100644 --- a/apps/emqx_conf/test/emqx_conf_app_SUITE.erl +++ b/apps/emqx_conf/test/emqx_conf_app_SUITE.erl @@ -25,7 +25,6 @@ all() -> emqx_common_test_helpers:all(?MODULE). t_copy_conf_override_on_restarts(_Config) -> - net_kernel:start(['master@127.0.0.1', longnames]), ct:timetrap({seconds, 120}), snabbkaffe:fix_ct_logging(), Cluster = cluster([core, core, core]), @@ -165,11 +164,10 @@ cluster(Specs) -> {env, Env}, {apps, [emqx_conf]}, {load_schema, false}, - {join_to, false}, + {join_to, true}, {env_handler, fun (emqx) -> application:set_env(emqx, boot_modules, []), - io:format("~p~p~n", [node(), application:get_all_env(emqx)]), ok; (_) -> ok diff --git a/apps/emqx_plugins/test/emqx_plugins_SUITE.erl b/apps/emqx_plugins/test/emqx_plugins_SUITE.erl index f91233132..260ad1681 100644 --- a/apps/emqx_plugins/test/emqx_plugins_SUITE.erl +++ b/apps/emqx_plugins/test/emqx_plugins_SUITE.erl @@ -559,8 +559,8 @@ group_t_copy_plugin_to_a_new_node({'end', Config}) -> ok = rpc:call(CopyToNode, emqx_config, delete_override_conf_files, []), rpc:call(CopyToNode, ekka, leave, []), rpc:call(CopyFromNode, ekka, leave, []), - {ok, _} = emqx_common_test_helpers:stop_slave(CopyToNode), - {ok, _} = emqx_common_test_helpers:stop_slave(CopyFromNode), + ok = emqx_common_test_helpers:stop_slave(CopyToNode), + ok = emqx_common_test_helpers:stop_slave(CopyFromNode), ok = file:del_dir_r(proplists:get_value(to_install_dir, Config)), ok = file:del_dir_r(proplists:get_value(from_install_dir, Config)); group_t_copy_plugin_to_a_new_node(Config) -> diff --git a/apps/emqx_resource/include/emqx_resource.hrl b/apps/emqx_resource/include/emqx_resource.hrl index be570e694..41be9e8a0 100644 --- a/apps/emqx_resource/include/emqx_resource.hrl +++ b/apps/emqx_resource/include/emqx_resource.hrl @@ -119,5 +119,5 @@ -define(AUTO_RESTART_INTERVAL, 60000). -define(AUTO_RESTART_INTERVAL_RAW, <<"60s">>). --define(TEST_ID_PREFIX, "_test_:"). +-define(TEST_ID_PREFIX, "_probe_:"). -define(RES_METRICS, resource_metrics). diff --git a/changes/ee/feat-9564.en.md b/changes/ee/feat-9564.en.md new file mode 100644 index 000000000..4405e3e07 --- /dev/null +++ b/changes/ee/feat-9564.en.md @@ -0,0 +1,2 @@ +Implemented Kafka Consumer bridge. +Now it's possible to consume messages from Kafka and publish them to MQTT topics. 
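The heart of the new consumer bridge is its `topic_mapping` (schema added further below): a list of per-Kafka-topic entries. As an Erlang term, one entry looks like this (same illustrative values as `values(consumer)` later in this diff):

    TopicMapping = [
        #{
            kafka_topic => <<"kafka-topic-1">>,
            mqtt_topic => <<"mqtt/topic/1">>,
            qos => 1,
            payload_template => <<"${.}">>
        }
    ].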
diff --git a/changes/ee/feat-9564.zh.md b/changes/ee/feat-9564.zh.md new file mode 100644 index 000000000..01a7ffe58 --- /dev/null +++ b/changes/ee/feat-9564.zh.md @@ -0,0 +1,2 @@ +实现了 Kafka 消费者桥接。 +现在可以从 Kafka 消费消息并将其发布到 MQTT 主题。 diff --git a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf index c41b95c3a..a126a0b37 100644 --- a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf +++ b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf @@ -39,6 +39,16 @@ emqx_ee_bridge_kafka { zh: "桥接名字" } } + kafka_producer { + desc { + en: "Kafka Producer configuration." + zh: "Kafka Producer 配置。" + } + label { + en: "Kafka Producer" + zh: "Kafka Producer" + } + } producer_opts { desc { en: "Local MQTT data source and Kafka bridge configs." zh: "MQTT 到 Kafka" } label { en: "MQTT to Kafka" zh: "MQTT 到 Kafka" } } - producer_mqtt_opts { - desc { - en: "MQTT data source. Optional when used as a rule-engine action." - zh: "需要桥接到 MQTT 源主题。" - } - label { - en: "MQTT Source Topic" - zh: "MQTT 源主题" - } - } mqtt_topic { desc { - en: "MQTT topic or topic as data source (bridge input)." - zh: "指定 MQTT 主题作为桥接的数据源" + en: "MQTT topic or topic as data source (bridge input). This should not be configured if the bridge is used as a rule action." + zh: "指定 MQTT 主题作为桥接的数据源。 若该桥接用于规则的动作,则必须将该配置项删除。" } label { en: "Source MQTT Topic" zh: "MQTT 源主题" } } @@ -218,7 +218,7 @@ emqx_ee_bridge_kafka { } socket_nodelay { desc { - en: "When set to 'true', TCP buffer sent as soon as possible. " + en: "When set to 'true', the TCP buffer is sent as soon as possible. " "Otherwise, the OS kernel may buffer small TCP packets for a while (40 ms by default)." zh: "设置‘true’让系统内核立即发送。否则当需要发送的内容很少时,可能会有一定延迟(默认 40 毫秒)。" } @@ -473,4 +473,177 @@ emqx_ee_bridge_kafka { zh: "GSSAPI/Kerberos" } } + + kafka_consumer { + desc { + en: "Kafka Consumer configuration." + zh: "Kafka 消费者配置。" + } + label { + en: "Kafka Consumer" + zh: "Kafka 消费者" + } + } + consumer_opts { + desc { + en: "Local MQTT publishing and Kafka consumer configs." + zh: "本地 MQTT 转发和 Kafka 消费者配置。" + } + label { + en: "MQTT to Kafka" + zh: "MQTT 到 Kafka" + } + } + consumer_kafka_opts { + desc { + en: "Kafka consumer configs." + zh: "Kafka 消费者配置。" + } + label { + en: "Kafka Consumer" + zh: "Kafka 消费者" + } + } + consumer_mqtt_opts { + desc { + en: "Local MQTT message publishing." + zh: "本地 MQTT 消息转发。" + } + label { + en: "MQTT publish" + zh: "MQTT 转发" + } + } + consumer_mqtt_topic { + desc { + en: "Local topic to which consumed Kafka messages should be published." + zh: "设置 Kafka 消息向哪个本地 MQTT 主题转发消息。" + } + label { + en: "MQTT Topic" + zh: "MQTT 主题" + } + } + consumer_mqtt_qos { + desc { + en: "MQTT QoS used to publish messages consumed from Kafka." + zh: "转发 MQTT 消息时使用的 QoS。" + } + label { + en: "QoS" + zh: "QoS" + } + } + consumer_mqtt_payload { + desc { + en: "The template for transforming the incoming Kafka message." + " By default, it will use JSON format to serialize" + " inputs from the Kafka message. 
Such fields are:\n" + "headers: an object containing string key-value pairs.\n" + "key: Kafka message key (uses the chosen key encoding).\n" + "offset: offset for the message.\n" + "topic: Kafka topic.\n" + "ts: message timestamp.\n" + "ts_type: message timestamp type, which is one of" + " create, append or undefined.\n" + "value: Kafka message value (uses the chosen value encoding).\n" + zh: "用于转换收到的 Kafka 消息的模板。 " + "默认情况下,它将使用 JSON 格式来序列化来自 Kafka 的所有字段。 " + "这些字段包括:" + "headers:一个包含字符串键值对的 JSON 对象。\n" + "key:Kafka 消息的键(使用选择的编码方式编码)。\n" + "offset:消息的偏移量。\n" + "topic:Kafka 主题。\n" + "ts:消息的时间戳。\n" + "ts_type:消息的时间戳类型,值可能是:" + " create、append 或 undefined。\n" + "value:Kafka 消息值(使用选择的编码方式编码)。\n" + + } + label { + en: "MQTT Payload Template" + zh: "MQTT Payload Template" + } + } + consumer_kafka_topic { + desc { + en: "Kafka topic to consume from." + zh: "指定从哪个 Kafka 主题消费消息。" + } + label { + en: "Kafka topic" + zh: "Kafka 主题" + } + } + consumer_max_batch_bytes { + desc { + en: "Maximum bytes to fetch in a batch of messages. " + "Please note that if the configured value is smaller than the message size in Kafka, it may negatively impact the fetch performance." + zh: "在一批消息中要取的最大字节数。" + "如果该配置小于 Kafka 中消息的大小,则可能会影响消费性能。" + } + label { + en: "Max Bytes" + zh: "最大字节数" + } + } + consumer_max_rejoin_attempts { + desc { + en: "Maximum number of times allowed for a member to re-join the group. If the consumer group cannot reach balance after this configured number of attempts, the consumer group member will restart after a delay." + zh: "允许一个成员重新加入消费组的最大次数。如果超过该配置次数后仍不能成功加入消费组,则会在延迟一段时间后再重试。" + } + label { + en: "Max Rejoin Attempts" + zh: "最大重新加入尝试次数" + } + } + consumer_offset_reset_policy { + desc { + en: "Defines how the consumers should reset the start offset when " + "a topic partition has an invalid or no initial offset." + zh: "定义当一个主题分区的初始偏移量无效或没有初始偏移量时," + "消费者应如何重置开始偏移量。" + } + label { + en: "Offset Reset Policy" + zh: "偏移重置策略" + } + } + consumer_offset_commit_interval_seconds { + desc { + en: "Defines the time interval between two offset commit requests sent for each consumer group." + zh: "指定 Kafka 消费组偏移量提交的时间间隔。" + } + label { + en: "Offset Commit Interval" + zh: "偏移提交间隔" + } + } + consumer_topic_mapping { + desc { + en: "Defines the mapping between Kafka topics and MQTT topics. Must contain at least one item." + zh: "指定 Kafka 主题和 MQTT 主题之间的映射。必须至少包含一个项目。" + } + label { + en: "Topic Mapping" + zh: "主题映射关系" + } + } + consumer_encoding_mode { + desc { + en: "Defines how the key or value from the Kafka message is" + " dealt with before being forwarded via MQTT.\n" + "none Uses the key or value from the Kafka message unchanged." + " Note: in this case, the key or value must be a valid UTF-8 string.\n" + "base64 Uses base-64 encoding on the received key or value."
+ zh: "定义了在通过 MQTT 转发之前如何处理 Kafka 消息的键或值。" + "none 使用 Kafka 消息中的键或值,保持不变。" + " 注意:在这种情况下,键或值必须是一个有效的 UTF-8 字符串。\n" + "base64 对收到的键或值使用 base-64 编码。" + } + label { + en: "Encoding Mode" + zh: "编码模式" + } + } } diff --git a/lib-ee/emqx_ee_bridge/rebar.config b/lib-ee/emqx_ee_bridge/rebar.config index fa6dd560e..be0cb5345 100644 --- a/lib-ee/emqx_ee_bridge/rebar.config +++ b/lib-ee/emqx_ee_bridge/rebar.config @@ -2,7 +2,7 @@ {deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.5"}}} , {kafka_protocol, {git, "https://github.com/kafka4beam/kafka_protocol.git", {tag, "4.1.2"}}} , {brod_gssapi, {git, "https://github.com/kafka4beam/brod_gssapi.git", {tag, "v0.1.0-rc1"}}} - , {brod, {git, "https://github.com/kafka4beam/brod.git", {tag, "3.16.7"}}} + , {brod, {git, "https://github.com/kafka4beam/brod.git", {tag, "3.16.8"}}} , {emqx_connector, {path, "../../apps/emqx_connector"}} , {emqx_resource, {path, "../../apps/emqx_resource"}} , {emqx_bridge, {path, "../../apps/emqx_bridge"}} diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.app.src b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.app.src index ac181b251..6647ec212 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.app.src +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.app.src @@ -1,7 +1,7 @@ {application, emqx_ee_bridge, [ {description, "EMQX Enterprise data bridges"}, {vsn, "0.1.7"}, - {registered, []}, + {registered, [emqx_ee_bridge_kafka_consumer_sup]}, {applications, [ kernel, stdlib, diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.erl index b5c656291..ec81b7935 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge.erl @@ -15,7 +15,8 @@ api_schemas(Method) -> [ ref(emqx_ee_bridge_gcp_pubsub, Method), - ref(emqx_ee_bridge_kafka, Method), + ref(emqx_ee_bridge_kafka, Method ++ "_consumer"), + ref(emqx_ee_bridge_kafka, Method ++ "_producer"), ref(emqx_ee_bridge_mysql, Method), ref(emqx_ee_bridge_pgsql, Method), ref(emqx_ee_bridge_mongodb, Method ++ "_rs"), @@ -64,7 +65,10 @@ examples(Method) -> lists:foldl(Fun, #{}, schema_modules()). resource_type(Type) when is_binary(Type) -> resource_type(binary_to_atom(Type, utf8)); -resource_type(kafka) -> emqx_bridge_impl_kafka; +resource_type(kafka_consumer) -> emqx_bridge_impl_kafka_consumer; +%% TODO: rename this to `kafka_producer' after alias support is added +%% to hocon; keeping this as just `kafka' for backwards compatibility. +resource_type(kafka) -> emqx_bridge_impl_kafka_producer; resource_type(hstreamdb) -> emqx_ee_connector_hstreamdb; resource_type(gcp_pubsub) -> emqx_ee_connector_gcp_pubsub; resource_type(mongodb_rs) -> emqx_ee_connector_mongodb; @@ -85,14 +89,6 @@ resource_type(dynamo) -> emqx_ee_connector_dynamo. fields(bridges) -> [ - {kafka, - mk( - hoconsc:map(name, ref(emqx_ee_bridge_kafka, "config")), - #{ - desc => <<"Kafka Bridge Config">>, - required => false - } - )}, {hstreamdb, mk( hoconsc:map(name, ref(emqx_ee_bridge_hstreamdb, "config")), @@ -133,8 +129,8 @@ fields(bridges) -> required => false } )} - ] ++ mongodb_structs() ++ influxdb_structs() ++ redis_structs() ++ pgsql_structs() ++ - clickhouse_structs(). + ] ++ kafka_structs() ++ mongodb_structs() ++ influxdb_structs() ++ redis_structs() ++ + pgsql_structs() ++ clickhouse_structs(). mongodb_structs() -> [ {Type, mk( hoconsc:map(name, ref(emqx_ee_bridge_mongodb, Type)), #{ desc => <<"MongoDB Bridge Config">>, required => false } )} || Type <- [mongodb_rs, mongodb_sharded, mongodb_single] ].
+kafka_structs() -> + [ + %% TODO: rename this to `kafka_producer' after alias support + %% is added to hocon; keeping this as just `kafka' for + %% backwards compatibility. + {kafka, + mk( + hoconsc:map(name, ref(emqx_ee_bridge_kafka, kafka_producer)), + #{ + desc => <<"Kafka Producer Bridge Config">>, + required => false, + converter => fun emqx_ee_bridge_kafka:kafka_producer_converter/2 + } + )}, + {kafka_consumer, + mk( + hoconsc:map(name, ref(emqx_ee_bridge_kafka, kafka_consumer)), + #{desc => <<"Kafka Consumer Bridge Config">>, required => false} + )} + ]. + influxdb_structs() -> [ {Protocol, diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_kafka.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_kafka.erl index c345f6c74..8e9ff9628 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_kafka.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_kafka.erl @@ -30,27 +30,102 @@ host_opts/0 ]). +-export([kafka_producer_converter/2]). + %% ------------------------------------------------------------------------------------------------- %% api conn_bridge_examples(Method) -> [ #{ + %% TODO: rename this to `kafka_producer' after alias + %% support is added to hocon; keeping this as just `kafka' + %% for backwards compatibility. <<"kafka">> => #{ - summary => <<"Kafka Bridge">>, - value => values(Method) + summary => <<"Kafka Producer Bridge">>, + value => values({Method, producer}) + } + }, + #{ + <<"kafka_consumer">> => #{ + summary => <<"Kafka Consumer Bridge">>, + value => values({Method, consumer}) } } ]. -values(get) -> - maps:merge(values(post), ?METRICS_EXAMPLE); -values(post) -> +values({get, KafkaType}) -> + maps:merge(values({post, KafkaType}), ?METRICS_EXAMPLE); +values({post, KafkaType}) -> + maps:merge(values(common_config), values(KafkaType)); +values({put, KafkaType}) -> + values({post, KafkaType}); +values(common_config) -> #{ - bootstrap_hosts => <<"localhost:9092">> + authentication => #{ + mechanism => <<"plain">>, + username => <<"username">>, + password => <<"password">> + }, + bootstrap_hosts => <<"localhost:9092">>, + connect_timeout => <<"5s">>, + enable => true, + metadata_request_timeout => <<"4s">>, + min_metadata_refresh_interval => <<"3s">>, + socket_opts => #{ + sndbuf => <<"1024KB">>, + recbuf => <<"1024KB">>, + nodelay => true + } }; -values(put) -> - values(post). +values(producer) -> + #{ + kafka => #{ + topic => <<"kafka-topic">>, + message => #{ + key => <<"${.clientid}">>, + value => <<"${.}">>, + timestamp => <<"${.timestamp}">> + }, + max_batch_bytes => <<"896KB">>, + compression => <<"no_compression">>, + partition_strategy => <<"random">>, + required_acks => <<"all_isr">>, + partition_count_refresh_interval => <<"60s">>, + max_inflight => 10, + buffer => #{ + mode => <<"hybrid">>, + per_partition_limit => <<"2GB">>, + segment_bytes => <<"100MB">>, + memory_overload_protection => true + } + }, + local_topic => <<"mqtt/local/topic">> + }; +values(consumer) -> + #{ + kafka => #{ + max_batch_bytes => <<"896KB">>, + offset_reset_policy => <<"reset_to_latest">>, + offset_commit_interval_seconds => 5 + }, + key_encoding_mode => <<"none">>, + topic_mapping => [ + #{ + kafka_topic => <<"kafka-topic-1">>, + mqtt_topic => <<"mqtt/topic/1">>, + qos => 1, + payload_template => <<"${.}">> + }, + #{ + kafka_topic => <<"kafka-topic-2">>, + mqtt_topic => <<"mqtt/topic/2">>, + qos => 2, + payload_template => <<"v = ${.value}">> + } + ], + value_encoding_mode => <<"none">> + }. 
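The `kafka_producer_converter/2` referenced above (and defined near the end of this module) rewrites old-style producer configs into the new flat layout. Roughly, with abbreviated and illustrative maps:

    Old = #{
        <<"bootstrap_hosts">> => <<"localhost:9092">>,
        <<"producer">> => #{
            <<"mqtt">> => #{<<"topic">> => <<"mqtt/local/topic">>},
            <<"kafka">> => #{<<"topic">> => <<"kafka-topic">>}
        }
    },
    %% after conversion: `producer' is dropped, `mqtt.topic' becomes
    %% `local_topic', and the nested kafka options are lifted up:
    New = #{
        <<"bootstrap_hosts">> => <<"localhost:9092">>,
        <<"kafka">> => #{<<"topic">> => <<"kafka-topic">>},
        <<"local_topic">> => <<"mqtt/local/topic">>
    }.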
%% ------------------------------------------------------------------------------------------------- %% Hocon Schema Definitions @@ -60,14 +135,22 @@ host_opts() -> namespace() -> "bridge_kafka". -roots() -> ["config"]. +roots() -> ["config_consumer", "config_producer"]. -fields("post") -> - [type_field(), name_field() | fields("config")]; -fields("put") -> - fields("config"); -fields("get") -> - emqx_bridge_schema:status_fields() ++ fields("post"); +fields("post_" ++ Type) -> + [type_field(), name_field() | fields("config_" ++ Type)]; +fields("put_" ++ Type) -> + fields("config_" ++ Type); +fields("get_" ++ Type) -> + emqx_bridge_schema:status_fields() ++ fields("post_" ++ Type); +fields("config_producer") -> + fields(kafka_producer); +fields("config_consumer") -> + fields(kafka_consumer); +fields(kafka_producer) -> + fields("config") ++ fields(producer_opts); +fields(kafka_consumer) -> + fields("config") ++ fields(consumer_opts); fields("config") -> [ {enable, mk(boolean(), #{desc => ?DESC("config_enable"), default => true})}, @@ -104,8 +187,6 @@ fields("config") -> mk(hoconsc:union([none, ref(auth_username_password), ref(auth_gssapi_kerberos)]), #{ default => none, desc => ?DESC("authentication") })}, - {producer, mk(hoconsc:union([none, ref(producer_opts)]), #{desc => ?DESC(producer_opts)})}, - %{consumer, mk(hoconsc:union([none, ref(consumer_opts)]), #{desc => ?DESC(consumer_opts)})}, {socket_opts, mk(ref(socket_opts), #{required => false, desc => ?DESC(socket_opts)})} ] ++ emqx_connector_schema_lib:ssl_fields(); fields(auth_username_password) -> @@ -156,15 +237,16 @@ fields(socket_opts) -> ]; fields(producer_opts) -> [ - {mqtt, mk(ref(producer_mqtt_opts), #{desc => ?DESC(producer_mqtt_opts)})}, + %% Note: there's an implicit convention in `emqx_bridge' that, + %% for egress bridges with this config, the published messages + %% will be forwarded to such bridges. 
+ {local_topic, mk(binary(), #{required => false, desc => ?DESC(mqtt_topic)})}, {kafka, mk(ref(producer_kafka_opts), #{ required => true, desc => ?DESC(producer_kafka_opts) })} ]; -fields(producer_mqtt_opts) -> - [{topic, mk(binary(), #{desc => ?DESC(mqtt_topic)})}]; fields(producer_kafka_opts) -> [ {topic, mk(string(), #{required => true, desc => ?DESC(kafka_topic)})}, @@ -241,28 +323,72 @@ fields(producer_buffer) -> default => false, desc => ?DESC(buffer_memory_overload_protection) })} + ]; +fields(consumer_opts) -> + [ + {kafka, + mk(ref(consumer_kafka_opts), #{required => false, desc => ?DESC(consumer_kafka_opts)})}, + {topic_mapping, + mk( + hoconsc:array(ref(consumer_topic_mapping)), + #{ + required => true, + desc => ?DESC(consumer_topic_mapping), + validator => fun consumer_topic_mapping_validator/1 + } + )}, + {key_encoding_mode, + mk(enum([none, base64]), #{ + default => none, desc => ?DESC(consumer_encoding_mode) + })}, + {value_encoding_mode, + mk(enum([none, base64]), #{ + default => none, desc => ?DESC(consumer_encoding_mode) + })} + ]; +fields(consumer_topic_mapping) -> + [ + {kafka_topic, mk(binary(), #{required => true, desc => ?DESC(consumer_kafka_topic)})}, + {mqtt_topic, mk(binary(), #{required => true, desc => ?DESC(consumer_mqtt_topic)})}, + {qos, mk(emqx_schema:qos(), #{default => 0, desc => ?DESC(consumer_mqtt_qos)})}, + {payload_template, + mk( + string(), + #{default => <<"${.}">>, desc => ?DESC(consumer_mqtt_payload)} + )} + ]; +fields(consumer_kafka_opts) -> + [ + {max_batch_bytes, + mk(emqx_schema:bytesize(), #{ + default => "896KB", desc => ?DESC(consumer_max_batch_bytes) + })}, + {max_rejoin_attempts, + mk(non_neg_integer(), #{ + hidden => true, + default => 5, + desc => ?DESC(consumer_max_rejoin_attempts) + })}, + {offset_reset_policy, + mk( + enum([reset_to_latest, reset_to_earliest, reset_by_subscriber]), + #{default => reset_to_latest, desc => ?DESC(consumer_offset_reset_policy)} + )}, + {offset_commit_interval_seconds, + mk( + pos_integer(), + #{default => 5, desc => ?DESC(consumer_offset_commit_interval_seconds)} + )} ]. -% fields(consumer_opts) -> -% [ -% {kafka, mk(ref(consumer_kafka_opts), #{required => true, desc => ?DESC(consumer_kafka_opts)})}, -% {mqtt, mk(ref(consumer_mqtt_opts), #{required => true, desc => ?DESC(consumer_mqtt_opts)})} -% ]; -% fields(consumer_mqtt_opts) -> -% [ {topic, mk(string(), #{desc => ?DESC(consumer_mqtt_topic)})} -% ]; - -% fields(consumer_mqtt_opts) -> -% [ {topic, mk(string(), #{desc => ?DESC(consumer_mqtt_topic)})} -% ]; -% fields(consumer_kafka_opts) -> -% [ {topic, mk(string(), #{desc => ?DESC(consumer_kafka_topic)})} -% ]. - desc("config") -> ?DESC("desc_config"); -desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> - ["Configuration for Kafka using `", string:to_upper(Method), "` method."]; +desc("get_" ++ Type) when Type =:= "consumer"; Type =:= "producer" -> + ["Configuration for Kafka using `GET` method."]; +desc("put_" ++ Type) when Type =:= "consumer"; Type =:= "producer" -> + ["Configuration for Kafka using `PUT` method."]; +desc("post_" ++ Type) when Type =:= "consumer"; Type =:= "producer" -> + ["Configuration for Kafka using `POST` method."]; desc(Name) -> lists:member(Name, struct_names()) orelse throw({missing_desc, Name}), ?DESC(Name). 
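The `topic_mapping` validator (`consumer_topic_mapping_validator/1`, defined further below) rejects empty mappings and repeated Kafka topics. A behavior sketch, calling it in-module since it is not exported:

    ok = consumer_topic_mapping_validator(
        [#{<<"kafka_topic">> => <<"t1">>}, #{<<"kafka_topic">> => <<"t2">>}]
    ),
    {error, _} = consumer_topic_mapping_validator(
        [#{<<"kafka_topic">> => <<"t1">>}, #{<<"kafka_topic">> => <<"t1">>}]
    ),
    {error, _} = consumer_topic_mapping_validator([]).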
@@ -272,20 +398,61 @@ struct_names() -> auth_gssapi_kerberos, auth_username_password, kafka_message, + kafka_producer, + kafka_consumer, producer_buffer, producer_kafka_opts, - producer_mqtt_opts, socket_opts, - producer_opts + producer_opts, + consumer_opts, + consumer_kafka_opts, + consumer_topic_mapping ]. %% ------------------------------------------------------------------------------------------------- %% internal type_field() -> - {type, mk(enum([kafka]), #{required => true, desc => ?DESC("desc_type")})}. + {type, + %% TODO: rename `kafka' to `kafka_producer' after alias + %% support is added to hocon; keeping this as just `kafka' for + %% backwards compatibility. + mk(enum([kafka_consumer, kafka]), #{required => true, desc => ?DESC("desc_type")})}. name_field() -> {name, mk(binary(), #{required => true, desc => ?DESC("desc_name")})}. ref(Name) -> hoconsc:ref(?MODULE, Name). + +kafka_producer_converter(undefined, _HoconOpts) -> + undefined; +kafka_producer_converter( + #{<<"producer">> := OldOpts0, <<"bootstrap_hosts">> := _} = Config0, _HoconOpts +) -> + %% old schema + MQTTOpts = maps:get(<<"mqtt">>, OldOpts0, #{}), + LocalTopic = maps:get(<<"topic">>, MQTTOpts, undefined), + KafkaOpts = maps:get(<<"kafka">>, OldOpts0), + Config = maps:without([<<"producer">>], Config0), + case LocalTopic =:= undefined of + true -> + Config#{<<"kafka">> => KafkaOpts}; + false -> + Config#{<<"kafka">> => KafkaOpts, <<"local_topic">> => LocalTopic} + end; +kafka_producer_converter(Config, _HoconOpts) -> + %% new schema + Config. + +consumer_topic_mapping_validator(_TopicMapping = []) -> + {error, "There must be at least one Kafka-MQTT topic mapping"}; +consumer_topic_mapping_validator(TopicMapping = [_ | _]) -> + NumEntries = length(TopicMapping), + KafkaTopics = [KT || #{<<"kafka_topic">> := KT} <- TopicMapping], + DistinctKafkaTopics = length(lists:usort(KafkaTopics)), + case DistinctKafkaTopics =:= NumEntries of + true -> + ok; + false -> + {error, "Kafka topics must not be repeated in a bridge"} + end. diff --git a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka.erl b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka.erl index 49ca9fb86..c9dcce9a2 100644 --- a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka.erl +++ b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka.erl @@ -4,34 +4,38 @@ %% Kafka connection configuration -module(emqx_bridge_impl_kafka). --behaviour(emqx_resource). -%% callbacks of behaviour emqx_resource -export([ - callback_mode/0, - on_start/2, - on_stop/2, - on_query/3, - on_query_async/4, - on_get_status/2, - is_buffer_supported/0 + hosts/1, + make_client_id/2, + sasl/1 ]). -is_buffer_supported() -> true. +%% Parse a comma-separated host:port list into a [{Host,Port}] list +hosts(Hosts) when is_binary(Hosts) -> + hosts(binary_to_list(Hosts)); +hosts(Hosts) when is_list(Hosts) -> + kpro:parse_endpoints(Hosts). -callback_mode() -> async_if_possible. +%% The client ID should be unique, to make Kafka-side troubleshooting easier. +make_client_id(KafkaType0, BridgeName0) -> + KafkaType = to_bin(KafkaType0), + BridgeName = to_bin(BridgeName0), + iolist_to_binary([KafkaType, ":", BridgeName, ":", atom_to_list(node())]). -on_start(InstId, Config) -> - emqx_bridge_impl_kafka_producer:on_start(InstId, Config).
+sasl(none) -> + undefined; +sasl(#{mechanism := Mechanism, username := Username, password := Password}) -> + {Mechanism, Username, emqx_secret:wrap(Password)}; +sasl(#{ + kerberos_principal := Principal, + kerberos_keytab_file := KeyTabFile +}) -> + {callback, brod_gssapi, {gssapi, KeyTabFile, Principal}}. -on_stop(InstId, State) -> - emqx_bridge_impl_kafka_producer:on_stop(InstId, State). - -on_query(InstId, Req, State) -> - emqx_bridge_impl_kafka_producer:on_query(InstId, Req, State). - -on_query_async(InstId, Req, ReplyFn, State) -> - emqx_bridge_impl_kafka_producer:on_query_async(InstId, Req, ReplyFn, State). - -on_get_status(InstId, State) -> - emqx_bridge_impl_kafka_producer:on_get_status(InstId, State). +to_bin(A) when is_atom(A) -> + atom_to_binary(A); +to_bin(L) when is_list(L) -> + list_to_binary(L); +to_bin(B) when is_binary(B) -> + B. diff --git a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_consumer.erl b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_consumer.erl new file mode 100644 index 000000000..44633213c --- /dev/null +++ b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_consumer.erl @@ -0,0 +1,499 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- +-module(emqx_bridge_impl_kafka_consumer). + +-behaviour(emqx_resource). + +%% `emqx_resource' API +-export([ + callback_mode/0, + is_buffer_supported/0, + on_start/2, + on_stop/2, + on_get_status/2 +]). + +%% `brod_group_subscriber_v2' API +-export([ + init/2, + handle_message/2 +]). + +-ifdef(TEST). +-export([consumer_group_id/1]). +-endif. + +-include_lib("emqx/include/logger.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). +%% needed for the #kafka_message record definition +-include_lib("brod/include/brod.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). + +-type config() :: #{ + authentication := term(), + bootstrap_hosts := binary(), + bridge_name := atom(), + kafka := #{ + max_batch_bytes := emqx_schema:bytesize(), + max_rejoin_attempts := non_neg_integer(), + offset_commit_interval_seconds := pos_integer(), + offset_reset_policy := offset_reset_policy(), + topic := binary() + }, + topic_mapping := nonempty_list( + #{ + kafka_topic := kafka_topic(), + mqtt_topic := emqx_types:topic(), + qos := emqx_types:qos(), + payload_template := string() + } + ), + ssl := _, + any() => term() +}. +-type subscriber_id() :: emqx_ee_bridge_kafka_consumer_sup:child_id(). +-type kafka_topic() :: brod:topic(). +-type state() :: #{ + kafka_topics := nonempty_list(kafka_topic()), + subscriber_id := subscriber_id(), + kafka_client_id := brod:client_id() +}. +-type offset_reset_policy() :: reset_to_latest | reset_to_earliest | reset_by_subscriber. +%% -type mqtt_payload() :: full_message | message_value. +-type encoding_mode() :: none | base64. +-type consumer_init_data() :: #{ + hookpoint := binary(), + key_encoding_mode := encoding_mode(), + resource_id := resource_id(), + topic_mapping := #{ + kafka_topic() := #{ + payload_template := emqx_plugin_libs_rule:tmpl_token(), + mqtt_topic => emqx_types:topic(), + qos => emqx_types:qos() + } + }, + value_encoding_mode := encoding_mode() +}.
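For context, the shared helpers factored out into `emqx_bridge_impl_kafka` above behave roughly as follows (host, bridge name and the node suffix of the client id are illustrative):

    [{"kafka-1.emqx.net", 9092}] =
        emqx_bridge_impl_kafka:hosts(<<"kafka-1.emqx.net:9092">>),
    %% yields e.g. <<"kafka_consumer:my_bridge:emqx@127.0.0.1">>:
    ClientId = emqx_bridge_impl_kafka:make_client_id(kafka_consumer, <<"my_bridge">>).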
+-type consumer_state() :: #{ + hookpoint := binary(), + kafka_topic := binary(), + key_encoding_mode := encoding_mode(), + resource_id := resource_id(), + topic_mapping := #{ + kafka_topic() := #{ + payload_template := emqx_plugin_libs_rule:tmpl_token(), + mqtt_topic => emqx_types:topic(), + qos => emqx_types:qos() + } + }, + value_encoding_mode := encoding_mode() +}. +-type subscriber_init_info() :: #{ + topic => brod:topic(), + partition => brod:partition(), + group_id => brod:group_id(), + commit_fun => brod_group_subscriber_v2:commit_fun() +}. + +%%------------------------------------------------------------------------------------- +%% `emqx_resource' API +%%------------------------------------------------------------------------------------- + +callback_mode() -> + async_if_possible. + +%% There are no queries to be made to this bridge, so we declare that +%% buffering is supported in order not to spawn unused resource buffer +%% workers. +is_buffer_supported() -> + true. + +-spec on_start(manager_id(), config()) -> {ok, state()}. +on_start(InstanceId, Config) -> + #{ + authentication := Auth, + bootstrap_hosts := BootstrapHosts0, + bridge_name := BridgeName, + hookpoint := _, + kafka := #{ + max_batch_bytes := _, + max_rejoin_attempts := _, + offset_commit_interval_seconds := _, + offset_reset_policy := _ + }, + ssl := SSL, + topic_mapping := _ + } = Config, + BootstrapHosts = emqx_bridge_impl_kafka:hosts(BootstrapHosts0), + KafkaType = kafka_consumer, + %% Note: this is distinct per node. + ClientID = make_client_id(InstanceId, KafkaType, BridgeName), + ClientOpts0 = + case Auth of + none -> []; + Auth -> [{sasl, emqx_bridge_impl_kafka:sasl(Auth)}] + end, + ClientOpts = add_ssl_opts(ClientOpts0, SSL), + case brod:start_client(BootstrapHosts, ClientID, ClientOpts) of + ok -> + ?tp( + kafka_consumer_client_started, + #{client_id => ClientID, instance_id => InstanceId} + ), + ?SLOG(info, #{ + msg => "kafka_consumer_client_started", + instance_id => InstanceId, + kafka_hosts => BootstrapHosts + }); + {error, Reason} -> + ?SLOG(error, #{ + msg => "failed_to_start_kafka_consumer_client", + instance_id => InstanceId, + kafka_hosts => BootstrapHosts, + reason => emqx_misc:redact(Reason) + }), + throw(failed_to_start_kafka_client) + end, + start_consumer(Config, InstanceId, ClientID). + +-spec on_stop(manager_id(), state()) -> ok. +on_stop(_InstanceID, State) -> + #{ + subscriber_id := SubscriberId, + kafka_client_id := ClientID + } = State, + stop_subscriber(SubscriberId), + stop_client(ClientID), + ok. + +-spec on_get_status(manager_id(), state()) -> connected | disconnected. +on_get_status(_InstanceID, State) -> + #{ + subscriber_id := SubscriberId, + kafka_client_id := ClientID, + kafka_topics := KafkaTopics + } = State, + do_get_status(ClientID, KafkaTopics, SubscriberId). + +%%------------------------------------------------------------------------------------- +%% `brod_group_subscriber_v2' API +%%------------------------------------------------------------------------------------- + +-spec init(subscriber_init_info(), consumer_init_data()) -> {ok, consumer_state()}. +init(GroupData, State0) -> + ?tp(kafka_consumer_subscriber_init, #{group_data => GroupData, state => State0}), + #{topic := KafkaTopic} = GroupData, + State = State0#{kafka_topic => KafkaTopic}, + {ok, State}. + +-spec handle_message(#kafka_message{}, consumer_state()) -> {ok, commit, consumer_state()}.
+handle_message(Message, State) -> + ?tp_span( + kafka_consumer_handle_message, + #{message => Message, state => State}, + do_handle_message(Message, State) + ). + +do_handle_message(Message, State) -> + #{ + hookpoint := Hookpoint, + kafka_topic := KafkaTopic, + key_encoding_mode := KeyEncodingMode, + resource_id := ResourceId, + topic_mapping := TopicMapping, + value_encoding_mode := ValueEncodingMode + } = State, + #{ + mqtt_topic := MQTTTopic, + qos := MQTTQoS, + payload_template := PayloadTemplate + } = maps:get(KafkaTopic, TopicMapping), + FullMessage = #{ + headers => maps:from_list(Message#kafka_message.headers), + key => encode(Message#kafka_message.key, KeyEncodingMode), + offset => Message#kafka_message.offset, + topic => KafkaTopic, + ts => Message#kafka_message.ts, + ts_type => Message#kafka_message.ts_type, + value => encode(Message#kafka_message.value, ValueEncodingMode) + }, + Payload = render(FullMessage, PayloadTemplate), + MQTTMessage = emqx_message:make(ResourceId, MQTTQoS, MQTTTopic, Payload), + _ = emqx:publish(MQTTMessage), + emqx:run_hook(Hookpoint, [FullMessage]), + emqx_resource_metrics:received_inc(ResourceId), + %% note: just `ack' does not commit the offset to the + %% kafka consumer group. + {ok, commit, State}. + +%%------------------------------------------------------------------------------------- +%% Helper fns +%%------------------------------------------------------------------------------------- + +add_ssl_opts(ClientOpts, #{enable := false}) -> + ClientOpts; +add_ssl_opts(ClientOpts, SSL) -> + [{ssl, emqx_tls_lib:to_client_opts(SSL)} | ClientOpts]. + +-spec make_subscriber_id(atom() | binary()) -> emqx_ee_bridge_kafka_consumer_sup:child_id(). +make_subscriber_id(BridgeName) -> + BridgeNameBin = to_bin(BridgeName), + <<"kafka_subscriber:", BridgeNameBin/binary>>. + +ensure_consumer_supervisor_started() -> + Mod = emqx_ee_bridge_kafka_consumer_sup, + ChildSpec = + #{ + id => Mod, + start => {Mod, start_link, []}, + restart => permanent, + shutdown => infinity, + type => supervisor, + modules => [Mod] + }, + case supervisor:start_child(emqx_bridge_sup, ChildSpec) of + {ok, _Pid} -> + ok; + {error, already_present} -> + ok; + {error, {already_started, _Pid}} -> + ok + end. + +-spec start_consumer(config(), manager_id(), brod:client_id()) -> {ok, state()}. +start_consumer(Config, InstanceId, ClientID) -> + #{ + bootstrap_hosts := BootstrapHosts0, + bridge_name := BridgeName, + hookpoint := Hookpoint, + kafka := #{ + max_batch_bytes := MaxBatchBytes, + max_rejoin_attempts := MaxRejoinAttempts, + offset_commit_interval_seconds := OffsetCommitInterval, + offset_reset_policy := OffsetResetPolicy + }, + key_encoding_mode := KeyEncodingMode, + topic_mapping := TopicMapping0, + value_encoding_mode := ValueEncodingMode + } = Config, + ok = ensure_consumer_supervisor_started(), + TopicMapping = convert_topic_mapping(TopicMapping0), + InitialState = #{ + key_encoding_mode => KeyEncodingMode, + hookpoint => Hookpoint, + resource_id => emqx_bridge_resource:resource_id(kafka_consumer, BridgeName), + topic_mapping => TopicMapping, + value_encoding_mode => ValueEncodingMode + }, + %% note: the group id should be the same for all nodes in the + %% cluster, so that the load gets distributed between all + %% consumers and we don't repeat messages in the same cluster. 
+    GroupID = consumer_group_id(BridgeName),
+    ConsumerConfig = [
+        {max_bytes, MaxBatchBytes},
+        {offset_reset_policy, OffsetResetPolicy}
+    ],
+    GroupConfig = [
+        {max_rejoin_attempts, MaxRejoinAttempts},
+        {offset_commit_interval_seconds, OffsetCommitInterval}
+    ],
+    KafkaTopics = maps:keys(TopicMapping),
+    GroupSubscriberConfig =
+        #{
+            client => ClientID,
+            group_id => GroupID,
+            topics => KafkaTopics,
+            cb_module => ?MODULE,
+            init_data => InitialState,
+            message_type => message,
+            consumer_config => ConsumerConfig,
+            group_config => GroupConfig
+        },
+    %% Below, we spawn a single `brod_group_subscriber_v2' worker, with
+    %% no option for a pool of those.  This is because that worker
+    %% automatically spawns one child per assigned topic-partition, so
+    %% we should not spawn duplicate workers.
+    SubscriberId = make_subscriber_id(BridgeName),
+    case emqx_ee_bridge_kafka_consumer_sup:start_child(SubscriberId, GroupSubscriberConfig) of
+        {ok, _ConsumerPid} ->
+            ?tp(
+                kafka_consumer_subscriber_started,
+                #{instance_id => InstanceId, subscriber_id => SubscriberId}
+            ),
+            {ok, #{
+                subscriber_id => SubscriberId,
+                kafka_client_id => ClientID,
+                kafka_topics => KafkaTopics
+            }};
+        {error, Reason2} ->
+            ?SLOG(error, #{
+                msg => "failed_to_start_kafka_consumer",
+                instance_id => InstanceId,
+                kafka_hosts => emqx_bridge_impl_kafka:hosts(BootstrapHosts0),
+                reason => emqx_misc:redact(Reason2)
+            }),
+            stop_client(ClientID),
+            throw(failed_to_start_kafka_consumer)
+    end.
+
+-spec stop_subscriber(emqx_ee_bridge_kafka_consumer_sup:child_id()) -> ok.
+stop_subscriber(SubscriberId) ->
+    _ = log_when_error(
+        fun() ->
+            emqx_ee_bridge_kafka_consumer_sup:ensure_child_deleted(SubscriberId)
+        end,
+        #{
+            msg => "failed_to_delete_kafka_subscriber",
+            subscriber_id => SubscriberId
+        }
+    ),
+    ok.
+
+-spec stop_client(brod:client_id()) -> ok.
+stop_client(ClientID) ->
+    _ = log_when_error(
+        fun() ->
+            brod:stop_client(ClientID)
+        end,
+        #{
+            msg => "failed_to_delete_kafka_consumer_client",
+            client_id => ClientID
+        }
+    ),
+    ok.
+
+do_get_status(ClientID, [KafkaTopic | RestTopics], SubscriberId) ->
+    case brod:get_partitions_count(ClientID, KafkaTopic) of
+        {ok, NPartitions} ->
+            case do_get_status(ClientID, KafkaTopic, SubscriberId, NPartitions) of
+                connected -> do_get_status(ClientID, RestTopics, SubscriberId);
+                disconnected -> disconnected
+            end;
+        _ ->
+            disconnected
+    end;
+do_get_status(_ClientID, _KafkaTopics = [], _SubscriberId) ->
+    connected.
+
+-spec do_get_status(brod:client_id(), binary(), subscriber_id(), pos_integer()) ->
+    connected | disconnected.
+do_get_status(ClientID, KafkaTopic, SubscriberId, NPartitions) ->
+    Results =
+        lists:map(
+            fun(N) ->
+                brod_client:get_leader_connection(ClientID, KafkaTopic, N)
+            end,
+            lists:seq(0, NPartitions - 1)
+        ),
+    AllLeadersOk =
+        length(Results) > 0 andalso
+            lists:all(
+                fun
+                    ({ok, _}) ->
+                        true;
+                    (_) ->
+                        false
+                end,
+                Results
+            ),
+    WorkersAlive = are_subscriber_workers_alive(SubscriberId),
+    case AllLeadersOk andalso WorkersAlive of
+        true ->
+            connected;
+        false ->
+            disconnected
+    end.
+
+are_subscriber_workers_alive(SubscriberId) ->
+    Children = supervisor:which_children(emqx_ee_bridge_kafka_consumer_sup),
+    case lists:keyfind(SubscriberId, 1, Children) of
+        false ->
+            false;
+        {_, Pid, _, _} ->
+            Workers = brod_group_subscriber_v2:get_workers(Pid),
+            %% we can't enforce the number of partitions on a single
+            %% node, as the group might be spread across an emqx
+            %% cluster.
+ lists:all(fun is_process_alive/1, maps:values(Workers)) + end. + +log_when_error(Fun, Log) -> + try + Fun() + catch + C:E -> + ?SLOG(error, Log#{ + exception => C, + reason => E + }) + end. + +-spec consumer_group_id(atom() | binary()) -> binary(). +consumer_group_id(BridgeName0) -> + BridgeName = to_bin(BridgeName0), + <<"emqx-kafka-consumer-", BridgeName/binary>>. + +-spec is_dry_run(manager_id()) -> boolean(). +is_dry_run(InstanceId) -> + TestIdStart = string:find(InstanceId, ?TEST_ID_PREFIX), + case TestIdStart of + nomatch -> + false; + _ -> + string:equal(TestIdStart, InstanceId) + end. + +-spec make_client_id(manager_id(), kafka_consumer, atom() | binary()) -> atom(). +make_client_id(InstanceId, KafkaType, KafkaName) -> + case is_dry_run(InstanceId) of + false -> + ClientID0 = emqx_bridge_impl_kafka:make_client_id(KafkaType, KafkaName), + binary_to_atom(ClientID0); + true -> + %% It is a dry run and we don't want to leak too many + %% atoms. + probing_brod_consumers + end. + +convert_topic_mapping(TopicMappingList) -> + lists:foldl( + fun(Fields, Acc) -> + #{ + kafka_topic := KafkaTopic, + mqtt_topic := MQTTTopic, + qos := QoS, + payload_template := PayloadTemplate0 + } = Fields, + PayloadTemplate = emqx_plugin_libs_rule:preproc_tmpl(PayloadTemplate0), + Acc#{ + KafkaTopic => #{ + payload_template => PayloadTemplate, + mqtt_topic => MQTTTopic, + qos => QoS + } + } + end, + #{}, + TopicMappingList + ). + +render(FullMessage, PayloadTemplate) -> + Opts = #{ + return => full_binary, + var_trans => fun + (undefined) -> + <<>>; + (X) -> + emqx_plugin_libs_rule:bin(X) + end + }, + emqx_plugin_libs_rule:proc_tmpl(PayloadTemplate, FullMessage, Opts). + +encode(Value, none) -> + Value; +encode(Value, base64) -> + base64:encode(Value). + +to_bin(B) when is_binary(B) -> B; +to_bin(A) when is_atom(A) -> atom_to_binary(A, utf8). diff --git a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl index cff17b7de..d46f687dd 100644 --- a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl +++ b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl @@ -22,44 +22,39 @@ -include_lib("emqx/include/logger.hrl"). +%% TODO: rename this to `kafka_producer' after alias support is added +%% to hocon; keeping this as just `kafka' for backwards compatibility. +-define(BRIDGE_TYPE, kafka). + callback_mode() -> async_if_possible. %% @doc Config schema is defined in emqx_ee_bridge_kafka. 
on_start(InstId, Config) -> #{ - bridge_name := BridgeName, + authentication := Auth, bootstrap_hosts := Hosts0, + bridge_name := BridgeName, connect_timeout := ConnTimeout, + kafka := KafkaConfig = #{message := MessageTemplate, topic := KafkaTopic}, metadata_request_timeout := MetaReqTimeout, min_metadata_refresh_interval := MinMetaRefreshInterval, socket_opts := SocketOpts, - authentication := Auth, ssl := SSL } = Config, - %% TODO: change this to `kafka_producer` after refactoring for kafka_consumer - BridgeType = kafka, - ResourceID = emqx_bridge_resource:resource_id(BridgeType, BridgeName), - _ = maybe_install_wolff_telemetry_handlers(ResourceID), - %% it's a bug if producer config is not found - %% the caller should not try to start a producer if - %% there is no producer config - ProducerConfigWrapper = get_required(producer, Config, no_kafka_producer_config), - ProducerConfig = get_required(kafka, ProducerConfigWrapper, no_kafka_producer_parameters), - MessageTemplate = get_required(message, ProducerConfig, no_kafka_message_template), - Hosts = hosts(Hosts0), - ClientId = make_client_id(BridgeName), + BridgeType = ?BRIDGE_TYPE, + ResourceId = emqx_bridge_resource:resource_id(BridgeType, BridgeName), + _ = maybe_install_wolff_telemetry_handlers(ResourceId), + Hosts = emqx_bridge_impl_kafka:hosts(Hosts0), + ClientId = emqx_bridge_impl_kafka:make_client_id(BridgeType, BridgeName), ClientConfig = #{ min_metadata_refresh_interval => MinMetaRefreshInterval, connect_timeout => ConnTimeout, client_id => ClientId, request_timeout => MetaReqTimeout, extra_sock_opts => socket_opts(SocketOpts), - sasl => sasl(Auth), + sasl => emqx_bridge_impl_kafka:sasl(Auth), ssl => ssl(SSL) }, - #{ - topic := KafkaTopic - } = ProducerConfig, case wolff:ensure_supervised_client(ClientId, Hosts, ClientConfig) of {ok, _} -> ?SLOG(info, #{ @@ -85,7 +80,7 @@ on_start(InstId, Config) -> _ -> string:equal(TestIdStart, InstId) end, - WolffProducerConfig = producers_config(BridgeName, ClientId, ProducerConfig, IsDryRun), + WolffProducerConfig = producers_config(BridgeName, ClientId, KafkaConfig, IsDryRun), case wolff:ensure_supervised_producers(ClientId, KafkaTopic, WolffProducerConfig) of {ok, Producers} -> {ok, #{ @@ -93,7 +88,7 @@ on_start(InstId, Config) -> client_id => ClientId, kafka_topic => KafkaTopic, producers => Producers, - resource_id => ResourceID + resource_id => ResourceId }}; {error, Reason2} -> ?SLOG(error, #{ @@ -265,12 +260,6 @@ do_get_status(Client, KafkaTopic) -> disconnected end. -%% Parse comma separated host:port list into a [{Host,Port}] list -hosts(Hosts) when is_binary(Hosts) -> - hosts(binary_to_list(Hosts)); -hosts(Hosts) when is_list(Hosts) -> - kpro:parse_endpoints(Hosts). - %% Extra socket options, such as sndbuf size etc. socket_opts(Opts) when is_map(Opts) -> socket_opts(maps:to_list(Opts)); @@ -298,16 +287,6 @@ adjust_socket_buffer(Bytes, Opts) -> [{buffer, max(Bytes1, Bytes)} | Acc1] end. -sasl(none) -> - undefined; -sasl(#{mechanism := Mechanism, username := Username, password := Password}) -> - {Mechanism, Username, emqx_secret:wrap(Password)}; -sasl(#{ - kerberos_principal := Principal, - kerberos_keytab_file := KeyTabFile -}) -> - {callback, brod_gssapi, {gssapi, KeyTabFile, Principal}}. 
- ssl(#{enable := true} = SSL) -> emqx_tls_lib:to_client_opts(SSL); ssl(_) -> @@ -339,8 +318,7 @@ producers_config(BridgeName, ClientId, Input, IsDryRun) -> disk -> {false, replayq_dir(ClientId)}; hybrid -> {true, replayq_dir(ClientId)} end, - %% TODO: change this once we add kafka source - BridgeType = kafka, + BridgeType = ?BRIDGE_TYPE, ResourceID = emqx_bridge_resource:resource_id(BridgeType, BridgeName), #{ name => make_producer_name(BridgeName, IsDryRun), @@ -366,12 +344,6 @@ partitioner(key_dispatch) -> first_key_dispatch. replayq_dir(ClientId) -> filename:join([emqx:data_dir(), "kafka", ClientId]). -%% Client ID is better to be unique to make it easier for Kafka side trouble shooting. -make_client_id(BridgeName) when is_atom(BridgeName) -> - make_client_id(atom_to_list(BridgeName)); -make_client_id(BridgeName) -> - iolist_to_binary([BridgeName, ":", atom_to_list(node())]). - %% Producer name must be an atom which will be used as a ETS table name for %% partition worker lookup. make_producer_name(BridgeName, IsDryRun) when is_atom(BridgeName) -> @@ -400,11 +372,6 @@ with_log_at_error(Fun, Log) -> }) end. -get_required(Field, Config, Throw) -> - Value = maps:get(Field, Config, none), - Value =:= none andalso throw(Throw), - Value. - %% we *must* match the bridge id in the event metadata with that in %% the handler config; otherwise, multiple kafka producer bridges will %% install multiple handlers to the same wolff events, multiplying the diff --git a/lib-ee/emqx_ee_bridge/src/kafka/emqx_ee_bridge_kafka_consumer_sup.erl b/lib-ee/emqx_ee_bridge/src/kafka/emqx_ee_bridge_kafka_consumer_sup.erl new file mode 100644 index 000000000..feec8c09b --- /dev/null +++ b/lib-ee/emqx_ee_bridge/src/kafka/emqx_ee_bridge_kafka_consumer_sup.erl @@ -0,0 +1,79 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_ee_bridge_kafka_consumer_sup). + +-behaviour(supervisor). + +%% `supervisor' API +-export([init/1]). + +%% API +-export([ + start_link/0, + child_spec/2, + start_child/2, + ensure_child_deleted/1 +]). + +-type child_id() :: binary(). +-export_type([child_id/0]). + +%%-------------------------------------------------------------------------------------------- +%% API +%%-------------------------------------------------------------------------------------------- + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +-spec child_spec(child_id(), map()) -> supervisor:child_spec(). +child_spec(Id, GroupSubscriberConfig) -> + Mod = brod_group_subscriber_v2, + #{ + id => Id, + start => {Mod, start_link, [GroupSubscriberConfig]}, + restart => permanent, + shutdown => 10_000, + type => worker, + modules => [Mod] + }. + +-spec start_child(child_id(), map()) -> {ok, pid()} | {error, term()}. +start_child(Id, GroupSubscriberConfig) -> + ChildSpec = child_spec(Id, GroupSubscriberConfig), + case supervisor:start_child(?MODULE, ChildSpec) of + {ok, Pid} -> + {ok, Pid}; + {ok, Pid, _Info} -> + {ok, Pid}; + {error, already_present} -> + supervisor:restart_child(?MODULE, Id); + {error, {already_started, Pid}} -> + {ok, Pid}; + {error, Error} -> + {error, Error} + end. + +-spec ensure_child_deleted(child_id()) -> ok. +ensure_child_deleted(Id) -> + case supervisor:terminate_child(?MODULE, Id) of + ok -> + ok = supervisor:delete_child(?MODULE, Id), + ok; + {error, not_found} -> + ok + end. 
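For orientation, here is a minimal sketch of how `start_consumer/3` above drives this supervisor; all names below (`my_brod_client`, the group id, topic and child id) are hypothetical placeholders, and `InitialState` stands in for the `init_data` map built in `start_consumer/3`:

    %% Sketch only, assuming a running brod client registered as
    %% `my_brod_client' and an existing Kafka topic.
    InitialState = #{},
    GroupSubscriberConfig = #{
        client => my_brod_client,
        group_id => <<"emqx-kafka-consumer-mybridge">>,
        topics => [<<"some-kafka-topic">>],
        cb_module => emqx_bridge_impl_kafka_consumer,
        init_data => InitialState,
        message_type => message,
        consumer_config => [{max_bytes, 896 * 1024}],
        group_config => [{offset_commit_interval_seconds, 3}]
    },
    {ok, _Pid} = emqx_ee_bridge_kafka_consumer_sup:start_child(
        <<"kafka_subscriber:mybridge">>, GroupSubscriberConfig
    ).

Because `brod_group_subscriber_v2` itself spawns one worker per assigned topic-partition, a single child per bridge is enough; `ensure_child_deleted/1` is the idempotent counterpart used on bridge teardown.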
+ +%%-------------------------------------------------------------------------------------------- +%% `supervisor' API +%%-------------------------------------------------------------------------------------------- + +init([]) -> + SupFlags = #{ + strategy => one_for_one, + intensity => 100, + period => 10 + }, + ChildSpecs = [], + {ok, {SupFlags, ChildSpecs}}. diff --git a/lib-ee/emqx_ee_bridge/test/emqx_bridge_impl_kafka_consumer_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_bridge_impl_kafka_consumer_SUITE.erl new file mode 100644 index 000000000..15b4fbe40 --- /dev/null +++ b/lib-ee/emqx_ee_bridge/test/emqx_bridge_impl_kafka_consumer_SUITE.erl @@ -0,0 +1,1917 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- +-module(emqx_bridge_impl_kafka_consumer_SUITE). + +-compile(nowarn_export_all). +-compile(export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("brod/include/brod.hrl"). +-include_lib("emqx/include/emqx_mqtt.hrl"). + +-import(emqx_common_test_helpers, [on_exit/1]). + +-define(BRIDGE_TYPE_BIN, <<"kafka_consumer">>). + +%%------------------------------------------------------------------------------ +%% CT boilerplate +%%------------------------------------------------------------------------------ + +all() -> + [ + {group, plain}, + {group, ssl}, + {group, sasl_plain}, + {group, sasl_ssl} + ]. + +groups() -> + AllTCs = emqx_common_test_helpers:all(?MODULE), + SASLAuths = [ + sasl_auth_plain, + sasl_auth_scram256, + sasl_auth_scram512, + sasl_auth_kerberos + ], + SASLAuthGroups = [{group, Type} || Type <- SASLAuths], + OnlyOnceTCs = only_once_tests(), + MatrixTCs = AllTCs -- OnlyOnceTCs, + SASLTests = [{Group, MatrixTCs} || Group <- SASLAuths], + [ + {plain, MatrixTCs ++ OnlyOnceTCs}, + {ssl, MatrixTCs}, + {sasl_plain, SASLAuthGroups}, + {sasl_ssl, SASLAuthGroups} + ] ++ SASLTests. + +sasl_only_tests() -> + [t_failed_creation_then_fixed]. + +%% tests that do not need to be run on all groups +only_once_tests() -> + [ + t_bridge_rule_action_source, + t_cluster_group, + t_node_joins_existing_cluster, + t_cluster_node_down, + t_multiple_topic_mappings + ]. + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + emqx_mgmt_api_test_util:end_suite(), + ok = emqx_common_test_helpers:stop_apps([emqx_conf]), + ok = emqx_connector_test_helpers:stop_apps([emqx_bridge, emqx_resource, emqx_rule_engine]), + _ = application:stop(emqx_connector), + ok. 
+ +init_per_group(plain = Type, Config) -> + KafkaHost = os:getenv("KAFKA_PLAIN_HOST", "toxiproxy.emqx.net"), + KafkaPort = list_to_integer(os:getenv("KAFKA_PLAIN_PORT", "9292")), + DirectKafkaHost = os:getenv("KAFKA_DIRECT_PLAIN_HOST", "kafka-1.emqx.net"), + DirectKafkaPort = list_to_integer(os:getenv("KAFKA_DIRECT_PLAIN_PORT", "9092")), + ProxyName = "kafka_plain", + case emqx_common_test_helpers:is_tcp_server_available(KafkaHost, KafkaPort) of + true -> + Config1 = common_init_per_group(), + [ + {proxy_name, ProxyName}, + {kafka_host, KafkaHost}, + {kafka_port, KafkaPort}, + {direct_kafka_host, DirectKafkaHost}, + {direct_kafka_port, DirectKafkaPort}, + {kafka_type, Type}, + {use_sasl, false}, + {use_tls, false} + | Config1 ++ Config + ]; + false -> + case os:getenv("IS_CI") of + "yes" -> + throw(no_kafka); + _ -> + {skip, no_kafka} + end + end; +init_per_group(sasl_plain = Type, Config) -> + KafkaHost = os:getenv("KAFKA_SASL_PLAIN_HOST", "toxiproxy.emqx.net"), + KafkaPort = list_to_integer(os:getenv("KAFKA_SASL_PLAIN_PORT", "9293")), + DirectKafkaHost = os:getenv("KAFKA_DIRECT_SASL_HOST", "kafka-1.emqx.net"), + DirectKafkaPort = list_to_integer(os:getenv("KAFKA_DIRECT_SASL_PORT", "9093")), + ProxyName = "kafka_sasl_plain", + case emqx_common_test_helpers:is_tcp_server_available(KafkaHost, KafkaPort) of + true -> + Config1 = common_init_per_group(), + [ + {proxy_name, ProxyName}, + {kafka_host, KafkaHost}, + {kafka_port, KafkaPort}, + {direct_kafka_host, DirectKafkaHost}, + {direct_kafka_port, DirectKafkaPort}, + {kafka_type, Type}, + {use_sasl, true}, + {use_tls, false} + | Config1 ++ Config + ]; + false -> + case os:getenv("IS_CI") of + "yes" -> + throw(no_kafka); + _ -> + {skip, no_kafka} + end + end; +init_per_group(ssl = Type, Config) -> + KafkaHost = os:getenv("KAFKA_SSL_HOST", "toxiproxy.emqx.net"), + KafkaPort = list_to_integer(os:getenv("KAFKA_SSL_PORT", "9294")), + DirectKafkaHost = os:getenv("KAFKA_DIRECT_SSL_HOST", "kafka-1.emqx.net"), + DirectKafkaPort = list_to_integer(os:getenv("KAFKA_DIRECT_SSL_PORT", "9094")), + ProxyName = "kafka_ssl", + case emqx_common_test_helpers:is_tcp_server_available(KafkaHost, KafkaPort) of + true -> + Config1 = common_init_per_group(), + [ + {proxy_name, ProxyName}, + {kafka_host, KafkaHost}, + {kafka_port, KafkaPort}, + {direct_kafka_host, DirectKafkaHost}, + {direct_kafka_port, DirectKafkaPort}, + {kafka_type, Type}, + {use_sasl, false}, + {use_tls, true} + | Config1 ++ Config + ]; + false -> + case os:getenv("IS_CI") of + "yes" -> + throw(no_kafka); + _ -> + {skip, no_kafka} + end + end; +init_per_group(sasl_ssl = Type, Config) -> + KafkaHost = os:getenv("KAFKA_SASL_SSL_HOST", "toxiproxy.emqx.net"), + KafkaPort = list_to_integer(os:getenv("KAFKA_SASL_SSL_PORT", "9295")), + DirectKafkaHost = os:getenv("KAFKA_DIRECT_SASL_SSL_HOST", "kafka-1.emqx.net"), + DirectKafkaPort = list_to_integer(os:getenv("KAFKA_DIRECT_SASL_SSL_PORT", "9095")), + ProxyName = "kafka_sasl_ssl", + case emqx_common_test_helpers:is_tcp_server_available(KafkaHost, KafkaPort) of + true -> + Config1 = common_init_per_group(), + [ + {proxy_name, ProxyName}, + {kafka_host, KafkaHost}, + {kafka_port, KafkaPort}, + {direct_kafka_host, DirectKafkaHost}, + {direct_kafka_port, DirectKafkaPort}, + {kafka_type, Type}, + {use_sasl, true}, + {use_tls, true} + | Config1 ++ Config + ]; + false -> + case os:getenv("IS_CI") of + "yes" -> + throw(no_kafka); + _ -> + {skip, no_kafka} + end + end; +init_per_group(sasl_auth_plain, Config) -> + [{sasl_auth_mechanism, plain} | Config]; 
+init_per_group(sasl_auth_scram256, Config) -> + [{sasl_auth_mechanism, scram_sha_256} | Config]; +init_per_group(sasl_auth_scram512, Config) -> + [{sasl_auth_mechanism, scram_sha_512} | Config]; +init_per_group(sasl_auth_kerberos, Config0) -> + %% currently it's tricky to setup kerberos + toxiproxy, probably + %% due to hostname issues... + UseTLS = ?config(use_tls, Config0), + {KafkaHost, KafkaPort} = + case UseTLS of + true -> + { + os:getenv("KAFKA_SASL_SSL_HOST", "kafka-1.emqx.net"), + list_to_integer(os:getenv("KAFKA_SASL_SSL_PORT", "9095")) + }; + false -> + { + os:getenv("KAFKA_SASL_PLAIN_HOST", "kafka-1.emqx.net"), + list_to_integer(os:getenv("KAFKA_SASL_PLAIN_PORT", "9093")) + } + end, + Config = + lists:map( + fun + ({kafka_host, _KafkaHost}) -> + {kafka_host, KafkaHost}; + ({kafka_port, _KafkaPort}) -> + {kafka_port, KafkaPort}; + (KV) -> + KV + end, + [{has_proxy, false}, {sasl_auth_mechanism, kerberos} | Config0] + ), + Config; +init_per_group(_Group, Config) -> + Config. + +common_init_per_group() -> + ProxyHost = os:getenv("PROXY_HOST", "toxiproxy"), + ProxyPort = list_to_integer(os:getenv("PROXY_PORT", "8474")), + emqx_common_test_helpers:reset_proxy(ProxyHost, ProxyPort), + application:load(emqx_bridge), + ok = emqx_common_test_helpers:start_apps([emqx_conf]), + ok = emqx_connector_test_helpers:start_apps([emqx_resource, emqx_bridge, emqx_rule_engine]), + {ok, _} = application:ensure_all_started(emqx_connector), + emqx_mgmt_api_test_util:init_suite(), + UniqueNum = integer_to_binary(erlang:unique_integer()), + MQTTTopic = <<"mqtt/topic/", UniqueNum/binary>>, + [ + {proxy_host, ProxyHost}, + {proxy_port, ProxyPort}, + {mqtt_topic, MQTTTopic}, + {mqtt_qos, 0}, + {mqtt_payload, full_message}, + {num_partitions, 3} + ]. + +common_end_per_group(Config) -> + ProxyHost = ?config(proxy_host, Config), + ProxyPort = ?config(proxy_port, Config), + emqx_common_test_helpers:reset_proxy(ProxyHost, ProxyPort), + delete_all_bridges(), + ok. + +end_per_group(Group, Config) when + Group =:= plain; + Group =:= ssl; + Group =:= sasl_plain; + Group =:= sasl_ssl +-> + common_end_per_group(Config), + ok; +end_per_group(_Group, _Config) -> + ok. 
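When pointing the suite at a locally running broker instead of the CI docker network, the defaults above can be overridden via the environment variables read in `init_per_group/2` and `common_init_per_group/0`. A sketch, from the Erlang shell of the node that will run CT:

    %% Sketch: aim the `plain' group at a broker on localhost.
    true = os:putenv("KAFKA_PLAIN_HOST", "localhost"),
    true = os:putenv("KAFKA_PLAIN_PORT", "9192"),
    true = os:putenv("PROXY_HOST", "localhost").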
+
+init_per_testcase(TestCase, Config) when
+    TestCase =:= t_failed_creation_then_fixed
+->
+    KafkaType = ?config(kafka_type, Config),
+    AuthMechanism = ?config(sasl_auth_mechanism, Config),
+    IsSASL = lists:member(KafkaType, [sasl_plain, sasl_ssl]),
+    case {IsSASL, AuthMechanism} of
+        {true, kerberos} ->
+            [{skip_does_not_apply, true}];
+        {true, _} ->
+            common_init_per_testcase(TestCase, Config);
+        {false, _} ->
+            [{skip_does_not_apply, true}]
+    end;
+init_per_testcase(TestCase, Config) when
+    TestCase =:= t_failed_creation_then_fixed;
+    TestCase =:= t_on_get_status;
+    TestCase =:= t_receive_after_recovery
+->
+    HasProxy = proplists:get_value(has_proxy, Config, true),
+    case HasProxy of
+        false ->
+            [{skip_does_not_apply, true}];
+        true ->
+            common_init_per_testcase(TestCase, Config)
+    end;
+init_per_testcase(t_cluster_group = TestCase, Config0) ->
+    Config = emqx_misc:merge_opts(Config0, [{num_partitions, 6}]),
+    common_init_per_testcase(TestCase, Config);
+init_per_testcase(t_multiple_topic_mappings = TestCase, Config0) ->
+    KafkaTopicBase =
+        <<
+            (atom_to_binary(TestCase))/binary,
+            (integer_to_binary(erlang:unique_integer()))/binary
+        >>,
+    MQTTTopicBase =
+        <<"mqtt/", (atom_to_binary(TestCase))/binary,
+            (integer_to_binary(erlang:unique_integer()))/binary, "/">>,
+    TopicMapping =
+        [
+            #{
+                kafka_topic => <<KafkaTopicBase/binary, "-1">>,
+                mqtt_topic => <<MQTTTopicBase/binary, "1">>,
+                qos => 1,
+                payload_template => <<"${.}">>
+            },
+            #{
+                kafka_topic => <<KafkaTopicBase/binary, "-2">>,
+                mqtt_topic => <<MQTTTopicBase/binary, "2">>,
+                qos => 2,
+                payload_template => <<"v = ${.value}">>
+            }
+        ],
+    Config = [{topic_mapping, TopicMapping} | Config0],
+    common_init_per_testcase(TestCase, Config);
+init_per_testcase(TestCase, Config) ->
+    common_init_per_testcase(TestCase, Config).
+
+common_init_per_testcase(TestCase, Config0) ->
+    ct:timetrap(timer:seconds(60)),
+    delete_all_bridges(),
+    KafkaTopic =
+        <<
+            (atom_to_binary(TestCase))/binary,
+            (integer_to_binary(erlang:unique_integer()))/binary
+        >>,
+    KafkaType = ?config(kafka_type, Config0),
+    UniqueNum = integer_to_binary(erlang:unique_integer()),
+    MQTTTopic = proplists:get_value(mqtt_topic, Config0, <<"mqtt/topic/", UniqueNum/binary>>),
+    MQTTQoS = proplists:get_value(mqtt_qos, Config0, 0),
+    DefaultTopicMapping = [
+        #{
+            kafka_topic => KafkaTopic,
+            mqtt_topic => MQTTTopic,
+            qos => MQTTQoS,
+            payload_template => <<"${.}">>
+        }
+    ],
+    TopicMapping = proplists:get_value(topic_mapping, Config0, DefaultTopicMapping),
+    Config = [
+        {kafka_topic, KafkaTopic},
+        {topic_mapping, TopicMapping}
+        | Config0
+    ],
+    {Name, ConfigString, KafkaConfig} = kafka_config(
+        TestCase, KafkaType, Config
+    ),
+    ensure_topics(Config),
+    ProducersConfigs = start_producers(TestCase, Config),
+    ok = snabbkaffe:start_trace(),
+    [
+        {kafka_name, Name},
+        {kafka_config_string, ConfigString},
+        {kafka_config, KafkaConfig},
+        {kafka_producers, ProducersConfigs}
+        | Config
+    ].
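As a concrete illustration (hypothetical values; `erlang:unique_integer/0` output differs on every call, and the leading minus sign is just the sign of the integer), the per-testcase names built above come out like:

    %% e.g. for TestCase = t_start_and_consume_ok:
    KafkaTopic = <<"t_start_and_consume_ok-576460752303423486">>,
    %% and, when no mqtt_topic is present in the group config:
    MQTTTopic = <<"mqtt/topic/-576460752303423485">>.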
+
+end_per_testcase(_Testcase, Config) ->
+    case proplists:get_bool(skip_does_not_apply, Config) of
+        true ->
+            ok;
+        false ->
+            ProxyHost = ?config(proxy_host, Config),
+            ProxyPort = ?config(proxy_port, Config),
+            ProducersConfigs = ?config(kafka_producers, Config),
+            emqx_common_test_helpers:reset_proxy(ProxyHost, ProxyPort),
+            delete_all_bridges(),
+            #{clientid := KafkaProducerClientId, producers := ProducersMapping} =
+                ProducersConfigs,
+            lists:foreach(
+                fun(Producers) ->
+                    ok = wolff:stop_and_delete_supervised_producers(Producers)
+                end,
+                maps:values(ProducersMapping)
+            ),
+            ok = wolff:stop_and_delete_supervised_client(KafkaProducerClientId),
+            emqx_common_test_helpers:call_janitor(),
+            ok = snabbkaffe:stop(),
+            ok
+    end.
+
+%%------------------------------------------------------------------------------
+%% Helper fns
+%%------------------------------------------------------------------------------
+
+start_producers(TestCase, Config) ->
+    TopicMapping = ?config(topic_mapping, Config),
+    KafkaClientId =
+        <<"test-client-", (atom_to_binary(TestCase))/binary,
+            (integer_to_binary(erlang:unique_integer()))/binary>>,
+    DirectKafkaHost = ?config(direct_kafka_host, Config),
+    DirectKafkaPort = ?config(direct_kafka_port, Config),
+    UseTLS = ?config(use_tls, Config),
+    UseSASL = ?config(use_sasl, Config),
+    Hosts = emqx_bridge_impl_kafka:hosts(
+        DirectKafkaHost ++ ":" ++ integer_to_list(DirectKafkaPort)
+    ),
+    SSL =
+        case UseTLS of
+            true ->
+                %% hint: when running locally, need to
+                %% `chmod og+rw` those files to be readable.
+                emqx_tls_lib:to_client_opts(
+                    #{
+                        keyfile => shared_secret(client_keyfile),
+                        certfile => shared_secret(client_certfile),
+                        cacertfile => shared_secret(client_cacertfile),
+                        verify => verify_none,
+                        enable => true
+                    }
+                );
+            false ->
+                []
+        end,
+    SASL =
+        case UseSASL of
+            true -> {plain, <<"emqxuser">>, <<"password">>};
+            false -> undefined
+        end,
+    ClientConfig = #{
+        min_metadata_refresh_interval => 5_000,
+        connect_timeout => 5_000,
+        client_id => KafkaClientId,
+        request_timeout => 1_000,
+        sasl => SASL,
+        ssl => SSL
+    },
+    {ok, Clients} = wolff:ensure_supervised_client(KafkaClientId, Hosts, ClientConfig),
+    ProducersData0 =
+        #{
+            clients => Clients,
+            clientid => KafkaClientId,
+            producers => #{}
+        },
+    lists:foldl(
+        fun(#{kafka_topic := KafkaTopic}, #{producers := ProducersMapping0} = Acc) ->
+            Producers = do_start_producer(KafkaClientId, KafkaTopic),
+            ProducersMapping = ProducersMapping0#{KafkaTopic => Producers},
+            Acc#{producers := ProducersMapping}
+        end,
+        ProducersData0,
+        TopicMapping
+    ).
+
+do_start_producer(KafkaClientId, KafkaTopic) ->
+    Name = binary_to_atom(<<KafkaClientId/binary, "-", KafkaTopic/binary>>),
+    ProducerConfig =
+        #{
+            name => Name,
+            partitioner => roundrobin,
+            partition_count_refresh_interval_seconds => 1_000,
+            replayq_max_total_bytes => 10_000,
+            replayq_seg_bytes => 9_000,
+            drop_if_highmem => false,
+            required_acks => leader_only,
+            max_batch_bytes => 900_000,
+            max_send_ahead => 0,
+            compression => no_compression,
+            telemetry_meta_data => #{}
+        },
+    {ok, Producers} = wolff:ensure_supervised_producers(KafkaClientId, KafkaTopic, ProducerConfig),
+    Producers.
+ +ensure_topics(Config) -> + TopicMapping = ?config(topic_mapping, Config), + KafkaHost = ?config(kafka_host, Config), + KafkaPort = ?config(kafka_port, Config), + UseTLS = ?config(use_tls, Config), + UseSASL = ?config(use_sasl, Config), + NumPartitions = proplists:get_value(num_partitions, Config, 3), + Endpoints = [{KafkaHost, KafkaPort}], + TopicConfigs = [ + #{ + name => KafkaTopic, + num_partitions => NumPartitions, + replication_factor => 1, + assignments => [], + configs => [] + } + || #{kafka_topic := KafkaTopic} <- TopicMapping + ], + RequestConfig = #{timeout => 5_000}, + ConnConfig0 = + case UseTLS of + true -> + %% hint: when running locally, need to + %% `chmod og+rw` those files to be readable. + #{ + ssl => emqx_tls_lib:to_client_opts( + #{ + keyfile => shared_secret(client_keyfile), + certfile => shared_secret(client_certfile), + cacertfile => shared_secret(client_cacertfile), + verify => verify_none, + enable => true + } + ) + }; + false -> + #{} + end, + ConnConfig = + case UseSASL of + true -> + ConnConfig0#{sasl => {plain, <<"emqxuser">>, <<"password">>}}; + false -> + ConnConfig0#{sasl => undefined} + end, + case brod:create_topics(Endpoints, TopicConfigs, RequestConfig, ConnConfig) of + ok -> ok; + {error, topic_already_exists} -> ok + end. + +shared_secret_path() -> + os:getenv("CI_SHARED_SECRET_PATH", "/var/lib/secret"). + +shared_secret(client_keyfile) -> + filename:join([shared_secret_path(), "client.key"]); +shared_secret(client_certfile) -> + filename:join([shared_secret_path(), "client.crt"]); +shared_secret(client_cacertfile) -> + filename:join([shared_secret_path(), "ca.crt"]); +shared_secret(rig_keytab) -> + filename:join([shared_secret_path(), "rig.keytab"]). + +publish(Config, Messages) -> + %% pick the first topic if not specified + #{producers := ProducersMapping} = ?config(kafka_producers, Config), + [{KafkaTopic, Producers} | _] = maps:to_list(ProducersMapping), + ct:pal("publishing to ~p:\n ~p", [KafkaTopic, Messages]), + {_Partition, _OffsetReply} = wolff:send_sync(Producers, Messages, 10_000). + +publish(Config, KafkaTopic, Messages) -> + #{producers := ProducersMapping} = ?config(kafka_producers, Config), + #{KafkaTopic := Producers} = ProducersMapping, + ct:pal("publishing to ~p:\n ~p", [KafkaTopic, Messages]), + {_Partition, _OffsetReply} = wolff:send_sync(Producers, Messages, 10_000). 
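For example (a sketch; the message map keys are the ones wolff's `send_sync/3` accepts and the ones the testcases below use):

    %% Publish one record to the first mapped topic...
    {_Partition0, _Offset0} =
        publish(Config, [#{key => <<"k">>, value => <<"v">>}]),
    %% ...or to an explicit topic, with Kafka headers:
    KafkaTopic = ?config(kafka_topic, Config),
    {_Partition1, _Offset1} =
        publish(Config, KafkaTopic, [
            #{key => <<"k">>, value => <<"v">>, headers => [{<<"hkey">>, <<"hvalue">>}]}
        ]).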
+ +kafka_config(TestCase, _KafkaType, Config) -> + UniqueNum = integer_to_binary(erlang:unique_integer()), + KafkaHost = ?config(kafka_host, Config), + KafkaPort = ?config(kafka_port, Config), + KafkaTopic = ?config(kafka_topic, Config), + AuthType = proplists:get_value(sasl_auth_mechanism, Config, none), + UseTLS = proplists:get_value(use_tls, Config, false), + Name = << + (atom_to_binary(TestCase))/binary, UniqueNum/binary + >>, + MQTTTopic = proplists:get_value(mqtt_topic, Config, <<"mqtt/topic/", UniqueNum/binary>>), + MQTTQoS = proplists:get_value(mqtt_qos, Config, 0), + DefaultTopicMapping = [ + #{ + kafka_topic => KafkaTopic, + mqtt_topic => MQTTTopic, + qos => MQTTQoS, + payload_template => <<"${.}">> + } + ], + TopicMapping0 = proplists:get_value(topic_mapping, Config, DefaultTopicMapping), + TopicMappingStr = topic_mapping(TopicMapping0), + ConfigString = + io_lib:format( + "bridges.kafka_consumer.~s {\n" + " enable = true\n" + " bootstrap_hosts = \"~p:~b\"\n" + " connect_timeout = 5s\n" + " min_metadata_refresh_interval = 3s\n" + " metadata_request_timeout = 5s\n" + "~s" + " kafka {\n" + " max_batch_bytes = 896KB\n" + " max_rejoin_attempts = 5\n" + " offset_commit_interval_seconds = 3\n" + %% todo: matrix this + " offset_reset_policy = reset_to_latest\n" + " }\n" + "~s" + " key_encoding_mode = none\n" + " value_encoding_mode = none\n" + " ssl {\n" + " enable = ~p\n" + " verify = verify_none\n" + " server_name_indication = \"auto\"\n" + " }\n" + "}\n", + [ + Name, + KafkaHost, + KafkaPort, + authentication(AuthType), + TopicMappingStr, + UseTLS + ] + ), + {Name, ConfigString, parse_and_check(ConfigString, Name)}. + +topic_mapping(TopicMapping0) -> + Template0 = << + "{kafka_topic = \"{{ kafka_topic }}\"," + " mqtt_topic = \"{{ mqtt_topic }}\"," + " qos = {{ qos }}," + " payload_template = \"{{{ payload_template }}}\" }" + >>, + Template = bbmustache:parse_binary(Template0), + Entries = + lists:map( + fun(Params) -> + bbmustache:compile(Template, Params, [{key_type, atom}]) + end, + TopicMapping0 + ), + iolist_to_binary( + [ + " topic_mapping = [", + lists:join(<<",\n">>, Entries), + "]\n" + ] + ). + +authentication(Type) when + Type =:= scram_sha_256; + Type =:= scram_sha_512; + Type =:= plain +-> + io_lib:format( + " authentication = {\n" + " mechanism = ~p\n" + " username = emqxuser\n" + " password = password\n" + " }\n", + [Type] + ); +authentication(kerberos) -> + %% TODO: how to make this work locally outside docker??? + io_lib:format( + " authentication = {\n" + " kerberos_principal = rig@KDC.EMQX.NET\n" + " kerberos_keytab_file = \"~s\"\n" + " }\n", + [shared_secret(rig_keytab)] + ); +authentication(_) -> + " authentication = none\n". + +parse_and_check(ConfigString, Name) -> + {ok, RawConf} = hocon:binary(ConfigString, #{format => map}), + TypeBin = ?BRIDGE_TYPE_BIN, + hocon_tconf:check_plain(emqx_bridge_schema, RawConf, #{required => false, atom_key => false}), + #{<<"bridges">> := #{TypeBin := #{Name := Config}}} = RawConf, + Config. + +create_bridge(Config) -> + create_bridge(Config, _Overrides = #{}). + +create_bridge(Config, Overrides) -> + Type = ?BRIDGE_TYPE_BIN, + Name = ?config(kafka_name, Config), + KafkaConfig0 = ?config(kafka_config, Config), + KafkaConfig = emqx_map_lib:deep_merge(KafkaConfig0, Overrides), + emqx_bridge:create(Type, Name, KafkaConfig). + +delete_bridge(Config) -> + Type = ?BRIDGE_TYPE_BIN, + Name = ?config(kafka_name, Config), + emqx_bridge:remove(Type, Name). 
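To make the generated string concrete: for the `plain` group with defaults, the HOCON produced by `kafka_config/3` renders approximately as below (illustrative only; the bridge name, host, port and topic names are per-run values):

    bridges.kafka_consumer.t_start_and_consume_ok953 {
      enable = true
      bootstrap_hosts = "toxiproxy.emqx.net:9292"
      connect_timeout = 5s
      min_metadata_refresh_interval = 3s
      metadata_request_timeout = 5s
      authentication = none
      kafka {
        max_batch_bytes = 896KB
        max_rejoin_attempts = 5
        offset_commit_interval_seconds = 3
        offset_reset_policy = reset_to_latest
      }
      topic_mapping = [
        {kafka_topic = "t_start_and_consume_ok954", mqtt_topic = "mqtt/topic/955", qos = 0, payload_template = "${.}" }
      ]
      key_encoding_mode = none
      value_encoding_mode = none
      ssl {
        enable = false
        verify = verify_none
        server_name_indication = "auto"
      }
    }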
+ +delete_all_bridges() -> + lists:foreach( + fun(#{name := Name, type := Type}) -> + emqx_bridge:remove(Type, Name) + end, + emqx_bridge:list() + ). + +create_bridge_api(Config) -> + create_bridge_api(Config, _Overrides = #{}). + +create_bridge_api(Config, Overrides) -> + TypeBin = ?BRIDGE_TYPE_BIN, + Name = ?config(kafka_name, Config), + KafkaConfig0 = ?config(kafka_config, Config), + KafkaConfig = emqx_map_lib:deep_merge(KafkaConfig0, Overrides), + Params = KafkaConfig#{<<"type">> => TypeBin, <<"name">> => Name}, + Path = emqx_mgmt_api_test_util:api_path(["bridges"]), + AuthHeader = emqx_mgmt_api_test_util:auth_header_(), + Opts = #{return_all => true}, + ct:pal("creating bridge (via http): ~p", [Params]), + Res = + case emqx_mgmt_api_test_util:request_api(post, Path, "", AuthHeader, Params, Opts) of + {ok, {Status, Headers, Body0}} -> + {ok, {Status, Headers, emqx_json:decode(Body0, [return_maps])}}; + Error -> + Error + end, + ct:pal("bridge create result: ~p", [Res]), + Res. + +update_bridge_api(Config) -> + update_bridge_api(Config, _Overrides = #{}). + +update_bridge_api(Config, Overrides) -> + TypeBin = ?BRIDGE_TYPE_BIN, + Name = ?config(kafka_name, Config), + KafkaConfig0 = ?config(kafka_config, Config), + KafkaConfig = emqx_map_lib:deep_merge(KafkaConfig0, Overrides), + BridgeId = emqx_bridge_resource:bridge_id(TypeBin, Name), + Params = KafkaConfig#{<<"type">> => TypeBin, <<"name">> => Name}, + Path = emqx_mgmt_api_test_util:api_path(["bridges", BridgeId]), + AuthHeader = emqx_mgmt_api_test_util:auth_header_(), + Opts = #{return_all => true}, + ct:pal("updating bridge (via http): ~p", [Params]), + Res = + case emqx_mgmt_api_test_util:request_api(put, Path, "", AuthHeader, Params, Opts) of + {ok, {_Status, _Headers, Body0}} -> {ok, emqx_json:decode(Body0, [return_maps])}; + Error -> Error + end, + ct:pal("bridge update result: ~p", [Res]), + Res. + +probe_bridge_api(Config) -> + TypeBin = ?BRIDGE_TYPE_BIN, + Name = ?config(kafka_name, Config), + KafkaConfig = ?config(kafka_config, Config), + Params = KafkaConfig#{<<"type">> => TypeBin, <<"name">> => Name}, + Path = emqx_mgmt_api_test_util:api_path(["bridges_probe"]), + AuthHeader = emqx_mgmt_api_test_util:auth_header_(), + Opts = #{return_all => true}, + ct:pal("probing bridge (via http): ~p", [Params]), + Res = + case emqx_mgmt_api_test_util:request_api(post, Path, "", AuthHeader, Params, Opts) of + {ok, {{_, 204, _}, _Headers, _Body0} = Res0} -> {ok, Res0}; + Error -> Error + end, + ct:pal("bridge probe result: ~p", [Res]), + Res. + +send_message(Config, Payload) -> + Name = ?config(kafka_name, Config), + Type = ?BRIDGE_TYPE_BIN, + BridgeId = emqx_bridge_resource:bridge_id(Type, Name), + emqx_bridge:send_message(BridgeId, Payload). + +resource_id(Config) -> + Type = ?BRIDGE_TYPE_BIN, + Name = ?config(kafka_name, Config), + emqx_bridge_resource:resource_id(Type, Name). + +instance_id(Config) -> + ResourceId = resource_id(Config), + [{_, InstanceId}] = ets:lookup(emqx_resource_manager, {owner, ResourceId}), + InstanceId. + +wait_for_expected_published_messages(Messages0, Timeout) -> + Messages = maps:from_list([{K, Msg} || Msg = #{key := K} <- Messages0]), + do_wait_for_expected_published_messages(Messages, [], Timeout). 
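Typical testcase usage of these HTTP helpers (sketch; `Config` is the proplist assembled in `common_init_per_testcase/2`, and the return shapes match the ones pattern-matched above):

    {ok, {{_, 201, _}, _Headers0, _Body0}} = create_bridge_api(Config),
    {ok, _UpdatedBridge} = update_bridge_api(Config, #{<<"enable">> => true}),
    {ok, {{_, 204, _}, _Headers1, _Body1}} = probe_bridge_api(Config).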
+
+do_wait_for_expected_published_messages(Messages, Acc, _Timeout) when map_size(Messages) =:= 0 ->
+    lists:reverse(Acc);
+do_wait_for_expected_published_messages(Messages0, Acc0, Timeout) ->
+    receive
+        {publish, Msg0 = #{payload := Payload}} ->
+            case emqx_json:safe_decode(Payload, [return_maps]) of
+                {error, _} ->
+                    ct:pal("unexpected message: ~p; discarding", [Msg0]),
+                    do_wait_for_expected_published_messages(Messages0, Acc0, Timeout);
+                {ok, Decoded = #{<<"key">> := K}} when is_map_key(K, Messages0) ->
+                    Msg = Msg0#{payload := Decoded},
+                    ct:pal("received expected message: ~p", [Msg]),
+                    Acc = [Msg | Acc0],
+                    Messages = maps:remove(K, Messages0),
+                    do_wait_for_expected_published_messages(Messages, Acc, Timeout);
+                {ok, Decoded} ->
+                    ct:pal("unexpected message: ~p; discarding", [Msg0#{payload := Decoded}]),
+                    do_wait_for_expected_published_messages(Messages0, Acc0, Timeout)
+            end
+    after Timeout ->
+        error(
+            {timed_out_waiting_for_published_messages, #{
+                so_far => Acc0,
+                remaining => Messages0,
+                mailbox => process_info(self(), messages)
+            }}
+        )
+    end.
+
+receive_published() ->
+    receive_published(#{}).
+
+receive_published(Opts0) ->
+    Default = #{n => 1, timeout => 10_000},
+    Opts = maps:merge(Default, Opts0),
+    receive_published(Opts, []).
+
+receive_published(#{n := N, timeout := _Timeout}, Acc) when N =< 0 ->
+    lists:reverse(Acc);
+receive_published(#{n := N, timeout := Timeout} = Opts, Acc) ->
+    receive
+        {publish, Msg} ->
+            receive_published(Opts#{n := N - 1}, [Msg | Acc])
+    after Timeout ->
+        error(
+            {timeout, #{
+                msgs_so_far => Acc,
+                mailbox => process_info(self(), messages),
+                expected_remaining => N
+            }}
+        )
+    end.
+
+wait_until_subscribers_are_ready(N, Timeout) ->
+    {ok, _} =
+        snabbkaffe:block_until(
+            ?match_n_events(N, #{?snk_kind := kafka_consumer_subscriber_init}),
+            Timeout
+        ),
+    ok.
+
+%% Kinda hacky, but for as yet unknown reasons kafka/brod seem a bit
+%% flaky about when they actually start consuming messages...
+%% `Period' should be greater than the consumer's `sleep_timeout'
+%% (default 1 s).
+ping_until_healthy(Config, Period, Timeout) ->
+    #{producers := ProducersMapping} = ?config(kafka_producers, Config),
+    [KafkaTopic | _] = maps:keys(ProducersMapping),
+    ping_until_healthy(Config, KafkaTopic, Period, Timeout).
+
+ping_until_healthy(_Config, _KafkaTopic, _Period, Timeout) when Timeout =< 0 ->
+    ct:fail("kafka subscriber did not stabilize!");
+ping_until_healthy(Config, KafkaTopic, Period, Timeout) ->
+    TimeA = erlang:monotonic_time(millisecond),
+    Payload = emqx_guid:to_hexstr(emqx_guid:gen()),
+    publish(Config, KafkaTopic, [#{key => <<"probing">>, value => Payload}]),
+    Res =
+        ?block_until(
+            #{
+                ?snk_kind := kafka_consumer_handle_message,
+                ?snk_span := {complete, _},
+                message := #kafka_message{value = Payload}
+            },
+            Period
+        ),
+    case Res of
+        timeout ->
+            TimeB = erlang:monotonic_time(millisecond),
+            ConsumedTime = TimeB - TimeA,
+            %% Keep pinging the *same* topic on retry; recursing into
+            %% the 3-arity version here would silently switch back to
+            %% the first mapped topic.
+            ping_until_healthy(Config, KafkaTopic, Period, Timeout - ConsumedTime);
+        {ok, _} ->
+            ResourceId = resource_id(Config),
+            emqx_resource_manager:reset_metrics(ResourceId),
+            ok
+    end.
+
+ensure_connected(Config) ->
+    ?retry(
+        _Interval = 500,
+        _NAttempts = 20,
+        {ok, _} = get_client_connection(Config)
+    ),
+    ok.
+
+consumer_clientid(Config) ->
+    KafkaName = ?config(kafka_name, Config),
+    binary_to_atom(emqx_bridge_impl_kafka:make_client_id(kafka_consumer, KafkaName)).
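For instance, a test that expects exactly one forwarded MQTT publish within five seconds could do (sketch):

    [#{topic := _MQTTTopic, payload := PayloadBin}] =
        receive_published(#{n => 1, timeout => 5_000}),
    #{<<"value">> := _Value} = emqx_json:decode(PayloadBin, [return_maps]).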
+ +get_client_connection(Config) -> + KafkaHost = ?config(kafka_host, Config), + KafkaPort = ?config(kafka_port, Config), + ClientID = consumer_clientid(Config), + brod_client:get_connection(ClientID, KafkaHost, KafkaPort). + +get_subscriber_workers() -> + [{_, SubscriberPid, _, _}] = supervisor:which_children(emqx_ee_bridge_kafka_consumer_sup), + brod_group_subscriber_v2:get_workers(SubscriberPid). + +wait_downs(Refs, _Timeout) when map_size(Refs) =:= 0 -> + ok; +wait_downs(Refs0, Timeout) -> + receive + {'DOWN', Ref, process, _Pid, _Reason} when is_map_key(Ref, Refs0) -> + Refs = maps:remove(Ref, Refs0), + wait_downs(Refs, Timeout) + after Timeout -> + ct:fail("processes didn't die; remaining: ~p", [map_size(Refs0)]) + end. + +create_rule_and_action_http(Config) -> + KafkaName = ?config(kafka_name, Config), + MQTTTopic = ?config(mqtt_topic, Config), + BridgeId = emqx_bridge_resource:bridge_id(?BRIDGE_TYPE_BIN, KafkaName), + ActionFn = <<(atom_to_binary(?MODULE))/binary, ":action_response">>, + Params = #{ + enable => true, + sql => <<"SELECT * FROM \"$bridges/", BridgeId/binary, "\"">>, + actions => + [ + #{ + <<"function">> => <<"republish">>, + <<"args">> => + #{ + <<"topic">> => <<"republish/", MQTTTopic/binary>>, + <<"payload">> => <<>>, + <<"qos">> => 0, + <<"retain">> => false, + <<"user_properties">> => <<"${headers}">> + } + }, + #{<<"function">> => ActionFn} + ] + }, + Path = emqx_mgmt_api_test_util:api_path(["rules"]), + AuthHeader = emqx_mgmt_api_test_util:auth_header_(), + ct:pal("rule action params: ~p", [Params]), + case emqx_mgmt_api_test_util:request_api(post, Path, "", AuthHeader, Params) of + {ok, Res} -> {ok, emqx_json:decode(Res, [return_maps])}; + Error -> Error + end. + +action_response(Selected, Envs, Args) -> + ?tp(action_response, #{ + selected => Selected, + envs => Envs, + args => Args + }), + ok. + +wait_until_group_is_balanced(KafkaTopic, NPartitions, Nodes, Timeout) -> + do_wait_until_group_is_balanced(KafkaTopic, NPartitions, Nodes, Timeout, #{}). + +do_wait_until_group_is_balanced(KafkaTopic, NPartitions, Nodes, Timeout, Acc0) -> + AllPartitionsCovered = map_size(Acc0) =:= NPartitions, + PresentNodes = lists:usort([N || {_Partition, {N, _MemberId}} <- maps:to_list(Acc0)]), + AllNodesCovered = PresentNodes =:= lists:usort(Nodes), + case AllPartitionsCovered andalso AllNodesCovered of + true -> + ct:pal("group balanced: ~p", [Acc0]), + {ok, Acc0}; + false -> + receive + {kafka_assignment, Node, {Pid, MemberId, GenerationId, TopicAssignments}} -> + Event = #{ + node => Node, + pid => Pid, + member_id => MemberId, + generation_id => GenerationId, + topic_assignments => TopicAssignments + }, + Acc = reconstruct_assignments_from_events(KafkaTopic, [Event], Acc0), + do_wait_until_group_is_balanced(KafkaTopic, NPartitions, Nodes, Timeout, Acc) + after Timeout -> + {timeout, Acc0} + end + end. + +reconstruct_assignments_from_events(KafkaTopic, Events) -> + reconstruct_assignments_from_events(KafkaTopic, Events, #{}). + +reconstruct_assignments_from_events(KafkaTopic, Events0, Acc0) -> + %% when running the test multiple times with the same kafka + %% cluster, kafka will send assignments from old test topics that + %% we must discard. 
+ Assignments = [ + {MemberId, Node, P} + || #{ + node := Node, + member_id := MemberId, + topic_assignments := Assignments + } <- Events0, + #brod_received_assignment{topic = T, partition = P} <- Assignments, + T =:= KafkaTopic + ], + ct:pal("assignments for topic ~p:\n ~p", [KafkaTopic, Assignments]), + lists:foldl( + fun({MemberId, Node, Partition}, Acc) -> + Acc#{Partition => {Node, MemberId}} + end, + Acc0, + Assignments + ). + +setup_group_subscriber_spy(Node) -> + TestPid = self(), + ok = erpc:call( + Node, + fun() -> + ok = meck:new(brod_group_subscriber_v2, [ + passthrough, no_link, no_history, non_strict + ]), + ok = meck:expect( + brod_group_subscriber_v2, + assignments_received, + fun(Pid, MemberId, GenerationId, TopicAssignments) -> + ?tp( + kafka_assignment, + #{ + node => node(), + pid => Pid, + member_id => MemberId, + generation_id => GenerationId, + topic_assignments => TopicAssignments + } + ), + TestPid ! + {kafka_assignment, node(), {Pid, MemberId, GenerationId, TopicAssignments}}, + meck:passthrough([Pid, MemberId, GenerationId, TopicAssignments]) + end + ), + ok + end + ). + +wait_for_cluster_rpc(Node) -> + %% need to wait until the config handler is ready after + %% restarting during the cluster join. + ?retry( + _Sleep0 = 100, + _Attempts0 = 50, + true = is_pid(erpc:call(Node, erlang, whereis, [emqx_config_handler])) + ). + +setup_and_start_listeners(Node, NodeOpts) -> + erpc:call( + Node, + fun() -> + lists:foreach( + fun(Type) -> + Port = emqx_common_test_helpers:listener_port(NodeOpts, Type), + ok = emqx_config:put( + [listeners, Type, default, bind], + {{127, 0, 0, 1}, Port} + ), + ok = emqx_config:put_raw( + [listeners, Type, default, bind], + iolist_to_binary([<<"127.0.0.1:">>, integer_to_binary(Port)]) + ), + ok + end, + [tcp, ssl, ws, wss] + ), + ok = emqx_listeners:start(), + ok + end + ). + +cluster(Config) -> + PrivDataDir = ?config(priv_dir, Config), + PeerModule = + case os:getenv("IS_CI") of + false -> + slave; + _ -> + ct_slave + end, + Cluster = emqx_common_test_helpers:emqx_cluster( + [core, core], + [ + {apps, [emqx_conf, emqx_bridge, emqx_rule_engine]}, + {listener_ports, []}, + {peer_mod, PeerModule}, + {priv_data_dir, PrivDataDir}, + {load_schema, true}, + {start_autocluster, true}, + {schema_mod, emqx_ee_conf_schema}, + {env_handler, fun + (emqx) -> + application:set_env(emqx, boot_modules, [broker, router]), + ok; + (emqx_conf) -> + ok; + (_) -> + ok + end} + ] + ), + ct:pal("cluster: ~p", [Cluster]), + Cluster. + +start_async_publisher(Config, KafkaTopic) -> + TId = ets:new(kafka_payloads, [public, ordered_set]), + Loop = fun Go() -> + receive + stop -> ok + after 0 -> + Payload = emqx_guid:to_hexstr(emqx_guid:gen()), + publish(Config, KafkaTopic, [#{key => Payload, value => Payload}]), + ets:insert(TId, {Payload}), + timer:sleep(400), + Go() + end + end, + Pid = spawn_link(Loop), + {TId, Pid}. + +stop_async_publisher(Pid) -> + MRef = monitor(process, Pid), + Pid ! stop, + receive + {'DOWN', MRef, process, Pid, _} -> + ok + after 1_000 -> + ct:fail("publisher didn't die") + end, + ok. 
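A sketch of how the async publisher pair is meant to be used around a failure window (the toxiproxy helpers are the same ones used by the testcases below):

    {TId, Pid} = start_async_publisher(Config, KafkaTopic),
    %% ... induce and heal a failure via toxiproxy here ...
    stop_async_publisher(Pid),
    %% All payloads produced in the meantime, in insertion order:
    Payloads = [P || {P} <- ets:tab2list(TId)].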
+
+%%------------------------------------------------------------------------------
+%% Testcases
+%%------------------------------------------------------------------------------
+
+t_start_and_consume_ok(Config) ->
+    MQTTTopic = ?config(mqtt_topic, Config),
+    MQTTQoS = ?config(mqtt_qos, Config),
+    KafkaTopic = ?config(kafka_topic, Config),
+    NPartitions = ?config(num_partitions, Config),
+    ResourceId = resource_id(Config),
+    Payload = emqx_guid:to_hexstr(emqx_guid:gen()),
+    ?check_trace(
+        begin
+            ?assertMatch(
+                {ok, _},
+                create_bridge(Config)
+            ),
+            wait_until_subscribers_are_ready(NPartitions, 40_000),
+            ping_until_healthy(Config, _Period = 1_500, _Timeout = 24_000),
+            {ok, C} = emqtt:start_link(),
+            on_exit(fun() -> emqtt:stop(C) end),
+            {ok, _} = emqtt:connect(C),
+            {ok, _, [0]} = emqtt:subscribe(C, MQTTTopic),
+
+            {Res, {ok, _}} =
+                ?wait_async_action(
+                    publish(Config, [
+                        #{
+                            key => <<"mykey">>,
+                            value => Payload,
+                            headers => [{<<"hkey">>, <<"hvalue">>}]
+                        }
+                    ]),
+                    #{?snk_kind := kafka_consumer_handle_message, ?snk_span := {complete, _}},
+                    20_000
+                ),
+
+            %% Check that the bridge probe API doesn't leak atoms.
+            ProbeRes0 = probe_bridge_api(Config),
+            ?assertMatch({ok, {{_, 204, _}, _Headers, _Body}}, ProbeRes0),
+            AtomsBefore = erlang:system_info(atom_count),
+            %% Probe again; shouldn't have created more atoms.
+            ProbeRes1 = probe_bridge_api(Config),
+            ?assertMatch({ok, {{_, 204, _}, _Headers, _Body}}, ProbeRes1),
+            AtomsAfter = erlang:system_info(atom_count),
+            ?assertEqual(AtomsBefore, AtomsAfter),
+
+            Res
+        end,
+        fun({_Partition, OffsetReply}, Trace) ->
+            ?assertMatch([_, _ | _], ?of_kind(kafka_consumer_handle_message, Trace)),
+            Published = receive_published(),
+            ?assertMatch(
+                [
+                    #{
+                        qos := MQTTQoS,
+                        topic := MQTTTopic,
+                        payload := _
+                    }
+                ],
+                Published
+            ),
+            [#{payload := PayloadBin}] = Published,
+            ?assertMatch(
+                #{
+                    <<"value">> := Payload,
+                    <<"key">> := <<"mykey">>,
+                    <<"topic">> := KafkaTopic,
+                    <<"offset">> := OffsetReply,
+                    <<"headers">> := #{<<"hkey">> := <<"hvalue">>}
+                },
+                emqx_json:decode(PayloadBin, [return_maps]),
+                #{
+                    offset_reply => OffsetReply,
+                    kafka_topic => KafkaTopic,
+                    payload => Payload
+                }
+            ),
+            ?assertEqual(1, emqx_resource_metrics:received_get(ResourceId)),
+            ok
+        end
+    ),
+    ok.
+
+t_multiple_topic_mappings(Config) ->
+    TopicMapping = ?config(topic_mapping, Config),
+    MQTTTopics = [MQTTTopic || #{mqtt_topic := MQTTTopic} <- TopicMapping],
+    KafkaTopics = [KafkaTopic || #{kafka_topic := KafkaTopic} <- TopicMapping],
+    NumMQTTTopics = length(MQTTTopics),
+    NPartitions = ?config(num_partitions, Config),
+    ResourceId = resource_id(Config),
+    Payload = emqx_guid:to_hexstr(emqx_guid:gen()),
+    ?check_trace(
+        begin
+            ?assertMatch(
+                {ok, {{_, 201, _}, _, _}},
+                create_bridge_api(Config)
+            ),
+            wait_until_subscribers_are_ready(NPartitions, 40_000),
+            lists:foreach(
+                fun(KafkaTopic) ->
+                    ping_until_healthy(Config, KafkaTopic, _Period = 1_500, _Timeout = 24_000)
+                end,
+                KafkaTopics
+            ),
+
+            {ok, C} = emqtt:start_link([{proto_ver, v5}]),
+            on_exit(fun() -> emqtt:stop(C) end),
+            {ok, _} = emqtt:connect(C),
+            lists:foreach(
+                fun(MQTTTopic) ->
+                    %% we use the highest QoS so that we can check what
+                    %% the granted subscription QoS was.
+                    QoS2Granted = 2,
+                    {ok, _, [QoS2Granted]} = emqtt:subscribe(C, MQTTTopic, ?QOS_2)
+                end,
+                MQTTTopics
+            ),
+
+            {ok, SRef0} =
+                snabbkaffe:subscribe(
+                    ?match_event(#{
+                        ?snk_kind := kafka_consumer_handle_message, ?snk_span := {complete, _}
+                    }),
+                    NumMQTTTopics,
+                    _Timeout0 = 20_000
+                ),
+            lists:foreach(
+                fun(KafkaTopic) ->
+                    publish(Config, KafkaTopic, [
+                        #{
+                            key => <<"mykey">>,
+                            value => Payload,
+                            headers => [{<<"hkey">>, <<"hvalue">>}]
+                        }
+                    ])
+                end,
+                KafkaTopics
+            ),
+            {ok, _} = snabbkaffe:receive_events(SRef0),
+
+            %% Check that the bridge probe API doesn't leak atoms.
+            ProbeRes0 = probe_bridge_api(Config),
+            ?assertMatch({ok, {{_, 204, _}, _Headers, _Body}}, ProbeRes0),
+            AtomsBefore = erlang:system_info(atom_count),
+            %% Probe again; shouldn't have created more atoms.
+            ProbeRes1 = probe_bridge_api(Config),
+            ?assertMatch({ok, {{_, 204, _}, _Headers, _Body}}, ProbeRes1),
+            AtomsAfter = erlang:system_info(atom_count),
+            ?assertEqual(AtomsBefore, AtomsAfter),
+
+            ok
+        end,
+        fun(Trace) ->
+            %% two messages processed with begin/end events
+            ?assertMatch([_, _, _, _ | _], ?of_kind(kafka_consumer_handle_message, Trace)),
+            Published = receive_published(#{n => NumMQTTTopics}),
+            lists:foreach(
+                fun(
+                    #{
+                        mqtt_topic := MQTTTopic,
+                        qos := MQTTQoS
+                    }
+                ) ->
+                    [Msg] = [
+                        Msg
+                        || Msg = #{topic := T} <- Published,
+                           T =:= MQTTTopic
+                    ],
+                    ?assertMatch(
+                        #{
+                            qos := MQTTQoS,
+                            topic := MQTTTopic,
+                            payload := _
+                        },
+                        Msg
+                    )
+                end,
+                TopicMapping
+            ),
+            %% check that we observed the different payload templates
+            %% as configured.
+            Payloads =
+                lists:sort([
+                    case emqx_json:safe_decode(P, [return_maps]) of
+                        {ok, Decoded} -> Decoded;
+                        {error, _} -> P
+                    end
+                    || #{payload := P} <- Published
+                ]),
+            ?assertMatch(
+                [
+                    #{
+                        <<"headers">> := #{<<"hkey">> := <<"hvalue">>},
+                        <<"key">> := <<"mykey">>,
+                        <<"offset">> := Offset,
+                        <<"topic">> := KafkaTopic,
+                        <<"ts">> := TS,
+                        <<"ts_type">> := <<"create">>,
+                        <<"value">> := Payload
+                    },
+                    <<"v = ", Payload/binary>>
+                ] when is_integer(Offset) andalso is_integer(TS) andalso is_binary(KafkaTopic),
+                Payloads
+            ),
+            ?assertEqual(2, emqx_resource_metrics:received_get(ResourceId)),
+            ok
+        end
+    ),
+    ok.
+
+t_on_get_status(Config) ->
+    case proplists:get_bool(skip_does_not_apply, Config) of
+        true ->
+            ok;
+        false ->
+            do_t_on_get_status(Config)
+    end.
+
+do_t_on_get_status(Config) ->
+    ProxyPort = ?config(proxy_port, Config),
+    ProxyHost = ?config(proxy_host, Config),
+    ProxyName = ?config(proxy_name, Config),
+    KafkaName = ?config(kafka_name, Config),
+    ResourceId = emqx_bridge_resource:resource_id(kafka_consumer, KafkaName),
+    ?assertMatch(
+        {ok, _},
+        create_bridge(Config)
+    ),
+    %% Since the connection process is async, we give it some time to
+    %% stabilize and avoid flakiness.
+    ct:sleep(1_200),
+    ?assertEqual({ok, connected}, emqx_resource_manager:health_check(ResourceId)),
+    emqx_common_test_helpers:with_failure(down, ProxyName, ProxyHost, ProxyPort, fun() ->
+        ct:sleep(500),
+        ?assertEqual({ok, disconnected}, emqx_resource_manager:health_check(ResourceId))
+    end),
+    ok.
+
+%% ensure that we can create and use the bridge successfully after
+%% creating it with bad config.
+t_failed_creation_then_fixed(Config) ->
+    case proplists:get_bool(skip_does_not_apply, Config) of
+        true ->
+            ok;
+        false ->
+            ?check_trace(do_t_failed_creation_then_fixed(Config), [])
+    end.
+ +do_t_failed_creation_then_fixed(Config) -> + ct:timetrap({seconds, 180}), + MQTTTopic = ?config(mqtt_topic, Config), + MQTTQoS = ?config(mqtt_qos, Config), + KafkaTopic = ?config(kafka_topic, Config), + NPartitions = ?config(num_partitions, Config), + {ok, _} = create_bridge(Config, #{ + <<"authentication">> => #{<<"password">> => <<"wrong password">>} + }), + ?retry( + _Interval0 = 200, + _Attempts0 = 10, + begin + ClientConn0 = get_client_connection(Config), + case ClientConn0 of + {error, client_down} -> + ok; + {error, {client_down, _Stacktrace}} -> + ok; + _ -> + error({client_should_be_down, ClientConn0}) + end + end + ), + %% now, update with the correct configuration + ?assertMatch( + {{ok, _}, {ok, _}}, + ?wait_async_action( + update_bridge_api(Config), + #{?snk_kind := kafka_consumer_subscriber_started}, + 60_000 + ) + ), + wait_until_subscribers_are_ready(NPartitions, 120_000), + ResourceId = resource_id(Config), + ?assertEqual({ok, connected}, emqx_resource_manager:health_check(ResourceId)), + ping_until_healthy(Config, _Period = 1_500, _Timeout = 24_000), + + {ok, C} = emqtt:start_link(), + on_exit(fun() -> emqtt:stop(C) end), + {ok, _} = emqtt:connect(C), + {ok, _, [0]} = emqtt:subscribe(C, MQTTTopic), + Payload = emqx_guid:to_hexstr(emqx_guid:gen()), + + {_, {ok, _}} = + ?wait_async_action( + publish(Config, [ + #{ + key => <<"mykey">>, + value => Payload, + headers => [{<<"hkey">>, <<"hvalue">>}] + } + ]), + #{?snk_kind := kafka_consumer_handle_message, ?snk_span := {complete, _}}, + 20_000 + ), + Published = receive_published(), + ?assertMatch( + [ + #{ + qos := MQTTQoS, + topic := MQTTTopic, + payload := _ + } + ], + Published + ), + [#{payload := PayloadBin}] = Published, + ?assertMatch( + #{ + <<"value">> := Payload, + <<"key">> := <<"mykey">>, + <<"topic">> := KafkaTopic, + <<"offset">> := _, + <<"headers">> := #{<<"hkey">> := <<"hvalue">>} + }, + emqx_json:decode(PayloadBin, [return_maps]), + #{ + kafka_topic => KafkaTopic, + payload => Payload + } + ), + ok. + +%% check that we commit the offsets so that restarting an emqx node or +%% recovering from a network partition will make the subscribers +%% consume the messages produced during the down time. +t_receive_after_recovery(Config) -> + case proplists:get_bool(skip_does_not_apply, Config) of + true -> + ok; + false -> + do_t_receive_after_recovery(Config) + end. + +do_t_receive_after_recovery(Config) -> + ct:timetrap(120_000), + ProxyPort = ?config(proxy_port, Config), + ProxyHost = ?config(proxy_host, Config), + ProxyName = ?config(proxy_name, Config), + MQTTTopic = ?config(mqtt_topic, Config), + NPartitions = ?config(num_partitions, Config), + KafkaName = ?config(kafka_name, Config), + KafkaNameA = binary_to_atom(KafkaName), + KafkaClientId = consumer_clientid(Config), + ResourceId = resource_id(Config), + ?check_trace( + begin + {ok, _} = create_bridge(Config), + ping_until_healthy(Config, _Period = 1_500, _Timeout0 = 24_000), + {ok, connected} = emqx_resource_manager:health_check(ResourceId), + %% 0) ensure each partition commits its offset so it can + %% recover later. + Messages0 = [ + #{ + key => <<"commit", (integer_to_binary(N))/binary>>, + value => <<"commit", (integer_to_binary(N))/binary>> + } + || N <- lists:seq(1, NPartitions) + ], + %% we do distinct passes over this producing part so that + %% wolff won't batch everything together. 
+ lists:foreach( + fun(Msg) -> + {_, {ok, _}} = + ?wait_async_action( + publish(Config, [Msg]), + #{ + ?snk_kind := kafka_consumer_handle_message, + ?snk_span := {complete, {ok, commit, _}} + }, + _Timeout1 = 2_000 + ) + end, + Messages0 + ), + ?retry( + _Interval = 500, + _NAttempts = 20, + begin + GroupId = emqx_bridge_impl_kafka_consumer:consumer_group_id(KafkaNameA), + {ok, [#{partitions := Partitions}]} = brod:fetch_committed_offsets( + KafkaClientId, GroupId + ), + NPartitions = length(Partitions) + end + ), + %% we need some time to avoid flakiness due to the + %% subscription happening while the consumers are still + %% publishing messages... + ct:sleep(500), + + %% 1) cut the connection with kafka. + WorkerRefs = maps:from_list([ + {monitor(process, Pid), Pid} + || {_TopicPartition, Pid} <- + maps:to_list(get_subscriber_workers()) + ]), + NumMsgs = 50, + Messages1 = [ + begin + X = emqx_guid:to_hexstr(emqx_guid:gen()), + #{ + key => X, + value => X + } + end + || _ <- lists:seq(1, NumMsgs) + ], + {ok, C} = emqtt:start_link(), + on_exit(fun() -> emqtt:stop(C) end), + {ok, _} = emqtt:connect(C), + {ok, _, [0]} = emqtt:subscribe(C, MQTTTopic), + emqx_common_test_helpers:with_failure(down, ProxyName, ProxyHost, ProxyPort, fun() -> + wait_downs(WorkerRefs, _Timeout2 = 1_000), + %% 2) publish messages while the consumer is down. + %% we use `pmap' to avoid wolff sending the whole + %% batch to a single partition. + emqx_misc:pmap(fun(Msg) -> publish(Config, [Msg]) end, Messages1), + ok + end), + %% 3) restore and consume messages + {ok, SRef1} = snabbkaffe:subscribe( + ?match_event(#{ + ?snk_kind := kafka_consumer_handle_message, + ?snk_span := {complete, _} + }), + NumMsgs, + _Timeout3 = 60_000 + ), + {ok, _} = snabbkaffe:receive_events(SRef1), + #{num_msgs => NumMsgs, msgs => lists:sort(Messages1)} + end, + fun(#{num_msgs := NumMsgs, msgs := ExpectedMsgs}, Trace) -> + Received0 = wait_for_expected_published_messages(ExpectedMsgs, _Timeout4 = 2_000), + Received1 = + lists:map( + fun(#{payload := #{<<"key">> := K, <<"value">> := V}}) -> + #{key => K, value => V} + end, + Received0 + ), + Received = lists:sort(Received1), + ?assertEqual(ExpectedMsgs, Received), + ?assert(length(?of_kind(kafka_consumer_handle_message, Trace)) > NumMsgs * 2), + ok + end + ), + ok. 
+ +t_bridge_rule_action_source(Config) -> + MQTTTopic = ?config(mqtt_topic, Config), + KafkaTopic = ?config(kafka_topic, Config), + ResourceId = resource_id(Config), + ?check_trace( + begin + {ok, _} = create_bridge(Config), + ping_until_healthy(Config, _Period = 1_500, _Timeout = 24_000), + + {ok, #{<<"id">> := RuleId}} = create_rule_and_action_http(Config), + on_exit(fun() -> ok = emqx_rule_engine:delete_rule(RuleId) end), + + RepublishTopic = <<"republish/", MQTTTopic/binary>>, + {ok, C} = emqtt:start_link([{proto_ver, v5}]), + on_exit(fun() -> emqtt:stop(C) end), + {ok, _} = emqtt:connect(C), + {ok, _, [0]} = emqtt:subscribe(C, RepublishTopic), + + UniquePayload = emqx_guid:to_hexstr(emqx_guid:gen()), + {_, {ok, _}} = + ?wait_async_action( + publish(Config, [ + #{ + key => UniquePayload, + value => UniquePayload, + headers => [{<<"hkey">>, <<"hvalue">>}] + } + ]), + #{?snk_kind := action_response}, + 5_000 + ), + + #{republish_topic => RepublishTopic, unique_payload => UniquePayload} + end, + fun(Res, _Trace) -> + #{ + republish_topic := RepublishTopic, + unique_payload := UniquePayload + } = Res, + Published = receive_published(), + ?assertMatch( + [ + #{ + topic := RepublishTopic, + properties := #{'User-Property' := [{<<"hkey">>, <<"hvalue">>}]}, + payload := _Payload, + dup := false, + qos := 0, + retain := false + } + ], + Published + ), + [#{payload := RawPayload}] = Published, + ?assertMatch( + #{ + <<"key">> := UniquePayload, + <<"value">> := UniquePayload, + <<"headers">> := #{<<"hkey">> := <<"hvalue">>}, + <<"topic">> := KafkaTopic + }, + emqx_json:decode(RawPayload, [return_maps]) + ), + ?assertEqual(1, emqx_resource_metrics:received_get(ResourceId)), + ok + end + ), + ok. + +%% checks that an existing cluster can be configured with a kafka +%% consumer bridge and that the consumers will distribute over the two +%% nodes. +t_cluster_group(Config) -> + ct:timetrap({seconds, 150}), + NPartitions = ?config(num_partitions, Config), + KafkaTopic = ?config(kafka_topic, Config), + KafkaName = ?config(kafka_name, Config), + ResourceId = resource_id(Config), + BridgeId = emqx_bridge_resource:bridge_id(?BRIDGE_TYPE_BIN, KafkaName), + Cluster = cluster(Config), + ?check_trace( + begin + Nodes = + [_N1, N2 | _] = [ + emqx_common_test_helpers:start_slave(Name, Opts) + || {Name, Opts} <- Cluster + ], + on_exit(fun() -> + lists:foreach( + fun(N) -> + ct:pal("stopping ~p", [N]), + ok = emqx_common_test_helpers:stop_slave(N) + end, + Nodes + ) + end), + lists:foreach(fun setup_group_subscriber_spy/1, Nodes), + {ok, SRef0} = snabbkaffe:subscribe( + ?match_event(#{?snk_kind := kafka_consumer_subscriber_started}), + length(Nodes), + 15_000 + ), + wait_for_cluster_rpc(N2), + erpc:call(N2, fun() -> {ok, _} = create_bridge(Config) end), + {ok, _} = snabbkaffe:receive_events(SRef0), + lists:foreach( + fun(N) -> + ?assertMatch( + {ok, _}, + erpc:call(N, emqx_bridge, lookup, [BridgeId]), + #{node => N} + ) + end, + Nodes + ), + + %% give kafka some time to rebalance the group; we need to + %% sleep so that the two nodes have time to distribute the + %% subscribers, rather than just one node containing all + %% of them. 
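+            %% `wait_until_group_is_balanced/4' is expected to poll the
+            %% group assignments until every listed node owns at least
+            %% one partition, or to fail after the timeout.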
+ {ok, _} = wait_until_group_is_balanced(KafkaTopic, NPartitions, Nodes, 30_000), + lists:foreach( + fun(N) -> + ?assertEqual( + {ok, connected}, + erpc:call(N, emqx_resource_manager, health_check, [ResourceId]), + #{node => N} + ) + end, + Nodes + ), + + #{nodes => Nodes} + end, + fun(Res, Trace0) -> + #{nodes := Nodes} = Res, + Trace1 = ?of_kind(kafka_assignment, Trace0), + Assignments = reconstruct_assignments_from_events(KafkaTopic, Trace1), + ?assertEqual( + lists:usort(Nodes), + lists:usort([ + N + || {_Partition, {N, _MemberId}} <- + maps:to_list(Assignments) + ]) + ), + ?assertEqual(NPartitions, map_size(Assignments)), + ok + end + ), + ok. + +%% test that the kafka consumer group rebalances correctly if a bridge +%% already exists when a new EMQX node joins the cluster. +t_node_joins_existing_cluster(Config) -> + ct:timetrap({seconds, 150}), + TopicMapping = ?config(topic_mapping, Config), + [MQTTTopic] = [MQTTTopic || #{mqtt_topic := MQTTTopic} <- TopicMapping], + NPartitions = ?config(num_partitions, Config), + KafkaTopic = ?config(kafka_topic, Config), + KafkaName = ?config(kafka_name, Config), + ResourceId = resource_id(Config), + BridgeId = emqx_bridge_resource:bridge_id(?BRIDGE_TYPE_BIN, KafkaName), + Cluster = cluster(Config), + ?check_trace( + begin + [{Name1, Opts1}, {Name2, Opts2} | _] = Cluster, + N1 = emqx_common_test_helpers:start_slave(Name1, Opts1), + on_exit(fun() -> ok = emqx_common_test_helpers:stop_slave(N1) end), + setup_group_subscriber_spy(N1), + {{ok, _}, {ok, _}} = + ?wait_async_action( + erpc:call(N1, fun() -> {ok, _} = create_bridge(Config) end), + #{?snk_kind := kafka_consumer_subscriber_started}, + 15_000 + ), + ?assertMatch({ok, _}, erpc:call(N1, emqx_bridge, lookup, [BridgeId])), + {ok, _} = wait_until_group_is_balanced(KafkaTopic, NPartitions, [N1], 30_000), + ?assertEqual( + {ok, connected}, + erpc:call(N1, emqx_resource_manager, health_check, [ResourceId]) + ), + + %% Now, we start the second node and have it join the cluster. + setup_and_start_listeners(N1, Opts1), + TCPPort1 = emqx_common_test_helpers:listener_port(Opts1, tcp), + {ok, C1} = emqtt:start_link([{port, TCPPort1}, {proto_ver, v5}]), + on_exit(fun() -> catch emqtt:stop(C1) end), + {ok, _} = emqtt:connect(C1), + {ok, _, [2]} = emqtt:subscribe(C1, MQTTTopic, 2), + + {ok, SRef0} = snabbkaffe:subscribe( + ?match_event(#{?snk_kind := kafka_consumer_subscriber_started}), + 1, + 30_000 + ), + N2 = emqx_common_test_helpers:start_slave(Name2, Opts2), + on_exit(fun() -> ok = emqx_common_test_helpers:stop_slave(N2) end), + setup_group_subscriber_spy(N2), + Nodes = [N1, N2], + wait_for_cluster_rpc(N2), + + {ok, _} = snabbkaffe:receive_events(SRef0), + ?assertMatch({ok, _}, erpc:call(N2, emqx_bridge, lookup, [BridgeId])), + %% Give some time for the consumers in both nodes to + %% rebalance. + {ok, _} = wait_until_group_is_balanced(KafkaTopic, NPartitions, Nodes, 30_000), + %% Publish some messages so we can check they came from each node. 
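+            %% wait until the route for the MQTT topic is visible on N2,
+            %% so the republished messages are not dropped for lack of
+            %% subscribers.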
+            ?retry(
+                _Sleep1 = 100,
+                _Attempts1 = 50,
+                true = erpc:call(N2, emqx_router, has_routes, [MQTTTopic])
+            ),
+            {ok, SRef1} =
+                snabbkaffe:subscribe(
+                    ?match_event(#{
+                        ?snk_kind := kafka_consumer_handle_message,
+                        ?snk_span := {complete, _}
+                    }),
+                    NPartitions,
+                    10_000
+                ),
+            lists:foreach(
+                fun(N) ->
+                    Key = <<"k", (integer_to_binary(N))/binary>>,
+                    Val = <<"v", (integer_to_binary(N))/binary>>,
+                    publish(Config, KafkaTopic, [#{key => Key, value => Val}])
+                end,
+                lists:seq(1, NPartitions)
+            ),
+            {ok, _} = snabbkaffe:receive_events(SRef1),
+
+            #{nodes => Nodes}
+        end,
+        fun(Res, Trace0) ->
+            #{nodes := Nodes} = Res,
+            Trace1 = ?of_kind(kafka_assignment, Trace0),
+            Assignments = reconstruct_assignments_from_events(KafkaTopic, Trace1),
+            NodeAssignments = lists:usort([
+                N
+             || {_Partition, {N, _MemberId}} <-
+                    maps:to_list(Assignments)
+            ]),
+            ?assertEqual(lists:usort(Nodes), NodeAssignments),
+            ?assertEqual(NPartitions, map_size(Assignments)),
+            Published = receive_published(#{n => NPartitions, timeout => 3_000}),
+            ct:pal("published:\n  ~p", [Published]),
+            PublishingNodesFromTrace =
+                [
+                    N
+                 || #{
+                        ?snk_kind := kafka_consumer_handle_message,
+                        ?snk_span := start,
+                        ?snk_meta := #{node := N}
+                    } <- Trace0
+                ],
+            ?assertEqual(lists:usort(Nodes), lists:usort(PublishingNodesFromTrace)),
+            ok
+        end
+    ),
+    ok.
+
+%% Checks that the consumers get rebalanced after an EMQX node goes
+%% down.
+t_cluster_node_down(Config) ->
+    ct:timetrap({seconds, 150}),
+    TopicMapping = ?config(topic_mapping, Config),
+    [MQTTTopic] = [MQTTTopic || #{mqtt_topic := MQTTTopic} <- TopicMapping],
+    NPartitions = ?config(num_partitions, Config),
+    KafkaTopic = ?config(kafka_topic, Config),
+    KafkaName = ?config(kafka_name, Config),
+    BridgeId = emqx_bridge_resource:bridge_id(?BRIDGE_TYPE_BIN, KafkaName),
+    Cluster = cluster(Config),
+    ?check_trace(
+        begin
+            {_N2, Opts2} = lists:nth(2, Cluster),
+            Nodes =
+                [N1, N2 | _] =
+                lists:map(
+                    fun({Name, Opts}) -> emqx_common_test_helpers:start_slave(Name, Opts) end,
+                    Cluster
+                ),
+            on_exit(fun() ->
+                lists:foreach(
+                    fun(N) ->
+                        ct:pal("stopping ~p", [N]),
+                        ok = emqx_common_test_helpers:stop_slave(N)
+                    end,
+                    Nodes
+                )
+            end),
+            lists:foreach(fun setup_group_subscriber_spy/1, Nodes),
+            {ok, SRef0} = snabbkaffe:subscribe(
+                ?match_event(#{?snk_kind := kafka_consumer_subscriber_started}),
+                length(Nodes),
+                15_000
+            ),
+            wait_for_cluster_rpc(N2),
+            erpc:call(N2, fun() -> {ok, _} = create_bridge(Config) end),
+            {ok, _} = snabbkaffe:receive_events(SRef0),
+            lists:foreach(
+                fun(N) ->
+                    ?assertMatch(
+                        {ok, _},
+                        erpc:call(N, emqx_bridge, lookup, [BridgeId]),
+                        #{node => N}
+                    )
+                end,
+                Nodes
+            ),
+            {ok, _} = wait_until_group_is_balanced(KafkaTopic, NPartitions, Nodes, 30_000),
+
+            %% Now, we stop one of the nodes and watch the group
+            %% rebalance.
+            setup_and_start_listeners(N2, Opts2),
+            TCPPort = emqx_common_test_helpers:listener_port(Opts2, tcp),
+            {ok, C} = emqtt:start_link([{port, TCPPort}, {proto_ver, v5}]),
+            on_exit(fun() -> catch emqtt:stop(C) end),
+            {ok, _} = emqtt:connect(C),
+            {ok, _, [2]} = emqtt:subscribe(C, MQTTTopic, 2),
+            {TId, Pid} = start_async_publisher(Config, KafkaTopic),
+
+            ct:pal("stopping node ~p", [N1]),
+            ok = emqx_common_test_helpers:stop_slave(N1),
+
+            %% Give some time for the consumers in the remaining node to
+            %% rebalance.
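+            %% after `N1' stops, the group coordinator should hand all
+            %% partitions over to the surviving node `N2'.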
+ {ok, _} = wait_until_group_is_balanced(KafkaTopic, NPartitions, [N2], 60_000), + + ok = stop_async_publisher(Pid), + + #{nodes => Nodes, payloads_tid => TId} + end, + fun(Res, Trace0) -> + #{nodes := Nodes, payloads_tid := TId} = Res, + [_N1, N2 | _] = Nodes, + Trace1 = ?of_kind(kafka_assignment, Trace0), + Assignments = reconstruct_assignments_from_events(KafkaTopic, Trace1), + NodeAssignments = lists:usort([ + N + || {_Partition, {N, _MemberId}} <- + maps:to_list(Assignments) + ]), + %% The surviving node has all the partitions assigned to + %% it. + ?assertEqual([N2], NodeAssignments), + ?assertEqual(NPartitions, map_size(Assignments)), + NumPublished = ets:info(TId, size), + %% All published messages are eventually received. + Published = receive_published(#{n => NumPublished, timeout => 3_000}), + ct:pal("published:\n ~p", [Published]), + ok + end + ), + ok. diff --git a/lib-ee/emqx_ee_bridge/test/emqx_bridge_impl_kafka_producer_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_bridge_impl_kafka_producer_SUITE.erl index 9b38e98d3..4b9642442 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_bridge_impl_kafka_producer_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_bridge_impl_kafka_producer_SUITE.erl @@ -11,7 +11,7 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("brod/include/brod.hrl"). --define(PRODUCER, emqx_bridge_impl_kafka). +-define(PRODUCER, emqx_bridge_impl_kafka_producer). %%------------------------------------------------------------------------------ %% Things for REST API tests @@ -30,16 +30,15 @@ -include_lib("emqx/include/emqx.hrl"). -include("emqx_dashboard.hrl"). --define(CONTENT_TYPE, "application/x-www-form-urlencoded"). - -define(HOST, "http://127.0.0.1:18083"). %% -define(API_VERSION, "v5"). -define(BASE_PATH, "/api/v5"). --define(APP_DASHBOARD, emqx_dashboard). --define(APP_MANAGEMENT, emqx_management). +%% TODO: rename this to `kafka_producer' after alias support is added +%% to hocon; keeping this as just `kafka' for backwards compatibility. +-define(BRIDGE_TYPE, "kafka"). %%------------------------------------------------------------------------------ %% CT boilerplate @@ -71,6 +70,10 @@ wait_until_kafka_is_up(Attempts) -> end. init_per_suite(Config) -> + %% ensure loaded + _ = application:load(emqx_ee_bridge), + _ = emqx_ee_bridge:module_info(), + application:load(emqx_bridge), ok = emqx_common_test_helpers:start_apps([emqx_conf]), ok = emqx_connector_test_helpers:start_apps([emqx_resource, emqx_bridge, emqx_rule_engine]), {ok, _} = application:ensure_all_started(emqx_connector), @@ -102,6 +105,13 @@ init_per_group(GroupName, Config) -> end_per_group(_, _) -> ok. +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(_TestCase, _Config) -> + delete_all_bridges(), + ok. + set_special_configs(emqx_management) -> Listeners = #{http => #{port => 8081}}, Config = #{ @@ -222,7 +232,7 @@ kafka_bridge_rest_api_all_auth_methods(UseSSL) -> ok. 
 kafka_bridge_rest_api_helper(Config) ->
-    BridgeType = "kafka",
+    BridgeType = ?BRIDGE_TYPE,
     BridgeName = "my_kafka_bridge",
     BridgeID = emqx_bridge_resource:bridge_id(
         erlang:list_to_binary(BridgeType),
@@ -233,6 +243,7 @@ kafka_bridge_rest_api_helper(Config) ->
         erlang:list_to_binary(BridgeName)
     ),
     UrlEscColon = "%3A",
+    BridgesProbeParts = ["bridges_probe"],
     BridgeIdUrlEnc = BridgeType ++ UrlEscColon ++ BridgeName,
     BridgesParts = ["bridges"],
     BridgesPartsIdDeleteAlsoActions = ["bridges", BridgeIdUrlEnc ++ "?also_delete_dep_actions"],
@@ -266,24 +277,18 @@ kafka_bridge_rest_api_helper(Config) ->
     %% Create new Kafka bridge
     KafkaTopic = "test-topic-one-partition",
     CreateBodyTmp = #{
-        <<"type">> => <<"kafka">>,
+        <<"type">> => <<?BRIDGE_TYPE>>,
         <<"name">> => <<"my_kafka_bridge">>,
         <<"bootstrap_hosts">> => iolist_to_binary(maps:get(<<"bootstrap_hosts">>, Config)),
         <<"enable">> => true,
         <<"authentication">> => maps:get(<<"authentication">>, Config),
-        <<"producer">> => #{
-            <<"mqtt">> => #{
-                topic => <<"t/#">>
-            },
-            <<"kafka">> => #{
-                <<"topic">> => iolist_to_binary(KafkaTopic),
-                <<"buffer">> => #{
-                    <<"memory_overload_protection">> => <<"false">>
-                },
-                <<"message">> => #{
-                    <<"key">> => <<"${clientid}">>,
-                    <<"value">> => <<"${.payload}">>
-                }
+        <<"local_topic">> => <<"t/#">>,
+        <<"kafka">> => #{
+            <<"topic">> => iolist_to_binary(KafkaTopic),
+            <<"buffer">> => #{<<"memory_overload_protection">> => <<"false">>},
+            <<"message">> => #{
+                <<"key">> => <<"${clientid}">>,
+                <<"value">> => <<"${.payload}">>
             }
         }
     },
@@ -295,6 +300,13 @@ kafka_bridge_rest_api_helper(Config) ->
     {ok, 201, _Data} = show(http_post(BridgesParts, show(CreateBody))),
     %% Check that the new bridge is in the list of bridges
     true = MyKafkaBridgeExists(),
+    %% Probe should work
+    {ok, 204, _} = http_post(BridgesProbeParts, CreateBody),
+    %% no extra atoms should be created when probing
+    AtomsBefore = erlang:system_info(atom_count),
+    {ok, 204, _} = http_post(BridgesProbeParts, CreateBody),
+    AtomsAfter = erlang:system_info(atom_count),
+    ?assertEqual(AtomsBefore, AtomsAfter),
     %% Create a rule that uses the bridge
     {ok, 201, _Rule} = http_post(
         ["rules"],
@@ -355,6 +367,7 @@ kafka_bridge_rest_api_helper(Config) ->
     %% Cleanup
     {ok, 204, _} = show(http_delete(BridgesPartsIdDeleteAlsoActions)),
     false = MyKafkaBridgeExists(),
+    delete_all_bridges(),
     ok.
%%------------------------------------------------------------------------------ @@ -371,9 +384,10 @@ t_failed_creation_then_fix(Config) -> ValidAuthSettings = valid_sasl_plain_settings(), WrongAuthSettings = ValidAuthSettings#{"password" := "wrong"}, Hash = erlang:phash2([HostsString, ?FUNCTION_NAME]), + Type = ?BRIDGE_TYPE, Name = "kafka_bridge_name_" ++ erlang:integer_to_list(Hash), - ResourceId = emqx_bridge_resource:resource_id("kafka", Name), - BridgeId = emqx_bridge_resource:bridge_id("kafka", Name), + ResourceId = emqx_bridge_resource:resource_id(Type, Name), + BridgeId = emqx_bridge_resource:bridge_id(Type, Name), KafkaTopic = "test-topic-one-partition", WrongConf = config(#{ "authentication" => WrongAuthSettings, @@ -397,18 +411,20 @@ t_failed_creation_then_fix(Config) -> "ssl" => #{} }), %% creates, but fails to start producers - %% FIXME: change to kafka_producer after config refactoring - ?assertMatch(ok, emqx_bridge_resource:create(kafka, erlang:list_to_atom(Name), WrongConf, #{})), - ?assertThrow(failed_to_start_kafka_producer, ?PRODUCER:on_start(ResourceId, WrongConf)), + {ok, #{config := WrongConfigAtom1}} = emqx_bridge:create( + Type, erlang:list_to_atom(Name), WrongConf + ), + WrongConfigAtom = WrongConfigAtom1#{bridge_name => Name}, + ?assertThrow(failed_to_start_kafka_producer, ?PRODUCER:on_start(ResourceId, WrongConfigAtom)), %% before throwing, it should cleanup the client process. ?assertEqual([], supervisor:which_children(wolff_client_sup)), - %% FIXME: change to kafka_producer after config refactoring %% must succeed with correct config - ?assertMatch(ok, emqx_bridge_resource:create(kafka, erlang:list_to_atom(Name), ValidConf, #{})), - {ok, State} = ?PRODUCER:on_start(ResourceId, ValidConf), - %% To make sure we get unique value - timer:sleep(1), - Time = erlang:monotonic_time(), + {ok, #{config := ValidConfigAtom1}} = emqx_bridge:create( + Type, erlang:list_to_atom(Name), ValidConf + ), + ValidConfigAtom = ValidConfigAtom1#{bridge_name => Name}, + {ok, State} = ?PRODUCER:on_start(ResourceId, ValidConfigAtom), + Time = erlang:unique_integer(), BinTime = integer_to_binary(Time), Msg = #{ clientid => BinTime, @@ -423,6 +439,7 @@ t_failed_creation_then_fix(Config) -> %% TODO: refactor those into init/end per testcase ok = ?PRODUCER:on_stop(ResourceId, State), ok = emqx_bridge_resource:remove(BridgeId), + delete_all_bridges(), ok. 
 %%------------------------------------------------------------------------------
@@ -487,6 +504,7 @@ publish_helper(
     },
     Conf0
 ) ->
+    delete_all_bridges(),
     HostsString =
         case {AuthSettings, SSLSettings} of
             {"none", Map} when map_size(Map) =:= 0 ->
@@ -500,8 +518,8 @@ publish_helper(
         end,
     Hash = erlang:phash2([HostsString, AuthSettings, SSLSettings]),
     Name = "kafka_bridge_name_" ++ erlang:integer_to_list(Hash),
-    InstId = emqx_bridge_resource:resource_id("kafka", Name),
-    BridgeId = emqx_bridge_resource:bridge_id("kafka", Name),
+    Type = ?BRIDGE_TYPE,
+    InstId = emqx_bridge_resource:resource_id(Type, Name),
     KafkaTopic = "test-topic-one-partition",
     Conf = config(
         #{
@@ -509,30 +527,38 @@ publish_helper(
             "kafka_hosts_string" => HostsString,
             "kafka_topic" => KafkaTopic,
            "instance_id" => InstId,
+            "local_topic" => <<"mqtt/local">>,
             "ssl" => SSLSettings
         },
         Conf0
     ),
-
-    emqx_bridge_resource:create(kafka, erlang:list_to_atom(Name), Conf, #{}),
-    %% To make sure we get unique value
-    timer:sleep(1),
-    Time = erlang:monotonic_time(),
+    {ok, _} = emqx_bridge:create(
+        <<?BRIDGE_TYPE>>, list_to_binary(Name), Conf
+    ),
+    Time = erlang:unique_integer(),
     BinTime = integer_to_binary(Time),
+    Partition = 0,
     Msg = #{
         clientid => BinTime,
         payload => <<"payload">>,
         timestamp => Time
     },
-    {ok, Offset} = resolve_kafka_offset(kafka_hosts(), KafkaTopic, 0),
-    ct:pal("base offset before testing ~p", [Offset]),
-    StartRes = ?PRODUCER:on_start(InstId, Conf),
-    {ok, State} = StartRes,
+    {ok, Offset0} = resolve_kafka_offset(kafka_hosts(), KafkaTopic, Partition),
+    ct:pal("base offset before testing ~p", [Offset0]),
+    {ok, _Group, #{state := State}} = emqx_resource:get_instance(InstId),
     ok = send(CtConfig, InstId, Msg, State),
-    {ok, {_, [KafkaMsg]}} = brod:fetch(kafka_hosts(), KafkaTopic, 0, Offset),
-    ?assertMatch(#kafka_message{key = BinTime}, KafkaMsg),
-    ok = ?PRODUCER:on_stop(InstId, State),
-    ok = emqx_bridge_resource:remove(BridgeId),
+    {ok, {_, [KafkaMsg0]}} = brod:fetch(kafka_hosts(), KafkaTopic, Partition, Offset0),
+    ?assertMatch(#kafka_message{key = BinTime}, KafkaMsg0),
+
+    %% test that it forwards from local mqtt topic as well
+    {ok, Offset1} = resolve_kafka_offset(kafka_hosts(), KafkaTopic, Partition),
+    ct:pal("base offset before testing (2) ~p", [Offset1]),
+    emqx:publish(emqx_message:make(<<"mqtt/local">>, <<"payload">>)),
+    ct:sleep(2_000),
+    {ok, {_, [KafkaMsg1]}} = brod:fetch(kafka_hosts(), KafkaTopic, Partition, Offset1),
+    ?assertMatch(#kafka_message{value = <<"payload">>}, KafkaMsg1),
+
+    delete_all_bridges(),
     ok.

 default_config() ->
@@ -545,18 +571,24 @@ config(Args0, More) ->
     Args1 = maps:merge(default_config(), Args0),
     Args = maps:merge(Args1, More),
     ConfText = hocon_config(Args),
-    ct:pal("Running tests with conf:\n~s", [ConfText]),
-    {ok, Conf} = hocon:binary(ConfText),
-    #{config := Parsed} = hocon_tconf:check_plain(
-        emqx_ee_bridge_kafka,
-        #{<<"config">> => Conf},
-        #{atom_key => true}
-    ),
+    {ok, Conf} = hocon:binary(ConfText, #{format => map}),
+    ct:pal("Running tests with conf:\n~p", [Conf]),
     InstId = maps:get("instance_id", Args),
     <<"bridge:", BridgeId/binary>> = InstId,
-    Parsed#{bridge_name => erlang:element(2, emqx_bridge_resource:parse_bridge_id(BridgeId))}.
+    {Type, Name} = emqx_bridge_resource:parse_bridge_id(BridgeId),
+    TypeBin = atom_to_binary(Type),
+    hocon_tconf:check_plain(
+        emqx_bridge_schema,
+        Conf,
+        #{atom_key => false, required => false}
+    ),
+    #{<<"bridges">> := #{TypeBin := #{Name := Parsed}}} = Conf,
+    Parsed.
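+%% Renders the HOCON text for the bridge under test by filling in the
+%% mustache template below with the authentication and SSL fragments.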
hocon_config(Args) -> + InstId = maps:get("instance_id", Args), + <<"bridge:", BridgeId/binary>> = InstId, + {_Type, Name} = emqx_bridge_resource:parse_bridge_id(BridgeId), AuthConf = maps:get("authentication", Args), AuthTemplate = iolist_to_binary(hocon_config_template_authentication(AuthConf)), AuthConfRendered = bbmustache:render(AuthTemplate, AuthConf), @@ -567,6 +599,7 @@ hocon_config(Args) -> iolist_to_binary(hocon_config_template()), Args#{ "authentication" => AuthConfRendered, + "bridge_name" => Name, "ssl" => SSLConfRendered } ), @@ -574,23 +607,34 @@ hocon_config(Args) -> %% erlfmt-ignore hocon_config_template() -> +%% TODO: rename the type to `kafka_producer' after alias support is +%% added to hocon; keeping this as just `kafka' for backwards +%% compatibility. """ -bootstrap_hosts = \"{{ kafka_hosts_string }}\" -enable = true -authentication = {{{ authentication }}} -ssl = {{{ ssl }}} -producer = { - mqtt { - topic = \"t/#\" +bridges.kafka.{{ bridge_name }} { + bootstrap_hosts = \"{{ kafka_hosts_string }}\" + enable = true + authentication = {{{ authentication }}} + ssl = {{{ ssl }}} + local_topic = \"{{ local_topic }}\" + kafka = { + message = { + key = \"${clientid}\" + value = \"${.payload}\" + timestamp = \"${timestamp}\" } - kafka = { - topic = \"{{ kafka_topic }}\" - message = {key = \"${clientid}\", value = \"${.payload}\"} - partition_strategy = {{ partition_strategy }} - buffer = { - memory_overload_protection = false - } + buffer = { + memory_overload_protection = false } + partition_strategy = {{ partition_strategy }} + topic = \"{{ kafka_topic }}\" + } + metadata_request_timeout = 5s + min_metadata_refresh_interval = 3s + socket_opts { + nodelay = true + } + connect_timeout = 5s } """. @@ -631,22 +675,42 @@ hocon_config_template_ssl(_) -> """. kafka_hosts_string() -> - "kafka-1.emqx.net:9092,". + KafkaHost = os:getenv("KAFKA_PLAIN_HOST", "kafka-1.emqx.net"), + KafkaPort = os:getenv("KAFKA_PLAIN_PORT", "9092"), + KafkaHost ++ ":" ++ KafkaPort ++ ",". kafka_hosts_string_sasl() -> - "kafka-1.emqx.net:9093,". + KafkaHost = os:getenv("KAFKA_SASL_PLAIN_HOST", "kafka-1.emqx.net"), + KafkaPort = os:getenv("KAFKA_SASL_PLAIN_PORT", "9093"), + KafkaHost ++ ":" ++ KafkaPort ++ ",". kafka_hosts_string_ssl() -> - "kafka-1.emqx.net:9094,". + KafkaHost = os:getenv("KAFKA_SSL_HOST", "kafka-1.emqx.net"), + KafkaPort = os:getenv("KAFKA_SSL_PORT", "9094"), + KafkaHost ++ ":" ++ KafkaPort ++ ",". kafka_hosts_string_ssl_sasl() -> - "kafka-1.emqx.net:9095,". + KafkaHost = os:getenv("KAFKA_SASL_SSL_HOST", "kafka-1.emqx.net"), + KafkaPort = os:getenv("KAFKA_SASL_SSL_PORT", "9095"), + KafkaHost ++ ":" ++ KafkaPort ++ ",". + +shared_secret_path() -> + os:getenv("CI_SHARED_SECRET_PATH", "/var/lib/secret"). + +shared_secret(client_keyfile) -> + filename:join([shared_secret_path(), "client.key"]); +shared_secret(client_certfile) -> + filename:join([shared_secret_path(), "client.crt"]); +shared_secret(client_cacertfile) -> + filename:join([shared_secret_path(), "ca.crt"]); +shared_secret(rig_keytab) -> + filename:join([shared_secret_path(), "rig.keytab"]). valid_ssl_settings() -> #{ - "cacertfile" => <<"/var/lib/secret/ca.crt">>, - "certfile" => <<"/var/lib/secret/client.crt">>, - "keyfile" => <<"/var/lib/secret/client.key">>, + "cacertfile" => shared_secret(client_cacertfile), + "certfile" => shared_secret(client_certfile), + "keyfile" => shared_secret(client_keyfile), "enable" => <<"true">> }. 
@@ -670,7 +734,7 @@ valid_sasl_scram512_settings() -> valid_sasl_kerberos_settings() -> #{ "kerberos_principal" => "rig@KDC.EMQX.NET", - "kerberos_keytab_file" => "/var/lib/secret/rig.keytab" + "kerberos_keytab_file" => shared_secret(rig_keytab) }. kafka_hosts() -> @@ -732,3 +796,17 @@ api_path(Parts) -> json(Data) -> {ok, Jsx} = emqx_json:safe_decode(Data, [return_maps]), Jsx. + +delete_all_bridges() -> + lists:foreach( + fun(#{name := Name, type := Type}) -> + emqx_bridge:remove(Type, Name) + end, + emqx_bridge:list() + ), + %% at some point during the tests, sometimes `emqx_bridge:list()' + %% returns an empty list, but `emqx:get_config([bridges])' returns + %% a bunch of orphan test bridges... + lists:foreach(fun emqx_resource:remove/1, emqx_resource:list_instances()), + emqx_config:put([bridges], #{}), + ok. diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl index 452b7a4d2..8424ddff0 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl @@ -1105,13 +1105,13 @@ do_econnrefused_or_timeout_test(Config, Error) -> ?assertMatch( #{ dropped := Dropped, - failed := 0, + failed := Failed, inflight := Inflight, matched := Matched, queuing := Queueing, retried := 0, success := 0 - } when Matched >= 1 andalso Inflight + Queueing + Dropped =< 2, + } when Matched >= 1 andalso Inflight + Queueing + Dropped + Failed =< 2, CurrentMetrics ); {timeout, async} -> diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_kafka_tests.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_kafka_tests.erl new file mode 100644 index 000000000..72096c7b1 --- /dev/null +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_kafka_tests.erl @@ -0,0 +1,287 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_ee_bridge_kafka_tests). + +-include_lib("eunit/include/eunit.hrl"). + +%%=========================================================================== +%% Test cases +%%=========================================================================== + +kafka_producer_test() -> + Conf1 = parse(kafka_producer_old_hocon(_WithLocalTopic0 = false)), + Conf2 = parse(kafka_producer_old_hocon(_WithLocalTopic1 = true)), + Conf3 = parse(kafka_producer_new_hocon()), + + ?assertMatch( + #{ + <<"bridges">> := + #{ + <<"kafka">> := + #{ + <<"myproducer">> := + #{<<"kafka">> := #{}} + } + } + }, + check(Conf1) + ), + ?assertNotMatch( + #{ + <<"bridges">> := + #{ + <<"kafka">> := + #{ + <<"myproducer">> := + #{<<"local_topic">> := _} + } + } + }, + check(Conf1) + ), + ?assertMatch( + #{ + <<"bridges">> := + #{ + <<"kafka">> := + #{ + <<"myproducer">> := + #{ + <<"kafka">> := #{}, + <<"local_topic">> := <<"mqtt/local">> + } + } + } + }, + check(Conf2) + ), + ?assertMatch( + #{ + <<"bridges">> := + #{ + <<"kafka">> := + #{ + <<"myproducer">> := + #{ + <<"kafka">> := #{}, + <<"local_topic">> := <<"mqtt/local">> + } + } + } + }, + check(Conf3) + ), + + ok. + +kafka_consumer_test() -> + Conf1 = parse(kafka_consumer_hocon()), + ?assertMatch( + #{ + <<"bridges">> := + #{ + <<"kafka_consumer">> := + #{ + <<"my_consumer">> := _ + } + } + }, + check(Conf1) + ), + + %% Bad: can't repeat kafka topics. 
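+    %% (the two mappings below both point at kafka topic `t1', which
+    %% the schema validator should reject)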
+ BadConf1 = emqx_map_lib:deep_put( + [<<"bridges">>, <<"kafka_consumer">>, <<"my_consumer">>, <<"topic_mapping">>], + Conf1, + [ + #{ + <<"kafka_topic">> => <<"t1">>, + <<"mqtt_topic">> => <<"mqtt/t1">>, + <<"qos">> => 1, + <<"payload_template">> => <<"${.}">> + }, + #{ + <<"kafka_topic">> => <<"t1">>, + <<"mqtt_topic">> => <<"mqtt/t2">>, + <<"qos">> => 2, + <<"payload_template">> => <<"v = ${.value}">> + } + ] + ), + ?assertThrow( + {_, [ + #{ + path := "bridges.kafka_consumer.my_consumer.topic_mapping", + reason := "Kafka topics must not be repeated in a bridge" + } + ]}, + check(BadConf1) + ), + + %% Bad: there must be at least 1 mapping. + BadConf2 = emqx_map_lib:deep_put( + [<<"bridges">>, <<"kafka_consumer">>, <<"my_consumer">>, <<"topic_mapping">>], + Conf1, + [] + ), + ?assertThrow( + {_, [ + #{ + path := "bridges.kafka_consumer.my_consumer.topic_mapping", + reason := "There must be at least one Kafka-MQTT topic mapping" + } + ]}, + check(BadConf2) + ), + + ok. + +%%=========================================================================== +%% Helper functions +%%=========================================================================== + +parse(Hocon) -> + {ok, Conf} = hocon:binary(Hocon), + Conf. + +check(Conf) when is_map(Conf) -> + hocon_tconf:check_plain(emqx_bridge_schema, Conf). + +%%=========================================================================== +%% Data section +%%=========================================================================== + +%% erlfmt-ignore +kafka_producer_old_hocon(_WithLocalTopic = true) -> + kafka_producer_old_hocon("mqtt {topic = \"mqtt/local\"}\n"); +kafka_producer_old_hocon(_WithLocalTopic = false) -> + kafka_producer_old_hocon("mqtt {}\n"); +kafka_producer_old_hocon(MQTTConfig) when is_list(MQTTConfig) -> +""" +bridges.kafka { + myproducer { + authentication = \"none\" + bootstrap_hosts = \"toxiproxy:9292\" + connect_timeout = \"5s\" + metadata_request_timeout = \"5s\" + min_metadata_refresh_interval = \"3s\" + producer { + kafka { + buffer { + memory_overload_protection = false + mode = \"memory\" + per_partition_limit = \"2GB\" + segment_bytes = \"100MB\" + } + compression = \"no_compression\" + max_batch_bytes = \"896KB\" + max_inflight = 10 + message { + key = \"${.clientid}\" + timestamp = \"${.timestamp}\" + value = \"${.}\" + } + partition_count_refresh_interval = \"60s\" + partition_strategy = \"random\" + required_acks = \"all_isr\" + topic = \"test-topic-two-partitions\" + } +""" ++ MQTTConfig ++ +""" + } + socket_opts { + nodelay = true + recbuf = \"1024KB\" + sndbuf = \"1024KB\" + } + ssl {enable = false, verify = \"verify_peer\"} + } +} +""". 
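+%% (compare with `kafka_producer_new_hocon/0' below: the new layout
+%% hoists `kafka' to the top level and replaces `mqtt.topic' with
+%% `local_topic')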
+ +kafka_producer_new_hocon() -> + "" + "\n" + "bridges.kafka {\n" + " myproducer {\n" + " authentication = \"none\"\n" + " bootstrap_hosts = \"toxiproxy:9292\"\n" + " connect_timeout = \"5s\"\n" + " metadata_request_timeout = \"5s\"\n" + " min_metadata_refresh_interval = \"3s\"\n" + " kafka {\n" + " buffer {\n" + " memory_overload_protection = false\n" + " mode = \"memory\"\n" + " per_partition_limit = \"2GB\"\n" + " segment_bytes = \"100MB\"\n" + " }\n" + " compression = \"no_compression\"\n" + " max_batch_bytes = \"896KB\"\n" + " max_inflight = 10\n" + " message {\n" + " key = \"${.clientid}\"\n" + " timestamp = \"${.timestamp}\"\n" + " value = \"${.}\"\n" + " }\n" + " partition_count_refresh_interval = \"60s\"\n" + " partition_strategy = \"random\"\n" + " required_acks = \"all_isr\"\n" + " topic = \"test-topic-two-partitions\"\n" + " }\n" + " local_topic = \"mqtt/local\"\n" + " socket_opts {\n" + " nodelay = true\n" + " recbuf = \"1024KB\"\n" + " sndbuf = \"1024KB\"\n" + " }\n" + " ssl {enable = false, verify = \"verify_peer\"}\n" + " }\n" + "}\n" + "". + +%% erlfmt-ignore +kafka_consumer_hocon() -> +""" +bridges.kafka_consumer.my_consumer { + enable = true + bootstrap_hosts = \"kafka-1.emqx.net:9292\" + connect_timeout = 5s + min_metadata_refresh_interval = 3s + metadata_request_timeout = 5s + authentication = { + mechanism = plain + username = emqxuser + password = password + } + kafka { + max_batch_bytes = 896KB + max_rejoin_attempts = 5 + offset_commit_interval_seconds = 3 + offset_reset_policy = reset_to_latest + } + topic_mapping = [ + { + kafka_topic = \"kafka-topic-1\" + mqtt_topic = \"mqtt/topic/1\" + qos = 1 + payload_template = \"${.}\" + }, + { + kafka_topic = \"kafka-topic-2\" + mqtt_topic = \"mqtt/topic/2\" + qos = 2 + payload_template = \"v = ${.value}\" + } + ] + key_encoding_mode = none + value_encoding_mode = none + ssl { + enable = false + verify = verify_none + server_name_indication = \"auto\" + } +} +""". 
diff --git a/mix.exs b/mix.exs
index e946c257b..42354f8dc 100644
--- a/mix.exs
+++ b/mix.exs
@@ -68,7 +68,7 @@ defmodule EMQXUmbrella.MixProject do
       {:telemetry, "1.1.0"},
       # in conflict by emqtt and hocon
       {:getopt, "1.0.2", override: true},
-      {:snabbkaffe, github: "kafka4beam/snabbkaffe", tag: "1.0.0", override: true},
+      {:snabbkaffe, github: "kafka4beam/snabbkaffe", tag: "1.0.7", override: true},
       {:hocon, github: "emqx/hocon", tag: "0.37.0", override: true},
       {:emqx_http_lib, github: "emqx/emqx_http_lib", tag: "0.5.2", override: true},
       {:esasl, github: "emqx/esasl", tag: "0.2.0"},
@@ -135,7 +135,7 @@ defmodule EMQXUmbrella.MixProject do
         {:wolff, github: "kafka4beam/wolff", tag: "1.7.5"},
         {:kafka_protocol, github: "kafka4beam/kafka_protocol", tag: "4.1.2", override: true},
         {:brod_gssapi, github: "kafka4beam/brod_gssapi", tag: "v0.1.0-rc1"},
-        {:brod, github: "kafka4beam/brod", tag: "3.16.7"},
+        {:brod, github: "kafka4beam/brod", tag: "3.16.8"},
         {:snappyer, "1.2.8", override: true},
         {:supervisor3, "1.1.11", override: true}
       ]
diff --git a/rebar.config b/rebar.config
index 8bfcc7960..5ce9138ce 100644
--- a/rebar.config
+++ b/rebar.config
@@ -68,7 +68,7 @@
     , {observer_cli, "1.7.1"} % NOTE: depends on recon 2.5.x
     , {system_monitor, {git, "https://github.com/ieQu1/system_monitor", {tag, "3.0.3"}}}
     , {getopt, "1.0.2"}
-    , {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.0"}}}
+    , {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.7"}}}
    , {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.37.0"}}}
    , {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.2"}}}
    , {esasl, {git, "https://github.com/emqx/esasl", {tag, "0.2.0"}}}
diff --git a/scripts/ct/run.sh b/scripts/ct/run.sh
index b3c424ea1..bf7b2073d 100755
--- a/scripts/ct/run.sh
+++ b/scripts/ct/run.sh
@@ -21,11 +21,16 @@ help() {
     echo "  otherwise it runs the entire app's CT"
 }

-if command -v docker-compose; then
+set +e
+if docker compose version; then
+    DC='docker compose'
+elif command -v docker-compose; then
     DC='docker-compose'
 else
-    DC='docker compose'
+    echo 'Neither "docker compose" nor "docker-compose" is available; stopping.'
+    exit 1
 fi
+set -e

 WHICH_APP='novalue'
 CONSOLE='no'
@@ -154,14 +159,11 @@ for dep in ${CT_DEPS}; do
                 '.ci/docker-compose-file/docker-compose-pgsql-tls.yaml' )
             ;;
         kafka)
-            # Kafka container generates root owned ssl files
-            # the files are shared with EMQX (with a docker volume)
-            NEED_ROOT=yes
             FILES+=( '.ci/docker-compose-file/docker-compose-kafka.yaml' )
             ;;
         tdengine)
             FILES+=( '.ci/docker-compose-file/docker-compose-tdengine-restful.yaml' )
-        ;;
+            ;;
         clickhouse)
             FILES+=( '.ci/docker-compose-file/docker-compose-clickhouse.yaml' )
             ;;
@@ -180,47 +182,43 @@
 F_OPTIONS=""

 for file in "${FILES[@]}"; do
     F_OPTIONS="$F_OPTIONS -f $file"
 done
-ORIG_UID_GID="$UID:$UID"
-if [[ "${NEED_ROOT:-}" == 'yes' ]]; then
-    export UID_GID='root:root'
-else
-    # Passing $UID to docker-compose to be used in erlang container
-    # as owner of the main process to avoid git repo permissions issue.
-    # Permissions issue happens because we are mounting local filesystem
-    # where files are owned by $UID to docker container where it's using
-    # root (UID=0) by default, and git is not happy about it.
- export UID_GID="$ORIG_UID_GID" -fi -# /emqx is where the source dir is mounted to the Erlang container -# in .ci/docker-compose-file/docker-compose.yaml +DOCKER_USER="$(id -u)" +export DOCKER_USER + TTY='' if [[ -t 1 ]]; then TTY='-t' fi -function restore_ownership { - if [[ -n ${EMQX_TEST_DO_NOT_RUN_SUDO+x} ]] || ! sudo chown -R "$ORIG_UID_GID" . >/dev/null 2>&1; then - docker exec -i $TTY -u root:root "$ERLANG_CONTAINER" bash -c "chown -R $ORIG_UID_GID /emqx" >/dev/null 2>&1 || true - fi -} - -restore_ownership -trap restore_ownership EXIT - +# ensure directory with secrets is created by current user before running compose +mkdir -p /tmp/emqx-ci/emqx-shared-secret if [ "$STOP" = 'no' ]; then # some left-over log file has to be deleted before a new docker-compose up rm -f '.ci/docker-compose-file/redis/*.log' + set +e # shellcheck disable=2086 # no quotes for F_OPTIONS $DC $F_OPTIONS up -d --build --remove-orphans + RESULT=$? + if [ $RESULT -ne 0 ]; then + mkdir -p _build/test/logs + LOG='_build/test/logs/docker-compose.log' + echo "Dumping docker-compose log to $LOG" + # shellcheck disable=2086 # no quotes for F_OPTIONS + $DC $F_OPTIONS logs --no-color --timestamps > "$LOG" + exit 1 + fi + set -e fi -echo "Fixing file owners and permissions for $UID_GID" -# rebar and hex cache directory need to be writable by $UID -docker exec -i $TTY -u root:root "$ERLANG_CONTAINER" bash -c "mkdir -p /.cache && chown $UID_GID /.cache && chown -R $UID_GID /emqx/.git /emqx/.ci /emqx/_build/default/lib" -# need to initialize .erlang.cookie manually here because / is not writable by $UID -docker exec -i $TTY -u root:root "$ERLANG_CONTAINER" bash -c "openssl rand -base64 16 > /.erlang.cookie && chown $UID_GID /.erlang.cookie && chmod 0400 /.erlang.cookie" +# rebar, mix and hex cache directory need to be writable by $DOCKER_USER +docker exec -i $TTY -u root:root "$ERLANG_CONTAINER" bash -c "mkdir -p /.cache /.hex /.mix && chown $DOCKER_USER /.cache /.hex /.mix" +# need to initialize .erlang.cookie manually here because / is not writable by $DOCKER_USER +docker exec -i $TTY -u root:root "$ERLANG_CONTAINER" bash -c "openssl rand -base64 16 > /.erlang.cookie && chown $DOCKER_USER /.erlang.cookie && chmod 0400 /.erlang.cookie" +# the user must exist inside the container for `whoami` to work +docker exec -i $TTY -u root:root "$ERLANG_CONTAINER" bash -c "useradd --uid $DOCKER_USER -M -d / emqx" || true +docker exec -i $TTY -u root:root "$ERLANG_CONTAINER" bash -c "chown -R $DOCKER_USER /var/lib/secret" || true if [ "$ONLY_UP" = 'yes' ]; then exit 0 @@ -242,8 +240,7 @@ else docker exec -e IS_CI="$IS_CI" -e PROFILE="$PROFILE" -i $TTY "$ERLANG_CONTAINER" bash -c "./rebar3 ct $REBAR3CT" fi RESULT=$? - restore_ownership - if [ $RESULT -ne 0 ]; then + if [ "$RESULT" -ne 0 ]; then LOG='_build/test/logs/docker-compose.log' echo "Dumping docker-compose log to $LOG" # shellcheck disable=2086 # no quotes for F_OPTIONS @@ -253,5 +250,5 @@ else # shellcheck disable=2086 # no quotes for F_OPTIONS $DC $F_OPTIONS down fi - exit $RESULT + exit "$RESULT" fi