From 903a77b47188ce2e8c3db22982aace6c1d2233d0 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 25 Jan 2023 15:33:05 +0300 Subject: [PATCH 001/131] test(redis): ensure batch query hit different cluster shards This will inevitably fail: it's not generally possible to update different keys through the same cluster connection, one or more update will fail with `MOVED` status. This testcase should serve as a regression test later. --- .../test/emqx_ee_bridge_redis_SUITE.erl | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl index 2b67787b2..8435204fd 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl @@ -16,6 +16,9 @@ %% CT boilerplate %%------------------------------------------------------------------------------ +-define(KEYSHARDS, 3). +-define(KEYPREFIX, "MSGS"). + -define(REDIS_TOXYPROXY_CONNECT_CONFIG, #{ <<"server">> => <<"toxiproxy:6379">>, <<"redis_type">> => <<"single">> @@ -23,7 +26,7 @@ -define(COMMON_REDIS_OPTS, #{ <<"password">> => <<"public">>, - <<"command_template">> => [<<"RPUSH">>, <<"MSGS">>, <<"${payload}">>], + <<"command_template">> => [<<"RPUSH">>, <>, <<"${payload}">>], <<"local_topic">> => <<"local_topic/#">> }). @@ -47,7 +50,7 @@ ) ). -all() -> [{group, transport_types}, {group, rest}]. +all() -> [{group, transports}, {group, rest}]. 
groups() -> ResourceSpecificTCs = [t_create_delete_bridge], @@ -63,7 +66,7 @@ groups() -> ], [ {rest, TCs}, - {transport_types, [ + {transports, [ {group, tcp}, {group, tls} ]}, @@ -79,7 +82,7 @@ groups() -> init_per_group(Group, Config) when Group =:= redis_single; Group =:= redis_sentinel; Group =:= redis_cluster -> - [{transport_type, Group} | Config]; + [{connector_type, Group} | Config]; init_per_group(Group, Config) when Group =:= tcp; Group =:= tls -> @@ -139,7 +142,7 @@ end_per_suite(_Config) -> init_per_testcase(_Testcase, Config) -> ok = delete_all_rules(), ok = delete_all_bridges(), - case ?config(transport_type, Config) of + case ?config(connector_type, Config) of undefined -> Config; RedisType -> @@ -162,7 +165,7 @@ end_per_testcase(_Testcase, Config) -> t_create_delete_bridge(Config) -> Name = <<"mybridge">>, - Type = ?config(transport_type, Config), + Type = ?config(connector_type, Config), BridgeConfig = ?config(bridge_config, Config), IsBatch = ?config(is_batch, Config), ?assertMatch( @@ -350,9 +353,7 @@ check_resource_queries(ResourceId, BaseTopic, IsBatch) -> ?wait_async_action( lists:foreach( fun(I) -> - IBin = integer_to_binary(I), - Topic = <>, - _ = publish_message(Topic, RandomPayload) + _ = publish_message(format_topic(BaseTopic, I), RandomPayload) end, lists:seq(1, N) ), @@ -360,7 +361,7 @@ check_resource_queries(ResourceId, BaseTopic, IsBatch) -> 5000 ), fun(Trace) -> - AddedMsgCount = length(added_msgs(ResourceId, RandomPayload)), + AddedMsgCount = length(added_msgs(ResourceId, BaseTopic, RandomPayload)), case IsBatch of true -> ?assertMatch( @@ -378,11 +379,23 @@ check_resource_queries(ResourceId, BaseTopic, IsBatch) -> end ). -added_msgs(ResourceId, Payload) -> - {ok, Results} = emqx_resource:simple_sync_query( - ResourceId, {cmd, [<<"LRANGE">>, <<"MSGS">>, <<"0">>, <<"-1">>]} - ), - [El || El <- Results, El =:= Payload]. 
+added_msgs(ResourceId, BaseTopic, Payload) -> + lists:flatmap( + fun(K) -> + {ok, Results} = emqx_resource:simple_sync_query( + ResourceId, + {cmd, [<<"LRANGE">>, K, <<"0">>, <<"-1">>]} + ), + [El || El <- Results, El =:= Payload] + end, + [format_redis_key(BaseTopic, S) || S <- lists:seq(0, ?KEYSHARDS - 1)] + ). + +format_topic(Base, I) -> + iolist_to_binary(io_lib:format("~s/~2..0B", [Base, I rem ?KEYSHARDS])). + +format_redis_key(Base, I) -> + iolist_to_binary([?KEYPREFIX, "/", format_topic(Base, I)]). conf_schema(StructName) -> #{ From 88364945429a7835427c16d38c1d330dd4a16d45 Mon Sep 17 00:00:00 2001 From: Erik Timan Date: Wed, 25 Jan 2023 11:50:12 +0100 Subject: [PATCH 002/131] fix: redact influxdb tokens in a few logs --- apps/emqx_bridge/src/emqx_bridge.erl | 2 +- apps/emqx_bridge/src/emqx_bridge_resource.erl | 6 +++--- apps/emqx_conf/src/emqx_cluster_rpc.erl | 8 ++++---- apps/emqx_conf/src/emqx_conf.app.src | 2 +- .../src/emqx_ee_connector_influxdb.erl | 19 +++++++++++++------ 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/apps/emqx_bridge/src/emqx_bridge.erl b/apps/emqx_bridge/src/emqx_bridge.erl index 5b3fe796b..f6677bd09 100644 --- a/apps/emqx_bridge/src/emqx_bridge.erl +++ b/apps/emqx_bridge/src/emqx_bridge.erl @@ -255,7 +255,7 @@ create(BridgeType, BridgeName, RawConf) -> brige_action => create, bridge_type => BridgeType, bridge_name => BridgeName, - bridge_raw_config => RawConf + bridge_raw_config => emqx_misc:redact(RawConf) }), emqx_conf:update( emqx_bridge:config_key_path() ++ [BridgeType, BridgeName], diff --git a/apps/emqx_bridge/src/emqx_bridge_resource.erl b/apps/emqx_bridge/src/emqx_bridge_resource.erl index cbff85df3..d228f2281 100644 --- a/apps/emqx_bridge/src/emqx_bridge_resource.erl +++ b/apps/emqx_bridge/src/emqx_bridge_resource.erl @@ -137,7 +137,7 @@ create(Type, Name, Conf, Opts0) -> msg => "create bridge", type => Type, name => Name, - config => Conf + config => emqx_misc:redact(Conf) }), Opts = 
override_start_after_created(Conf, Opts0), {ok, _Data} = emqx_resource:create_local( @@ -172,7 +172,7 @@ update(Type, Name, {OldConf, Conf}, Opts0) -> msg => "update bridge", type => Type, name => Name, - config => Conf + config => emqx_misc:redact(Conf) }), case recreate(Type, Name, Conf, Opts) of {ok, _} -> @@ -182,7 +182,7 @@ update(Type, Name, {OldConf, Conf}, Opts0) -> msg => "updating_a_non_existing_bridge", type => Type, name => Name, - config => Conf + config => emqx_misc:redact(Conf) }), create(Type, Name, Conf, Opts); {error, Reason} -> diff --git a/apps/emqx_conf/src/emqx_cluster_rpc.erl b/apps/emqx_conf/src/emqx_cluster_rpc.erl index fe701049c..c285e09b8 100644 --- a/apps/emqx_conf/src/emqx_cluster_rpc.erl +++ b/apps/emqx_conf/src/emqx_cluster_rpc.erl @@ -495,15 +495,15 @@ log_and_alarm(IsSuccess, Res, #{kind := ?APPLY_KIND_INITIATE} = Meta) -> %% because nothing is committed case IsSuccess of true -> - ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_result", result => Res}); + ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_result", result => emqx_misc:redact(Res)}); false -> - ?SLOG(warning, Meta#{msg => "cluster_rpc_apply_result", result => Res}) + ?SLOG(warning, Meta#{msg => "cluster_rpc_apply_result", result => emqx_misc:redact(Res)}) end; log_and_alarm(true, Res, Meta) -> - ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_ok", result => Res}), + ?SLOG(debug, Meta#{msg => "cluster_rpc_apply_ok", result => emqx_misc:redact(Res)}), do_alarm(deactivate, Res, Meta); log_and_alarm(false, Res, Meta) -> - ?SLOG(error, Meta#{msg => "cluster_rpc_apply_failed", result => Res}), + ?SLOG(error, Meta#{msg => "cluster_rpc_apply_failed", result => emqx_misc:redact(Res)}), do_alarm(activate, Res, Meta). 
do_alarm(Fun, Res, #{tnx_id := Id} = Meta) -> diff --git a/apps/emqx_conf/src/emqx_conf.app.src b/apps/emqx_conf/src/emqx_conf.app.src index b13c0d055..f7fd33e3b 100644 --- a/apps/emqx_conf/src/emqx_conf.app.src +++ b/apps/emqx_conf/src/emqx_conf.app.src @@ -1,6 +1,6 @@ {application, emqx_conf, [ {description, "EMQX configuration management"}, - {vsn, "0.1.10"}, + {vsn, "0.1.11"}, {registered, []}, {mod, {emqx_conf_app, []}}, {applications, [kernel, stdlib]}, diff --git a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl index 824233a6d..ef1b2edc9 100644 --- a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl +++ b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl @@ -200,8 +200,8 @@ start_client(InstId, Config) -> ?SLOG(info, #{ msg => "starting influxdb connector", connector => InstId, - config => Config, - client_config => ClientConfig + config => emqx_misc:redact(Config), + client_config => emqx_misc:redact(ClientConfig) }), try do_start_client(InstId, ClientConfig, Config) @@ -236,8 +236,8 @@ do_start_client( ?SLOG(info, #{ msg => "starting influxdb connector success", connector => InstId, - client => Client, - state => State + client => redact_auth(Client), + state => redact_auth(State) }), {ok, State}; false -> @@ -245,7 +245,7 @@ do_start_client( ?SLOG(error, #{ msg => "starting influxdb connector failed", connector => InstId, - client => Client, + client => redact_auth(Client), reason => "client is not alive" }), {error, influxdb_client_not_alive} @@ -255,7 +255,7 @@ do_start_client( ?SLOG(info, #{ msg => "restarting influxdb connector, found already started client", connector => InstId, - old_client => Client0 + old_client => redact_auth(Client0) }), _ = influxdb:stop_client(Client0), do_start_client(InstId, ClientConfig, Config); @@ -338,6 +338,13 @@ password(#{password := Password}) -> password(_) -> []. 
+redact_auth(Term) -> + emqx_misc:redact(Term, fun is_auth_key/1). + +is_auth_key(<<"Authorization">>) -> true; +is_auth_key(<<"authorization">>) -> true; +is_auth_key(_) -> false. + %% ------------------------------------------------------------------------------------------------- %% Query do_query(InstId, Client, Points) -> From 805d08e823364f02f1d7ab7c7958994f68f8e661 Mon Sep 17 00:00:00 2001 From: Erik Timan Date: Wed, 25 Jan 2023 11:56:27 +0100 Subject: [PATCH 003/131] fix: reduce log level from error to warning in several places This reduces the log level from error to warning in places that are connected to the influxdb bridge. Transient errors for external resources should not render an error log. --- apps/emqx_resource/src/emqx_resource_manager.erl | 4 ++-- lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 8098dbe42..47e0cf658 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -487,7 +487,7 @@ start_resource(Data, From) -> Actions = maybe_reply([{state_timeout, 0, health_check}], From, ok), {next_state, connecting, UpdatedData, Actions}; {error, Reason} = Err -> - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => start_resource_failed, id => Data#data.id, reason => Reason @@ -546,7 +546,7 @@ handle_connected_health_check(Data) -> Actions = [{state_timeout, health_check_interval(Data#data.opts), health_check}], {keep_state, UpdatedData, Actions}; (Status, UpdatedData) -> - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => health_check_failed, id => Data#data.id, status => Status diff --git a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl index ef1b2edc9..1370ed2c2 100644 --- a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl +++ 
b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl @@ -208,7 +208,7 @@ start_client(InstId, Config) -> catch E:R:S -> ?tp(influxdb_connector_start_exception, #{error => {E, R}}), - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => "start influxdb connector error", connector => InstId, error => E, @@ -242,7 +242,7 @@ do_start_client( {ok, State}; false -> ?tp(influxdb_connector_start_failed, #{error => influxdb_client_not_alive}), - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => "starting influxdb connector failed", connector => InstId, client => redact_auth(Client), @@ -261,7 +261,7 @@ do_start_client( do_start_client(InstId, ClientConfig, Config); {error, Reason} -> ?tp(influxdb_connector_start_failed, #{error => Reason}), - ?SLOG(error, #{ + ?SLOG(warning, #{ msg => "starting influxdb connector failed", connector => InstId, reason => Reason From 26fcaecad78c52637c06b445d82b2aee63033c88 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 25 Jan 2023 15:41:52 +0300 Subject: [PATCH 004/131] fix(redis): disable batching in `redis_cluster` bridges Through configuration subsystem. --- .../src/emqx_ee_bridge_redis.erl | 56 +++++++++++++++---- .../test/emqx_ee_bridge_redis_SUITE.erl | 11 ++-- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl index 3a3963786..6b7239a76 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl @@ -7,7 +7,7 @@ -include_lib("typerefl/include/types.hrl"). -include_lib("hocon/include/hoconsc.hrl"). --import(hoconsc, [mk/2, enum/1, ref/2]). +-import(hoconsc, [mk/2, enum/1, ref/1, ref/2]). 
-export([ conn_bridge_examples/1 @@ -80,13 +80,20 @@ values(common, RedisType, SpecificOpts) -> pool_size => 8, password => <<"secret">>, command_template => [<<"LPUSH">>, <<"MSGS">>, <<"${payload}">>], - resource_opts => #{ + resource_opts => values(resource_opts, RedisType, #{}), + ssl => #{enable => false} + }, + maps:merge(Config, SpecificOpts); +values(resource_opts, "cluster", SpecificOpts) -> + SpecificOpts; +values(resource_opts, _RedisType, SpecificOpts) -> + maps:merge( + #{ batch_size => 1, batch_time => <<"20ms">> }, - ssl => #{enable => false} - }, - maps:merge(Config, SpecificOpts). + SpecificOpts + ). %% ------------------------------------------------------------------------------------------------- %% Hocon Schema Definitions @@ -115,29 +122,31 @@ fields("get_cluster") -> fields(Type) when Type == redis_single orelse Type == redis_sentinel orelse Type == redis_cluster -> - redis_bridge_common_fields() ++ - connector_fields(Type). + redis_bridge_common_fields(Type) ++ + connector_fields(Type); +fields("creation_opts_" ++ Type) -> + resource_creation_fields(Type). method_fileds(post, ConnectorType) -> - redis_bridge_common_fields() ++ + redis_bridge_common_fields(ConnectorType) ++ connector_fields(ConnectorType) ++ type_name_fields(ConnectorType); method_fileds(get, ConnectorType) -> - redis_bridge_common_fields() ++ + redis_bridge_common_fields(ConnectorType) ++ connector_fields(ConnectorType) ++ type_name_fields(ConnectorType) ++ emqx_bridge_schema:status_fields(); method_fileds(put, ConnectorType) -> - redis_bridge_common_fields() ++ + redis_bridge_common_fields(ConnectorType) ++ connector_fields(ConnectorType). -redis_bridge_common_fields() -> +redis_bridge_common_fields(Type) -> emqx_bridge_schema:common_bridge_fields() ++ [ {local_topic, mk(binary(), #{desc => ?DESC("local_topic")})}, {command_template, fun command_template/1} ] ++ - emqx_resource_schema:fields("resource_opts"). + resource_fields(Type). 
connector_fields(Type) -> RedisType = bridge_type_to_redis_conn_type(Type), @@ -156,6 +165,27 @@ type_name_fields(Type) -> {name, mk(binary(), #{required => true, desc => ?DESC("desc_name")})} ]. +resource_fields(Type) -> + [ + {resource_opts, + mk( + ref("creation_opts_" ++ atom_to_list(Type)), + #{ + required => false, + default => #{}, + desc => ?DESC(emqx_resource_schema, <<"resource_opts">>) + } + )} + ]. + +resource_creation_fields("redis_cluster") -> + % TODO + % Cluster bridge is currently incompatible with batching. + Fields = emqx_resource_schema:fields("creation_opts"), + lists:foldl(fun proplists:delete/2, Fields, [batch_size, batch_time]); +resource_creation_fields(_) -> + emqx_resource_schema:fields("creation_opts"). + desc("config") -> ?DESC("desc_config"); desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> @@ -166,6 +196,8 @@ desc(redis_sentinel) -> ?DESC(emqx_connector_redis, "sentinel"); desc(redis_cluster) -> ?DESC(emqx_connector_redis, "cluster"); +desc("creation_opts_" ++ _Type) -> + ?DESC(emqx_resource_schema, "creation_opts"); desc(_) -> undefined. 
diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl index 8435204fd..31c75ede4 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl @@ -142,12 +142,13 @@ end_per_suite(_Config) -> init_per_testcase(_Testcase, Config) -> ok = delete_all_rules(), ok = delete_all_bridges(), - case ?config(connector_type, Config) of - undefined -> + case {?config(connector_type, Config), ?config(batch_mode, Config)} of + {undefined, _} -> Config; - RedisType -> + {redis_cluster, batch_on} -> + {skip, "Batching is not supported by 'redis_cluster' bridge type"}; + {RedisType, BatchMode} -> Transport = ?config(transport, Config), - BatchMode = ?config(batch_mode, Config), #{RedisType := #{Transport := RedisConnConfig}} = redis_connect_configs(), #{BatchMode := ResourceConfig} = resource_configs(), IsBatch = (BatchMode =:= batch_on), @@ -522,7 +523,6 @@ invalid_command_bridge_config() -> Conf1#{ <<"resource_opts">> => #{ <<"query_mode">> => <<"sync">>, - <<"batch_size">> => <<"1">>, <<"worker_pool_size">> => <<"1">>, <<"start_timeout">> => <<"15s">> }, @@ -533,7 +533,6 @@ resource_configs() -> #{ batch_off => #{ <<"query_mode">> => <<"sync">>, - <<"batch_size">> => <<"1">>, <<"start_timeout">> => <<"15s">> }, batch_on => #{ From 2ee00b75a70bf957ad708ae71aeadfb61379fd12 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 25 Jan 2023 16:49:55 +0300 Subject: [PATCH 005/131] fix(redis): unwrap pipeline queries against redis cluster This is an additional safety measure in addition to the disabled batching on the bridge level. 
--- apps/emqx_connector/src/emqx_connector_redis.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/emqx_connector/src/emqx_connector_redis.erl b/apps/emqx_connector/src/emqx_connector_redis.erl index 4bb46bca3..286f7dea6 100644 --- a/apps/emqx_connector/src/emqx_connector_redis.erl +++ b/apps/emqx_connector/src/emqx_connector_redis.erl @@ -222,6 +222,8 @@ is_unrecoverable_error(Results) when is_list(Results) -> lists:any(fun is_unrecoverable_error/1, Results); is_unrecoverable_error({error, <<"ERR unknown command ", _/binary>>}) -> true; +is_unrecoverable_error({error, invalid_cluster_command}) -> + true; is_unrecoverable_error(_) -> false. @@ -267,7 +269,9 @@ do_cmd(PoolName, cluster, {cmd, Command}) -> do_cmd(Conn, _Type, {cmd, Command}) -> eredis:q(Conn, Command); do_cmd(PoolName, cluster, {cmds, Commands}) -> - wrap_qp_result(eredis_cluster:qp(PoolName, Commands)); + % TODO + % Cluster mode is currently incompatible with batching. + wrap_qp_result([eredis_cluster:q(PoolName, Command) || Command <- Commands]); do_cmd(Conn, _Type, {cmds, Commands}) -> wrap_qp_result(eredis:qp(Conn, Commands)). From 1f235ffee935696e15fb06c2da8aa7c7e31c9810 Mon Sep 17 00:00:00 2001 From: Erik Timan Date: Thu, 26 Jan 2023 10:30:13 +0100 Subject: [PATCH 006/131] refactor(emqx_ee_connector): redo redact key function --- .../src/emqx_ee_connector_influxdb.erl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl index 1370ed2c2..402202b11 100644 --- a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl +++ b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl @@ -341,9 +341,10 @@ password(_) -> redact_auth(Term) -> emqx_misc:redact(Term, fun is_auth_key/1). -is_auth_key(<<"Authorization">>) -> true; -is_auth_key(<<"authorization">>) -> true; -is_auth_key(_) -> false.
+is_auth_key(Key) when is_binary(Key) -> + string:equal("authorization", Key, true); +is_auth_key(_) -> + false. %% ------------------------------------------------------------------------------------------------- %% Query @@ -628,6 +629,13 @@ is_unrecoverable_error(_) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +is_auth_key_test_() -> + [ + ?_assert(is_auth_key(<<"Authorization">>)), + ?_assertNot(is_auth_key(<<"Something">>)), + ?_assertNot(is_auth_key(89)) + ]. + %% for coverage desc_test_() -> [ From 86cfbfb43c6f81027617ce59a4791e5d39a30a31 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Wed, 25 Jan 2023 14:07:56 +0100 Subject: [PATCH 007/131] fix: Authorization header leak in log entries for webhook There might be another possibility for leakage. If the resource manager for the webhook resource crashes, OTP might log the spec for the resource manager which contains the Config and thus the Authorization header. This is probably an issue for other resources as well and should be fixed in another commit. The following issue has been created for that: https://emqx.atlassian.net/browse/EMQX-8794 Fixes: https://emqx.atlassian.net/browse/EMQX-8791 --- apps/emqx/src/emqx_misc.erl | 6 ++- .../src/emqx_connector_http.erl | 52 ++++++++++++++++--- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/apps/emqx/src/emqx_misc.erl b/apps/emqx/src/emqx_misc.erl index c20227c07..fbeec8724 100644 --- a/apps/emqx/src/emqx_misc.erl +++ b/apps/emqx/src/emqx_misc.erl @@ -609,7 +609,11 @@ do_redact(K, V, Checker) -> -define(REDACT_VAL, "******"). redact_v(V) when is_binary(V) -> <>; -redact_v(_V) -> ?REDACT_VAL. +%% The HOCON schema system may generate sensitive values with this format +redact_v([{str, Bin}]) when is_binary(Bin) -> + [{str, <>}]; +redact_v(_V) -> + ?REDACT_VAL. is_redacted(K, V) -> do_is_redacted(K, V, fun is_sensitive_key/1).
diff --git a/apps/emqx_connector/src/emqx_connector_http.erl b/apps/emqx_connector/src/emqx_connector_http.erl index 7f84c665a..40df52d45 100644 --- a/apps/emqx_connector/src/emqx_connector_http.erl +++ b/apps/emqx_connector/src/emqx_connector_http.erl @@ -209,7 +209,7 @@ on_start( ?SLOG(info, #{ msg => "starting_http_connector", connector => InstId, - config => emqx_misc:redact(Config) + config => redact(Config) }), {Transport, TransportOpts} = case Scheme of @@ -285,7 +285,11 @@ on_query( ?TRACE( "QUERY", "http_connector_received", - #{request => Request, connector => InstId, state => State} + #{ + request => redact(Request), + connector => InstId, + state => redact(State) + } ), NRequest = formalize_request(Method, BasePath, Request), case @@ -310,7 +314,7 @@ on_query( {error, Reason} = Result -> ?SLOG(error, #{ msg => "http_connector_do_request_failed", - request => NRequest, + request => redact(NRequest), reason => Reason, connector => InstId }), @@ -322,7 +326,7 @@ on_query( {ok, StatusCode, Headers} -> ?SLOG(error, #{ msg => "http connector do request, received error response", - request => NRequest, + request => redact(NRequest), connector => InstId, status_code => StatusCode }), @@ -330,7 +334,7 @@ on_query( {ok, StatusCode, Headers, Body} -> ?SLOG(error, #{ msg => "http connector do request, received error response", - request => NRequest, + request => redact(NRequest), connector => InstId, status_code => StatusCode }), @@ -366,7 +370,11 @@ on_query_async( ?TRACE( "QUERY_ASYNC", "http_connector_received", - #{request => Request, connector => InstId, state => State} + #{ + request => redact(Request), + connector => InstId, + state => redact(State) + } ), NRequest = formalize_request(Method, BasePath, Request), Worker = @@ -401,7 +409,7 @@ do_get_status(PoolName, Timeout) -> {error, Reason} = Error -> ?SLOG(error, #{ msg => "http_connector_get_status_failed", - reason => Reason, + reason => redact(Reason), worker => Worker }), Error @@ -554,3 +562,33 @@ 
reply_delegator(ReplyFunAndArgs, Result) -> _ -> emqx_resource:apply_reply_fun(ReplyFunAndArgs, Result) end. + +%% The HOCON schema system may generate sensitive keys with this format +is_sensitive_key([{str, StringKey}]) -> + is_sensitive_key(StringKey); +is_sensitive_key(Atom) when is_atom(Atom) -> + is_sensitive_key(erlang:atom_to_binary(Atom)); +is_sensitive_key(Bin) when is_binary(Bin), (size(Bin) =:= 19 orelse size(Bin) =:= 13) -> + try + %% This is wrapped in a try-catch since we don't know that Bin is a + %% valid string so string:lowercase/1 might throw an exception. + %% + %% We want to convert this to lowercase since the http header fields + %% are case insensitive, which means that a user of the Webhook bridge + %% can write this field name in many different ways. + LowercaseBin = iolist_to_binary(string:lowercase(Bin)), + case LowercaseBin of + <<"authorization">> -> true; + <<"proxy-authorization">> -> true; + _ -> false + end + catch + _:_ -> false + end; +is_sensitive_key(_) -> + false. + +%% Function that will do a deep traversal of Data and remove sensitive +%% information (i.e., passwords) +redact(Data) -> + emqx_misc:redact(Data, fun is_sensitive_key/1). From f6b3b930b0716124aa514bd3ab4859db762eb3f4 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 19 Jan 2023 11:54:56 +0100 Subject: [PATCH 008/131] chore: improve a error log --- apps/emqx/src/emqx_config.erl | 8 +++++++- apps/emqx_authz/test/emqx_authz_redis_SUITE.erl | 3 +-- .../test/emqx_ee_bridge_gcp_pubsub_SUITE.erl | 3 --- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/emqx/src/emqx_config.erl b/apps/emqx/src/emqx_config.erl index ba4095daa..204e32a2b 100644 --- a/apps/emqx/src/emqx_config.erl +++ b/apps/emqx/src/emqx_config.erl @@ -424,7 +424,13 @@ check_config(SchemaMod, RawConf, Opts0) -> %% it's maybe too much when reporting to the user -spec compact_errors(any(), any()) -> no_return(). 
compact_errors(Schema, [Error0 | More]) when is_map(Error0) -> - Error1 = Error0#{discarded_errors_count => length(More)}, + Error1 = + case length(More) of + 0 -> + Error0; + _ -> + Error0#{unshown_errors => length(More)} + end, Error = case is_atom(Schema) of true -> diff --git a/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl b/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl index 15b180c96..c07d920ad 100644 --- a/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl +++ b/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl @@ -188,8 +188,7 @@ t_create_invalid_config(_Config) -> ?assertMatch( {error, #{ kind := validation_error, - path := "authorization.sources.1", - discarded_errors_count := 0 + path := "authorization.sources.1" }}, emqx_authz:update(?CMD_REPLACE, [C]) ). diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl index 247b7799b..222acb77b 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_gcp_pubsub_SUITE.erl @@ -850,7 +850,6 @@ test_publish_success_batch(Config) -> t_not_a_json(Config) -> ?assertMatch( {error, #{ - discarded_errors_count := 0, kind := validation_error, reason := #{exception := {error, {badmap, "not a json"}}}, %% should be censored as it contains secrets @@ -868,7 +867,6 @@ t_not_a_json(Config) -> t_not_of_service_account_type(Config) -> ?assertMatch( {error, #{ - discarded_errors_count := 0, kind := validation_error, reason := {wrong_type, <<"not a service account">>}, %% should be censored as it contains secrets @@ -887,7 +885,6 @@ t_json_missing_fields(Config) -> GCPPubSubConfig0 = ?config(gcp_pubsub_config, Config), ?assertMatch( {error, #{ - discarded_errors_count := 0, kind := validation_error, reason := {missing_keys, [ From bb26632c8a3d808a554302735dd77cd4d135fadb Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 26 Jan 2023 14:33:16 +0100 Subject: [PATCH 
009/131] fix(buffer_worker): fix a wrong assertion the assertion is to ensure queue items are not binary but should not assert the queue itself --- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 11d3753f0..eaae64dd8 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -1084,9 +1084,10 @@ estimate_size(QItem) -> erlang:external_size(QItem). -spec append_queue(id(), index(), replayq:q(), [queue_query()]) -> replayq:q(). -append_queue(Id, Index, Q, Queries) when not is_binary(Q) -> - %% we must not append a raw binary because the marshaller will get - %% lost. +append_queue(Id, Index, Q, Queries) -> + %% this assertion is to ensure that we never append a raw binary + %% because the marshaller will get lost. + false = is_binary(hd(Queries)), Q0 = replayq:append(Q, Queries), Q2 = case replayq:overflow(Q0) of From 25b4821adc53b885b0a8641799b2e37834c1de1a Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 26 Jan 2023 14:40:41 +0100 Subject: [PATCH 010/131] refactor: move the per-message overflow log from error to info level --- .../src/emqx_resource_buffer_worker.erl | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index eaae64dd8..28e2c785d 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -437,7 +437,7 @@ collect_and_enqueue_query_requests(Request0, Data0) -> end, Requests ), - NewQ = append_queue(Id, Index, Q, Queries), + {_Overflow, NewQ} = append_queue(Id, Index, Q, Queries), Data = Data0#{queue := NewQ}, {Queries, Data}.
@@ -1089,18 +1089,22 @@ append_queue(Id, Index, Q, Queries) -> %% because the marshaller will get lost. false = is_binary(hd(Queries)), Q0 = replayq:append(Q, Queries), - Q2 = + {Overflow, Q2} = case replayq:overflow(Q0) of - Overflow when Overflow =< 0 -> - Q0; - Overflow -> - PopOpts = #{bytes_limit => Overflow, count_limit => 999999999}, + OverflowBytes when OverflowBytes =< 0 -> + {[], Q0}; + OverflowBytes -> + PopOpts = #{bytes_limit => OverflowBytes, count_limit => 999999999}, {Q1, QAckRef, Items2} = replayq:pop(Q0, PopOpts), ok = replayq:ack(Q1, QAckRef), Dropped = length(Items2), emqx_resource_metrics:dropped_queue_full_inc(Id), - ?SLOG(error, #{msg => drop_query, reason => queue_full, dropped => Dropped}), - Q1 + ?SLOG(info, #{ + msg => buffer_worker_overflow, + worker_id => Id, + dropped => Dropped + }), + {Items2, Q1} end, emqx_resource_metrics:queuing_set(Id, Index, queue_count(Q2)), ?tp( @@ -1108,10 +1112,11 @@ append_queue(Id, Index, Q, Queries) -> #{ id => Id, items => Queries, - queue_count => queue_count(Q2) + queue_count => queue_count(Q2), + overflow => length(Overflow) } ), - Q2. + {Overflow, Q2}. 
%%============================================================================== %% the inflight queue for async query From ed2878916447dca24f03a022ba50014cc70b2f2a Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 26 Jan 2023 14:50:40 +0100 Subject: [PATCH 011/131] refactor(buffer_worker): no need to return after collect into buf queue --- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 28e2c785d..63a402daa 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -243,7 +243,7 @@ blocked(cast, flush, Data) -> blocked(state_timeout, unblock, St) -> resume_from_blocked(St); blocked(info, ?SEND_REQ(_ReqFrom, {query, _Request, _Opts}) = Request0, Data0) -> - {_Queries, Data} = collect_and_enqueue_query_requests(Request0, Data0), + Data = collect_and_enqueue_query_requests(Request0, Data0), {keep_state, Data}; blocked(info, {flush, _Ref}, _Data) -> keep_state_and_data; @@ -412,7 +412,7 @@ retry_inflight_sync(Ref, QueryOrBatch, Data0) -> -spec handle_query_requests(?SEND_REQ(request_from(), request()), data()) -> gen_statem:event_handler_result(state(), data()). handle_query_requests(Request0, Data0) -> - {_Queries, Data} = collect_and_enqueue_query_requests(Request0, Data0), + Data = collect_and_enqueue_query_requests(Request0, Data0), maybe_flush(Data). collect_and_enqueue_query_requests(Request0, Data0) -> @@ -438,8 +438,7 @@ collect_and_enqueue_query_requests(Request0, Data0) -> Requests ), {_Overflow, NewQ} = append_queue(Id, Index, Q, Queries), - Data = Data0#{queue := NewQ}, - {Queries, Data}. + Data0#{queue := NewQ}. 
maybe_flush(Data0) -> #{ From 1f799dfd59e21b8901cabd31ddaa7570eb09fc98 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 26 Jan 2023 16:56:32 +0100 Subject: [PATCH 012/131] fix: reply with {error, buffer_overflow} when discarded --- .../src/emqx_resource_buffer_worker.erl | 35 ++++++++++++++----- .../test/emqx_resource_SUITE.erl | 4 +-- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 63a402daa..3d08f0289 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -242,7 +242,7 @@ blocked(cast, flush, Data) -> resume_from_blocked(Data); blocked(state_timeout, unblock, St) -> resume_from_blocked(St); -blocked(info, ?SEND_REQ(_ReqFrom, {query, _Request, _Opts}) = Request0, Data0) -> +blocked(info, ?SEND_REQ(_ReqFrom, _Req) = Request0, Data0) -> Data = collect_and_enqueue_query_requests(Request0, Data0), {keep_state, Data}; blocked(info, {flush, _Ref}, _Data) -> @@ -437,9 +437,25 @@ collect_and_enqueue_query_requests(Request0, Data0) -> end, Requests ), - {_Overflow, NewQ} = append_queue(Id, Index, Q, Queries), + {Overflown, NewQ} = append_queue(Id, Index, Q, Queries), + ok = reply_overflown(Overflown), Data0#{queue := NewQ}. +reply_overflown([]) -> + ok; +reply_overflown([?QUERY(From, _Req, _HasBeenSent, _ExpireAt) | More]) -> + do_reply_caller(From, {error, buffer_overflow}), + reply_overflown(More). + +do_reply_caller(undefined, _Result) -> + ok; +do_reply_caller({F, Args}, Result) when is_function(F) -> + _ = erlang:apply(F, Args ++ [Result]), + ok; +do_reply_caller(From, Result) -> + _ = gen_statem:reply(From, Result), + ok. + maybe_flush(Data0) -> #{ batch_size := BatchSize, @@ -1082,18 +1098,19 @@ queue_item_marshaller(Item) -> estimate_size(QItem) -> erlang:external_size(QItem). 
--spec append_queue(id(), index(), replayq:q(), [queue_query()]) -> replayq:q(). +-spec append_queue(id(), index(), replayq:q(), [queue_query()]) -> + {[queue_query()], replayq:q()}. append_queue(Id, Index, Q, Queries) -> %% this assertion is to ensure that we never append a raw binary %% because the marshaller will get lost. false = is_binary(hd(Queries)), Q0 = replayq:append(Q, Queries), - {Overflow, Q2} = + {Overflown, Q2} = case replayq:overflow(Q0) of - OverflowBytes when OverflowBytes =< 0 -> + OverflownBytes when OverflownBytes =< 0 -> {[], Q0}; - OverflowBytes -> - PopOpts = #{bytes_limit => OverflowBytes, count_limit => 999999999}, + OverflownBytes -> + PopOpts = #{bytes_limit => OverflownBytes, count_limit => 999999999}, {Q1, QAckRef, Items2} = replayq:pop(Q0, PopOpts), ok = replayq:ack(Q1, QAckRef), Dropped = length(Items2), @@ -1112,10 +1129,10 @@ append_queue(Id, Index, Q, Queries) -> id => Id, items => Queries, queue_count => queue_count(Q2), - overflow => length(Overflow) + overflown => length(Overflown) } ), - {Overflow, Q2}. + {Overflown, Q2}. %%============================================================================== %% the inflight queue for async query diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 9b2af74f6..34a92a5a2 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -1226,8 +1226,8 @@ t_always_overflow(_Config) -> Payload = binary:copy(<<"a">>, 100), %% since it's sync and it should never send a request, this %% errors with `timeout'. 
- ?assertError( - timeout, + ?assertEqual( + {error, buffer_overflow}, emqx_resource:query( ?ID, {big_payload, Payload}, From c95f9794131d8b8e14a6ad92c40131814f7b72d3 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 26 Jan 2023 21:12:05 +0300 Subject: [PATCH 013/131] fix(mqtt_bridge): use correct gen_statem reply action --- apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index ba2162993..05b9f7bf1 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -305,7 +305,7 @@ connected({call, From}, {send_to_remote, Msg}, State) -> {ok, NState} -> {keep_state, NState, [{reply, From, ok}]}; {error, Reason} -> - {keep_state_and_data, [[reply, From, {error, Reason}]]} + {keep_state_and_data, {reply, From, {error, Reason}}} end; connected(cast, {send_to_remote_async, Msg, Callback}, State) -> _ = do_send_async(State, Msg, Callback), From 71f996b9d5f5572ad9e3fe03ad8bbded5d0ab2ec Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 26 Jan 2023 21:13:18 +0300 Subject: [PATCH 014/131] refactor(mqtt-bridge): unwrap single statem actions So that the code would be easier to follow and harder to break. Also drop a couple of unused macrodefs. --- .../src/mqtt/emqx_connector_mqtt_worker.erl | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index 05b9f7bf1..00b45789e 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -116,8 +116,6 @@ %% same as default in-flight limit for emqtt -define(DEFAULT_INFLIGHT_SIZE, 32). 
-define(DEFAULT_RECONNECT_DELAY_MS, timer:seconds(5)). --define(DEFAULT_SEG_BYTES, (1 bsl 20)). --define(DEFAULT_MAX_TOTAL_SIZE, (1 bsl 31)). %% @doc Start a bridge worker. Supported configs: %% start_type: 'manual' (default) or 'auto', when manual, bridge will stay @@ -274,10 +272,10 @@ idle({call, From}, ensure_started, State) -> {ok, State1} -> {next_state, connected, State1, [{reply, From, ok}, {state_timeout, 0, connected}]}; {error, Reason, _State} -> - {keep_state_and_data, [{reply, From, {error, Reason}}]} + {keep_state_and_data, {reply, From, {error, Reason}}} end; idle({call, From}, {send_to_remote, _}, _State) -> - {keep_state_and_data, [{reply, From, {error, {recoverable_error, not_connected}}}]}; + {keep_state_and_data, {reply, From, {error, {recoverable_error, not_connected}}}}; %% @doc Standing by for manual start. idle(info, idle, #{start_type := manual}) -> keep_state_and_data; @@ -303,7 +301,7 @@ connected(state_timeout, connected, State) -> connected({call, From}, {send_to_remote, Msg}, State) -> case do_send(State, Msg) of {ok, NState} -> - {keep_state, NState, [{reply, From, ok}]}; + {keep_state, NState, {reply, From, ok}}; {error, Reason} -> {keep_state_and_data, {reply, From, {error, Reason}}} end; @@ -328,21 +326,21 @@ connected(Type, Content, State) -> %% Common handlers common(StateName, {call, From}, status, _State) -> - {keep_state_and_data, [{reply, From, StateName}]}; + {keep_state_and_data, {reply, From, StateName}}; common(_StateName, {call, From}, ping, #{connection := Conn} = _State) -> Reply = emqx_connector_mqtt_mod:ping(Conn), - {keep_state_and_data, [{reply, From, Reply}]}; + {keep_state_and_data, {reply, From, Reply}}; common(_StateName, {call, From}, ensure_stopped, #{connection := undefined} = _State) -> - {keep_state_and_data, [{reply, From, ok}]}; + {keep_state_and_data, {reply, From, ok}}; common(_StateName, {call, From}, ensure_stopped, #{connection := Conn} = State) -> Reply = emqx_connector_mqtt_mod:stop(Conn), - 
{next_state, idle, State#{connection => undefined}, [{reply, From, Reply}]}; + {next_state, idle, State#{connection => undefined}, {reply, From, Reply}}; common(_StateName, {call, From}, get_forwards, #{connect_opts := #{forwards := Forwards}}) -> - {keep_state_and_data, [{reply, From, Forwards}]}; + {keep_state_and_data, {reply, From, Forwards}}; common(_StateName, {call, From}, get_subscriptions, #{connection := Connection}) -> - {keep_state_and_data, [{reply, From, maps:get(subscriptions, Connection, #{})}]}; + {keep_state_and_data, {reply, From, maps:get(subscriptions, Connection, #{})}}; common(_StateName, {call, From}, Req, _State) -> - {keep_state_and_data, [{reply, From, {error, {unsupported_request, Req}}}]}; + {keep_state_and_data, {reply, From, {error, {unsupported_request, Req}}}}; common(_StateName, info, {'EXIT', _, _}, State) -> {keep_state, State}; common(StateName, Type, Content, #{name := Name} = State) -> From d4fab92b72237b850b4b7e231eca9e295c48a2b6 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 26 Jan 2023 18:00:20 +0100 Subject: [PATCH 015/131] refactor(buffer_worker): no need to keep request for REPLY macro --- .../src/emqx_resource_buffer_worker.erl | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 3d08f0289..5460a8198 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -63,11 +63,7 @@ -define(SEND_REQ(FROM, REQUEST), {'$send_req', FROM, REQUEST}). -define(QUERY(FROM, REQUEST, SENT, EXPIRE_AT), {query, FROM, REQUEST, SENT, EXPIRE_AT}). -define(SIMPLE_QUERY(REQUEST), ?QUERY(undefined, REQUEST, false, infinity)). --define(REPLY(FROM, REQUEST, SENT, RESULT), {reply, FROM, REQUEST, SENT, RESULT}). 
--define(EXPAND(RESULT, BATCH), [ - ?REPLY(FROM, REQUEST, SENT, RESULT) - || ?QUERY(FROM, REQUEST, SENT, _EXPIRE_AT) <- BATCH -]). +-define(REPLY(FROM, SENT, RESULT), {reply, FROM, SENT, RESULT}). -define(INFLIGHT_ITEM(Ref, BatchOrQuery, IsRetriable, WorkerMRef), {Ref, BatchOrQuery, IsRetriable, WorkerMRef} ). @@ -370,8 +366,8 @@ retry_inflight_sync(Ref, QueryOrBatch, Data0) -> Result = call_query(sync, Id, Index, Ref, QueryOrBatch, QueryOpts), ReplyResult = case QueryOrBatch of - ?QUERY(From, CoreReq, HasBeenSent, _ExpireAt) -> - Reply = ?REPLY(From, CoreReq, HasBeenSent, Result), + ?QUERY(From, _, HasBeenSent, _ExpireAt) -> + Reply = ?REPLY(From, HasBeenSent, Result), reply_caller_defer_metrics(Id, Reply, QueryOpts); [?QUERY(_, _, _, _) | _] = Batch -> batch_reply_caller_defer_metrics(Id, Result, Batch, QueryOpts) @@ -548,10 +544,10 @@ do_flush( inflight_tid := InflightTID } = Data0, %% unwrap when not batching (i.e., batch size == 1) - [?QUERY(From, CoreReq, HasBeenSent, _ExpireAt) = Request] = Batch, + [?QUERY(From, _, HasBeenSent, _ExpireAt) = Request] = Batch, QueryOpts = #{inflight_tid => InflightTID, simple_query => false}, Result = call_query(configured, Id, Index, Ref, Request, QueryOpts), - Reply = ?REPLY(From, CoreReq, HasBeenSent, Result), + Reply = ?REPLY(From, HasBeenSent, Result), case reply_caller(Id, Reply, QueryOpts) of %% Failed; remove the request from the queue, as we cannot pop %% from it again, but we'll retry it using the inflight table. @@ -705,6 +701,14 @@ batch_reply_caller(Id, BatchResult, Batch, QueryOpts) -> ShouldBlock. 
batch_reply_caller_defer_metrics(Id, BatchResult, Batch, QueryOpts) -> + %% the `Mod:on_batch_query/3` returns a single result for a batch, + %% so we need to expand + Replies = lists:map( + fun(?QUERY(FROM, _REQUEST, SENT, _EXPIRE_AT)) -> + ?REPLY(FROM, SENT, BatchResult) + end, + Batch + ), {ShouldAck, PostFns} = lists:foldl( fun(Reply, {_ShouldAck, PostFns}) -> @@ -712,9 +716,7 @@ batch_reply_caller_defer_metrics(Id, BatchResult, Batch, QueryOpts) -> {ShouldAck, [PostFn | PostFns]} end, {ack, []}, - %% the `Mod:on_batch_query/3` returns a single result for a batch, - %% so we need to expand - ?EXPAND(BatchResult, Batch) + Replies ), PostFn = fun() -> lists:foreach(fun(F) -> F() end, PostFns) end, {ShouldAck, PostFn}. @@ -726,9 +728,9 @@ reply_caller(Id, Reply, QueryOpts) -> %% Should only reply to the caller when the decision is final (not %% retriable). See comment on `handle_query_result_pure'. -reply_caller_defer_metrics(Id, ?REPLY(undefined, _, HasBeenSent, Result), _QueryOpts) -> +reply_caller_defer_metrics(Id, ?REPLY(undefined, HasBeenSent, Result), _QueryOpts) -> handle_query_result_pure(Id, Result, HasBeenSent); -reply_caller_defer_metrics(Id, ?REPLY({ReplyFun, Args}, _, HasBeenSent, Result), QueryOpts) when +reply_caller_defer_metrics(Id, ?REPLY({ReplyFun, Args}, HasBeenSent, Result), QueryOpts) when is_function(ReplyFun) -> IsSimpleQuery = maps:get(simple_query, QueryOpts, false), @@ -750,7 +752,7 @@ reply_caller_defer_metrics(Id, ?REPLY({ReplyFun, Args}, _, HasBeenSent, Result), ok end, {ShouldAck, PostFn}; -reply_caller_defer_metrics(Id, ?REPLY(From, _, HasBeenSent, Result), QueryOpts) -> +reply_caller_defer_metrics(Id, ?REPLY(From, HasBeenSent, Result), QueryOpts) -> IsSimpleQuery = maps:get(simple_query, QueryOpts, false), IsUnrecoverableError = is_unrecoverable_error(Result), {ShouldAck, PostFn} = handle_query_result_pure(Id, Result, HasBeenSent), @@ -989,7 +991,7 @@ do_reply_after_query( Index, InflightTID, Ref, - ?QUERY(From, Request, 
HasBeenSent, _ExpireAt), + ?QUERY(From, _Request, HasBeenSent, _ExpireAt), QueryOpts, Result ) -> @@ -997,14 +999,14 @@ do_reply_after_query( %% but received no ACK, NOT the number of messages queued in the %% inflight window. {Action, PostFn} = reply_caller_defer_metrics( - Id, ?REPLY(From, Request, HasBeenSent, Result), QueryOpts + Id, ?REPLY(From, HasBeenSent, Result), QueryOpts ), case Action of nack -> %% Keep retrying. ?tp(buffer_worker_reply_after_query, #{ action => Action, - batch_or_query => ?QUERY(From, Request, HasBeenSent, _ExpireAt), + batch_or_query => ?QUERY(From, _Request, HasBeenSent, _ExpireAt), ref => Ref, result => Result }), @@ -1013,7 +1015,7 @@ do_reply_after_query( ack -> ?tp(buffer_worker_reply_after_query, #{ action => Action, - batch_or_query => ?QUERY(From, Request, HasBeenSent, _ExpireAt), + batch_or_query => ?QUERY(From, _Request, HasBeenSent, _ExpireAt), ref => Ref, result => Result }), From db2f631a8a49289f30eb59624cd1c220de505c03 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 11:16:06 +0100 Subject: [PATCH 016/131] refactor(buffer_worker): simplify caller reply --- .../src/emqx_resource_buffer_worker.erl | 95 +++++++------------ 1 file changed, 36 insertions(+), 59 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 5460a8198..c7a061b61 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -210,7 +210,7 @@ running(cast, flush, Data) -> flush(Data); running(cast, block, St) -> {next_state, blocked, St}; -running(info, ?SEND_REQ(_From, _Req) = Request0, Data) -> +running(info, ?SEND_REQ(_ReplyTo, _Req) = Request0, Data) -> handle_query_requests(Request0, Data); running(info, {flush, Ref}, St = #{tref := {_TRef, Ref}}) -> flush(St#{tref := undefined}); @@ -238,7 +238,7 @@ blocked(cast, flush, Data) -> resume_from_blocked(Data); 
blocked(state_timeout, unblock, St) -> resume_from_blocked(St); -blocked(info, ?SEND_REQ(_ReqFrom, _Req) = Request0, Data0) -> +blocked(info, ?SEND_REQ(_ReplyTo, _Req) = Request0, Data0) -> Data = collect_and_enqueue_query_requests(Request0, Data0), {keep_state, Data}; blocked(info, {flush, _Ref}, _Data) -> @@ -284,7 +284,8 @@ pick_call(Id, Key, Query, Timeout) -> Caller = self(), MRef = erlang:monitor(process, Pid, [{alias, reply_demonitor}]), From = {Caller, MRef}, - erlang:send(Pid, ?SEND_REQ(From, Query)), + ReplyTo = {fun gen_statem:reply/2, [From]}, + erlang:send(Pid, ?SEND_REQ(ReplyTo, Query)), receive {MRef, Response} -> erlang:demonitor(MRef, [flush]), @@ -304,8 +305,8 @@ pick_call(Id, Key, Query, Timeout) -> pick_cast(Id, Key, Query) -> ?PICK(Id, Key, Pid, begin - From = undefined, - erlang:send(Pid, ?SEND_REQ(From, Query)), + ReplyTo = undefined, + erlang:send(Pid, ?SEND_REQ(ReplyTo, Query)), ok end). @@ -366,8 +367,8 @@ retry_inflight_sync(Ref, QueryOrBatch, Data0) -> Result = call_query(sync, Id, Index, Ref, QueryOrBatch, QueryOpts), ReplyResult = case QueryOrBatch of - ?QUERY(From, _, HasBeenSent, _ExpireAt) -> - Reply = ?REPLY(From, HasBeenSent, Result), + ?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt) -> + Reply = ?REPLY(ReplyTo, HasBeenSent, Result), reply_caller_defer_metrics(Id, Reply, QueryOpts); [?QUERY(_, _, _, _) | _] = Batch -> batch_reply_caller_defer_metrics(Id, Result, Batch, QueryOpts) @@ -421,15 +422,15 @@ collect_and_enqueue_query_requests(Request0, Data0) -> Queries = lists:map( fun - (?SEND_REQ(undefined = _From, {query, Req, Opts})) -> + (?SEND_REQ(undefined = _ReplyTo, {query, Req, Opts})) -> ReplyFun = maps:get(async_reply_fun, Opts, undefined), HasBeenSent = false, ExpireAt = maps:get(expire_at, Opts), ?QUERY(ReplyFun, Req, HasBeenSent, ExpireAt); - (?SEND_REQ(From, {query, Req, Opts})) -> + (?SEND_REQ(ReplyTo, {query, Req, Opts})) -> HasBeenSent = false, ExpireAt = maps:get(expire_at, Opts), - ?QUERY(From, Req, HasBeenSent, 
ExpireAt) + ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt) end, Requests ), @@ -439,17 +440,18 @@ collect_and_enqueue_query_requests(Request0, Data0) -> reply_overflown([]) -> ok; -reply_overflown([?QUERY(From, _Req, _HasBeenSent, _ExpireAt) | More]) -> - do_reply_caller(From, {error, buffer_overflow}), +reply_overflown([?QUERY(ReplyTo, _Req, _HasBeenSent, _ExpireAt) | More]) -> + do_reply_caller(ReplyTo, {error, buffer_overflow}), reply_overflown(More). do_reply_caller(undefined, _Result) -> ok; +do_reply_caller({F, Args}, {async_return, Result}) -> + %% this is an early return to async caller, the retry + %% decision has to be made by the caller + do_reply_caller({F, Args}, Result); do_reply_caller({F, Args}, Result) when is_function(F) -> _ = erlang:apply(F, Args ++ [Result]), - ok; -do_reply_caller(From, Result) -> - _ = gen_statem:reply(From, Result), ok. maybe_flush(Data0) -> @@ -544,10 +546,10 @@ do_flush( inflight_tid := InflightTID } = Data0, %% unwrap when not batching (i.e., batch size == 1) - [?QUERY(From, _, HasBeenSent, _ExpireAt) = Request] = Batch, + [?QUERY(ReplyTo, _, HasBeenSent, _ExpireAt) = Request] = Batch, QueryOpts = #{inflight_tid => InflightTID, simple_query => false}, Result = call_query(configured, Id, Index, Ref, Request, QueryOpts), - Reply = ?REPLY(From, HasBeenSent, Result), + Reply = ?REPLY(ReplyTo, HasBeenSent, Result), case reply_caller(Id, Reply, QueryOpts) of %% Failed; remove the request from the queue, as we cannot pop %% from it again, but we'll retry it using the inflight table. @@ -730,46 +732,21 @@ reply_caller(Id, Reply, QueryOpts) -> %% retriable). See comment on `handle_query_result_pure'. 
reply_caller_defer_metrics(Id, ?REPLY(undefined, HasBeenSent, Result), _QueryOpts) -> handle_query_result_pure(Id, Result, HasBeenSent); -reply_caller_defer_metrics(Id, ?REPLY({ReplyFun, Args}, HasBeenSent, Result), QueryOpts) when - is_function(ReplyFun) --> +reply_caller_defer_metrics(Id, ?REPLY(ReplyTo, HasBeenSent, Result), QueryOpts) -> IsSimpleQuery = maps:get(simple_query, QueryOpts, false), IsUnrecoverableError = is_unrecoverable_error(Result), {ShouldAck, PostFn} = handle_query_result_pure(Id, Result, HasBeenSent), case {ShouldAck, Result, IsUnrecoverableError, IsSimpleQuery} of {ack, {async_return, _}, true, _} -> - apply(ReplyFun, Args ++ [Result]), - ok; + ok = do_reply_caller(ReplyTo, Result); {ack, {async_return, _}, false, _} -> ok; {_, _, _, true} -> - apply(ReplyFun, Args ++ [Result]), - ok; + ok = do_reply_caller(ReplyTo, Result); {nack, _, _, _} -> ok; {ack, _, _, _} -> - apply(ReplyFun, Args ++ [Result]), - ok - end, - {ShouldAck, PostFn}; -reply_caller_defer_metrics(Id, ?REPLY(From, HasBeenSent, Result), QueryOpts) -> - IsSimpleQuery = maps:get(simple_query, QueryOpts, false), - IsUnrecoverableError = is_unrecoverable_error(Result), - {ShouldAck, PostFn} = handle_query_result_pure(Id, Result, HasBeenSent), - case {ShouldAck, Result, IsUnrecoverableError, IsSimpleQuery} of - {ack, {async_return, _}, true, _} -> - gen_statem:reply(From, Result), - ok; - {ack, {async_return, _}, false, _} -> - ok; - {_, _, _, true} -> - gen_statem:reply(From, Result), - ok; - {nack, _, _, _} -> - ok; - {ack, _, _, _} -> - gen_statem:reply(From, Result), - ok + ok = do_reply_caller(ReplyTo, Result) end, {ShouldAck, PostFn}. 
@@ -935,7 +912,7 @@ apply_query_fun(sync, Mod, Id, _Index, _Ref, [?QUERY(_, _, _, _) | _] = Batch, R ?tp(call_batch_query, #{ id => Id, mod => Mod, batch => Batch, res_st => ResSt, call_mode => sync }), - Requests = [Request || ?QUERY(_From, Request, _, _ExpireAt) <- Batch], + Requests = [Request || ?QUERY(_ReplyTo, Request, _, _ExpireAt) <- Batch], ?APPLY_RESOURCE(call_batch_query, Mod:on_batch_query(Id, Requests, ResSt), Batch); apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, ResSt, QueryOpts) -> ?tp(call_batch_query_async, #{ @@ -947,7 +924,7 @@ apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, Re begin ReplyFun = fun ?MODULE:batch_reply_after_query/8, ReplyFunAndArgs = {ReplyFun, [self(), Id, Index, InflightTID, Ref, Batch, QueryOpts]}, - Requests = [Request || ?QUERY(_From, Request, _, _ExpireAt) <- Batch], + Requests = [Request || ?QUERY(_ReplyTo, Request, _, _ExpireAt) <- Batch], IsRetriable = false, WorkerMRef = undefined, InflightItem = ?INFLIGHT_ITEM(Ref, Batch, IsRetriable, WorkerMRef), @@ -964,7 +941,7 @@ reply_after_query( Index, InflightTID, Ref, - ?QUERY(_From, _Request, _HasBeenSent, ExpireAt) = Query, + ?QUERY(_ReplyTo, _Request, _HasBeenSent, ExpireAt) = Query, QueryOpts, Result ) -> @@ -991,7 +968,7 @@ do_reply_after_query( Index, InflightTID, Ref, - ?QUERY(From, _Request, HasBeenSent, _ExpireAt), + ?QUERY(ReplyTo, _Request, HasBeenSent, _ExpireAt), QueryOpts, Result ) -> @@ -999,14 +976,14 @@ do_reply_after_query( %% but received no ACK, NOT the number of messages queued in the %% inflight window. {Action, PostFn} = reply_caller_defer_metrics( - Id, ?REPLY(From, HasBeenSent, Result), QueryOpts + Id, ?REPLY(ReplyTo, HasBeenSent, Result), QueryOpts ), case Action of nack -> %% Keep retrying. 
?tp(buffer_worker_reply_after_query, #{ action => Action, - batch_or_query => ?QUERY(From, _Request, HasBeenSent, _ExpireAt), + batch_or_query => ?QUERY(ReplyTo, _Request, HasBeenSent, _ExpireAt), ref => Ref, result => Result }), @@ -1015,7 +992,7 @@ do_reply_after_query( ack -> ?tp(buffer_worker_reply_after_query, #{ action => Action, - batch_or_query => ?QUERY(From, _Request, HasBeenSent, _ExpireAt), + batch_or_query => ?QUERY(ReplyTo, _Request, HasBeenSent, _ExpireAt), ref => Ref, result => Result }), @@ -1175,7 +1152,7 @@ inflight_get_first_retriable(InflightTID, Now) -> case ets:select(InflightTID, MatchSpec, _Limit = 1) of '$end_of_table' -> none; - {[{Ref, Query = ?QUERY(_From, _CoreReq, _HasBeenSent, ExpireAt)}], _Continuation} -> + {[{Ref, Query = ?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt)}], _Continuation} -> case is_expired(ExpireAt, Now) of true -> {expired, Ref, [Query]}; @@ -1234,7 +1211,7 @@ inflight_append( inflight_append( InflightTID, ?INFLIGHT_ITEM( - Ref, ?QUERY(_From, _Req, _HasBeenSent, _ExpireAt) = Query0, IsRetriable, WorkerMRef + Ref, ?QUERY(_ReplyTo, _Req, _HasBeenSent, _ExpireAt) = Query0, IsRetriable, WorkerMRef ), Id, Index @@ -1405,7 +1382,7 @@ do_collect_requests(Acc, Count, Limit) when Count >= Limit -> lists:reverse(Acc); do_collect_requests(Acc, Count, Limit) -> receive - ?SEND_REQ(_From, _Req) = Request -> + ?SEND_REQ(_ReplyTo, _Req) = Request -> do_collect_requests([Request | Acc], Count + 1, Limit) after 0 -> lists:reverse(Acc) @@ -1413,9 +1390,9 @@ do_collect_requests(Acc, Count, Limit) -> mark_as_sent(Batch) when is_list(Batch) -> lists:map(fun mark_as_sent/1, Batch); -mark_as_sent(?QUERY(From, Req, _HasBeenSent, ExpireAt)) -> +mark_as_sent(?QUERY(ReplyTo, Req, _HasBeenSent, ExpireAt)) -> HasBeenSent = true, - ?QUERY(From, Req, HasBeenSent, ExpireAt). + ?QUERY(ReplyTo, Req, HasBeenSent, ExpireAt). 
is_unrecoverable_error({error, {unrecoverable_error, _}}) -> true; @@ -1439,7 +1416,7 @@ is_async_return(_) -> sieve_expired_requests(Batch, Now) -> {Expired, NotExpired} = lists:partition( - fun(?QUERY(_From, _CoreReq, _HasBeenSent, ExpireAt)) -> + fun(?QUERY(_ReplyTo, _CoreReq, _HasBeenSent, ExpireAt)) -> is_expired(ExpireAt, Now) end, Batch From d5f62d917e5ff996517676b13d8bc26681c812aa Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 11:40:05 +0100 Subject: [PATCH 017/131] chore: upgrade ehttpc and ecpool --- mix.exs | 4 ++-- rebar.config | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mix.exs b/mix.exs index 315212fbe..623321f29 100644 --- a/mix.exs +++ b/mix.exs @@ -47,7 +47,7 @@ defmodule EMQXUmbrella.MixProject do {:lc, github: "emqx/lc", tag: "0.3.2", override: true}, {:redbug, "2.0.8"}, {:typerefl, github: "ieQu1/typerefl", tag: "0.9.1", override: true}, - {:ehttpc, github: "emqx/ehttpc", tag: "0.4.4", override: true}, + {:ehttpc, github: "emqx/ehttpc", tag: "0.4.5", override: true}, {:gproc, github: "uwiger/gproc", tag: "0.8.0", override: true}, {:jiffy, github: "emqx/jiffy", tag: "1.0.5", override: true}, {:cowboy, github: "emqx/cowboy", tag: "2.9.0", override: true}, @@ -57,7 +57,7 @@ defmodule EMQXUmbrella.MixProject do {:gen_rpc, github: "emqx/gen_rpc", tag: "2.8.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.7", override: true}, {:minirest, github: "emqx/minirest", tag: "1.3.7", override: true}, - {:ecpool, github: "emqx/ecpool", tag: "0.5.2", override: true}, + {:ecpool, github: "emqx/ecpool", tag: "0.5.3", override: true}, {:replayq, github: "emqx/replayq", tag: "0.3.6", override: true}, {:pbkdf2, github: "emqx/erlang-pbkdf2", tag: "2.0.4", override: true}, {:emqtt, github: "emqx/emqtt", tag: "1.7.0-rc.2", override: true}, diff --git a/rebar.config b/rebar.config index a1b4df520..71a54a03d 100644 --- a/rebar.config +++ b/rebar.config @@ -49,7 +49,7 @@ , {gpb, "4.19.5"} %% gpb only 
used to build, but not for release, pin it here to avoid fetching a wrong version due to rebar plugins scattered in all the deps , {typerefl, {git, "https://github.com/ieQu1/typerefl", {tag, "0.9.1"}}} , {gun, {git, "https://github.com/emqx/gun", {tag, "1.3.9"}}} - , {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.4"}}} + , {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.5"}}} , {gproc, {git, "https://github.com/uwiger/gproc", {tag, "0.8.0"}}} , {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}} , {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.0"}}} @@ -59,7 +59,7 @@ , {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}} , {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.7"}}} , {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.7"}}} - , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.2"}}} + , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.3"}}} , {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.6"}}} , {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}} , {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.7.0-rc.2"}}} From b7e3f9d5a6b539d3a09e7886b579e3b51d32e543 Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Mon, 23 Jan 2023 13:57:49 +0100 Subject: [PATCH 018/131] fix: try-case-of rather than try-of try-of catches only what happens within but not after --- .../src/emqx_resource_buffer_worker.erl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 5460a8198..5b8524f39 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -266,11 +266,13 @@ code_change(_OldVsn, State, _Extra) -> %%============================================================================== -define(PICK(ID, KEY, 
PID, EXPR), - try gproc_pool:pick_worker(ID, KEY) of - PID when is_pid(PID) -> - EXPR; - _ -> - ?RESOURCE_ERROR(worker_not_created, "resource not created") + try + case gproc_pool:pick_worker(ID, KEY) of + PID when is_pid(PID) -> + EXPR; + _ -> + ?RESOURCE_ERROR(worker_not_created, "resource not created") + end catch error:badarg -> ?RESOURCE_ERROR(worker_not_created, "resource not created"); From a180bd9aa5197b93aaaa87c266bdc4c7a55d99d3 Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Mon, 23 Jan 2023 13:56:33 +0100 Subject: [PATCH 019/131] fix: catch error, not exit --- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 5b8524f39..0b0bf94aa 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -20,7 +20,6 @@ -module(emqx_resource_buffer_worker). -include("emqx_resource.hrl"). --include("emqx_resource_utils.hrl"). -include("emqx_resource_errors.hrl"). -include_lib("emqx/include/logger.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). @@ -276,7 +275,7 @@ code_change(_OldVsn, State, _Extra) -> catch error:badarg -> ?RESOURCE_ERROR(worker_not_created, "resource not created"); - exit:{timeout, _} -> + error:timeout -> ?RESOURCE_ERROR(timeout, "call resource timeout") end ). 
From 8f3b1f87444d4b9d77d400e9f341a6cdc54dca1f Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Mon, 23 Jan 2023 16:37:24 +0100 Subject: [PATCH 020/131] chore: add changelog --- changes/v5.0.16/fix-9832.en.md | 1 + changes/v5.0.16/fix-9832.zh.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 changes/v5.0.16/fix-9832.en.md create mode 100644 changes/v5.0.16/fix-9832.zh.md diff --git a/changes/v5.0.16/fix-9832.en.md b/changes/v5.0.16/fix-9832.en.md new file mode 100644 index 000000000..84178b63c --- /dev/null +++ b/changes/v5.0.16/fix-9832.en.md @@ -0,0 +1 @@ +Improve error log when bridge in 'sync' mode timed out to get response. diff --git a/changes/v5.0.16/fix-9832.zh.md b/changes/v5.0.16/fix-9832.zh.md new file mode 100644 index 000000000..e7fd33b6b --- /dev/null +++ b/changes/v5.0.16/fix-9832.zh.md @@ -0,0 +1 @@ +优化桥接同步资源调用超时情况下的一个错误日志。 From 2d62de518802e26695c53cc54d1f39e385f5c50a Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Mon, 23 Jan 2023 16:42:23 +0100 Subject: [PATCH 021/131] test: fix expected result from timeout error --- .../test/emqx_resource_SUITE.erl | 27 ++++++++++--------- .../test/emqx_ee_bridge_influxdb_SUITE.erl | 8 +++--- .../test/emqx_ee_bridge_mysql_SUITE.erl | 9 ++++--- .../test/emqx_ee_bridge_pgsql_SUITE.erl | 12 ++++----- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 34a92a5a2..227b6fedc 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -19,8 +19,6 @@ -compile(export_all). -include_lib("eunit/include/eunit.hrl"). --include_lib("common_test/include/ct.hrl"). --include("emqx_resource.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). @@ -772,7 +770,10 @@ t_healthy_timeout(_) -> %% the ?TEST_RESOURCE always returns the `Mod:on_get_status/2` 300ms later. 
#{health_check_interval => 200} ), - ?assertError(timeout, emqx_resource:query(?ID, get_state, #{timeout => 1_000})), + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, + emqx_resource:query(?ID, get_state, #{timeout => 1_000}) + ), ?assertMatch({ok, _Group, #{status := disconnected}}, emqx_resource_manager:ets_lookup(?ID)), ok = emqx_resource:remove_local(?ID). @@ -1583,8 +1584,8 @@ do_t_expiration_before_sending(QueryMode) -> spawn_link(fun() -> case QueryMode of sync -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query(?ID, {inc_counter, 99}, #{timeout => TimeoutMS}) ); async -> @@ -1690,8 +1691,8 @@ do_t_expiration_before_sending_partial_batch(QueryMode) -> spawn_link(fun() -> case QueryMode of sync -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query(?ID, {inc_counter, 199}, #{timeout => TimeoutMS}) ); async -> @@ -2043,8 +2044,8 @@ do_t_expiration_retry(IsBatch) -> ResumeInterval * 2 ), spawn_link(fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query( ?ID, {inc_counter, 1}, @@ -2127,8 +2128,8 @@ t_expiration_retry_batch_multiple_times(_Config) -> ), TimeoutMS = 100, spawn_link(fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query( ?ID, {inc_counter, 1}, @@ -2137,8 +2138,8 @@ t_expiration_retry_batch_multiple_times(_Config) -> ) end), spawn_link(fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, emqx_resource:query( ?ID, {inc_counter, 2}, diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl index e1899b1b2..2bac20bce 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl +++ 
b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl @@ -910,12 +910,10 @@ t_write_failure(Config) -> sync -> {_, {ok, _}} = ?wait_async_action( - try + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, send_message(Config, SentData) - catch - error:timeout -> - {error, timeout} - end, + ), #{?snk_kind := buffer_worker_flush_nack}, 1_000 ); diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl index 57792b366..fec85c874 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_mysql_SUITE.erl @@ -406,7 +406,10 @@ t_write_failure(Config) -> emqx_common_test_helpers:with_failure(down, ProxyName, ProxyHost, ProxyPort, fun() -> case QueryMode of sync -> - ?assertError(timeout, send_message(Config, SentData)); + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, + send_message(Config, SentData) + ); async -> send_message(Config, SentData) end @@ -439,8 +442,8 @@ t_write_timeout(Config) -> SentData = #{payload => Val, timestamp => 1668602148000}, Timeout = 1000, emqx_common_test_helpers:with_failure(timeout, ProxyName, ProxyHost, ProxyPort, fun() -> - ?assertError( - timeout, + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, query_resource(Config, {send_message, SentData, [], Timeout}) ) end), diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl index 25752f685..6fbb9689f 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_pgsql_SUITE.erl @@ -426,12 +426,7 @@ t_write_failure(Config) -> ?wait_async_action( case QueryMode of sync -> - try - send_message(Config, SentData) - catch - error:timeout -> - {error, timeout} - end; + ?assertMatch({error, _}, send_message(Config, SentData)); async -> send_message(Config, SentData) end, @@ -467,7 
+462,10 @@ t_write_timeout(Config) -> SentData = #{payload => Val, timestamp => 1668602148000}, Timeout = 1000, emqx_common_test_helpers:with_failure(timeout, ProxyName, ProxyHost, ProxyPort, fun() -> - ?assertError(timeout, query_resource(Config, {send_message, SentData, [], Timeout})) + ?assertMatch( + {error, {resource_error, #{reason := timeout}}}, + query_resource(Config, {send_message, SentData, [], Timeout}) + ) end), ok. From 7005b71ddfca5ee1c13a7902984c56b081f0faf7 Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Tue, 24 Jan 2023 16:57:06 +0100 Subject: [PATCH 022/131] style: fix typo in comment --- apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index a99f06f20..cd5a17184 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -899,7 +899,7 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> ), Payload1 = <<"hello2">>, Payload2 = <<"hello3">>, - %% we need to to it in other processes because it'll block due to + %% We need to do it in other processes because it'll block due to %% the long timeout spawn(fun() -> emqx:publish(emqx_message:make(LocalTopic, Payload1)) end), spawn(fun() -> emqx:publish(emqx_message:make(LocalTopic, Payload2)) end), From 7d18128ba9dbae71b2af3fc9a55fdbc2b812ddca Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Wed, 25 Jan 2023 11:46:52 +0100 Subject: [PATCH 023/131] test: async write can return noproc --- lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl index 2bac20bce..cd7f848c2 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl +++ 
b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl @@ -945,7 +945,8 @@ t_write_failure(Config) -> {error, {recoverable_error, {closed, "The connection was lost."}}} =:= Result orelse {error, {error, closed}} =:= Result orelse - {error, {recoverable_error, econnrefused}} =:= Result, + {error, {recoverable_error, econnrefused}} =:= Result orelse + {error, {recoverable_error, noproc}} =:= Result, #{got => Result} ) end, From 30a8a436b4019719764ccfeb3c237624f2ced0dd Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 11:41:24 +0100 Subject: [PATCH 024/131] fix(influxdb_bridge): no need to create atom for pool name --- lib-ee/emqx_ee_connector/rebar.config | 2 +- lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib-ee/emqx_ee_connector/rebar.config b/lib-ee/emqx_ee_connector/rebar.config index 3af1868c7..00421e4f6 100644 --- a/lib-ee/emqx_ee_connector/rebar.config +++ b/lib-ee/emqx_ee_connector/rebar.config @@ -1,7 +1,7 @@ {erl_opts, [debug_info]}. {deps, [ {hstreamdb_erl, {git, "https://github.com/hstreamdb/hstreamdb_erl.git", {tag, "0.2.5"}}}, - {influxdb, {git, "https://github.com/emqx/influxdb-client-erl", {tag, "1.1.7"}}}, + {influxdb, {git, "https://github.com/emqx/influxdb-client-erl", {tag, "1.1.8"}}}, {emqx, {path, "../../apps/emqx"}} ]}. diff --git a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl index 0ee27c5c4..fa1ce1090 100644 --- a/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl +++ b/lib-ee/emqx_ee_connector/src/emqx_ee_connector_influxdb.erl @@ -282,7 +282,7 @@ client_config( {host, str(Host)}, {port, Port}, {pool_size, erlang:system_info(schedulers)}, - {pool, binary_to_atom(InstId, utf8)}, + {pool, InstId}, {precision, atom_to_binary(maps:get(precision, Config, ms), utf8)} ] ++ protocol_config(Config). 
From d53106145f9422c2cbd966ddbb3fdb4bc9451b7c Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 12:15:07 +0100 Subject: [PATCH 025/131] fix: stop resource when resource manager terminates --- apps/emqx_resource/src/emqx_resource_manager.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 8098dbe42..d2cb74437 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -309,6 +309,7 @@ init({Data, Opts}) -> end. terminate(_Reason, _State, Data) -> + _ = stop_resource(Data), _ = maybe_clear_alarm(Data#data.id), delete_cache(Data#data.id, Data#data.manager_id), ok. @@ -334,8 +335,7 @@ handle_event({call, From}, start, _State, _Data) -> % Called when the resource received a `quit` message handle_event(info, quit, stopped, _Data) -> {stop, {shutdown, quit}}; -handle_event(info, quit, _State, Data) -> - _ = stop_resource(Data), +handle_event(info, quit, _State, _Data) -> {stop, {shutdown, quit}}; % Called when the resource is to be stopped handle_event({call, From}, stop, stopped, _Data) -> From c47be57c596ae7e1d65bc22af4485f485545ee6e Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 12:38:00 +0100 Subject: [PATCH 026/131] fix(bridge): ensure all bridge resources are stopped before app stop --- apps/emqx_bridge/src/emqx_bridge.erl | 16 ++++++++++++++++ apps/emqx_bridge/src/emqx_bridge_app.erl | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/apps/emqx_bridge/src/emqx_bridge.erl b/apps/emqx_bridge/src/emqx_bridge.erl index fb199522d..a25963190 100644 --- a/apps/emqx_bridge/src/emqx_bridge.erl +++ b/apps/emqx_bridge/src/emqx_bridge.erl @@ -31,6 +31,7 @@ -export([ load/0, + unload/0, lookup/1, lookup/2, lookup/3, @@ -75,6 +76,21 @@ load() -> maps:to_list(Bridges) ). 
+unload() -> + unload_hook(), + Bridges = emqx:get_config([bridges], #{}), + lists:foreach( + fun({Type, NamedConf}) -> + lists:foreach( + fun({Name, _Conf}) -> + _ = emqx_bridge_resource:stop(Type, Name) + end, + maps:to_list(NamedConf) + ) + end, + maps:to_list(Bridges) + ). + safe_load_bridge(Type, Name, Conf, Opts) -> try _Res = emqx_bridge_resource:create(Type, Name, Conf, Opts), diff --git a/apps/emqx_bridge/src/emqx_bridge_app.erl b/apps/emqx_bridge/src/emqx_bridge_app.erl index e10034bae..daae15a17 100644 --- a/apps/emqx_bridge/src/emqx_bridge_app.erl +++ b/apps/emqx_bridge/src/emqx_bridge_app.erl @@ -39,7 +39,7 @@ start(_StartType, _StartArgs) -> stop(_State) -> emqx_conf:remove_handler(?LEAF_NODE_HDLR_PATH), emqx_conf:remove_handler(?TOP_LELVE_HDLR_PATH), - ok = emqx_bridge:unload_hook(), + ok = emqx_bridge:unload(), ok. -if(?EMQX_RELEASE_EDITION == ee). From 6a58bafcb02f6671e3001abf6bfeea48157dbb6c Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 14:38:21 +0100 Subject: [PATCH 027/131] chore: bump release version to e5.0.0-rc.2 --- apps/emqx/include/emqx_release.hrl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index 7437bc299..d3e0cd3b0 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -35,7 +35,7 @@ -define(EMQX_RELEASE_CE, "5.0.15"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.0.0-rc.1"). +-define(EMQX_RELEASE_EE, "5.0.0-rc.2"). %% the HTTP API version -define(EMQX_API_VERSION, "5.0"). 
From 262c3a286954ca20923ff0db97b64c68f06cf7ba Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 15:03:18 +0100 Subject: [PATCH 028/131] refactor(buffer_worker): rename function from reply_after_query to handle_async_reply --- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 50534df4f..58c835533 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -52,7 +52,7 @@ -export([queue_item_marshaller/1, estimate_size/1]). --export([reply_after_query/8, batch_reply_after_query/8]). +-export([handle_async_reply/8, batch_reply_after_query/8]). -export([clear_disk_queue_dir/2]). @@ -898,7 +898,7 @@ apply_query_fun(async, Mod, Id, Index, Ref, ?QUERY(_, Request, _, _) = Query, Re ?APPLY_RESOURCE( call_query_async, begin - ReplyFun = fun ?MODULE:reply_after_query/8, + ReplyFun = fun ?MODULE:handle_async_reply/8, Args = [self(), Id, Index, InflightTID, Ref, Query, QueryOpts], IsRetriable = false, WorkerMRef = undefined, @@ -936,7 +936,7 @@ apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, Re Batch ). 
-reply_after_query( +handle_async_reply( Pid, Id, Index, From f793807bc1fd9e0216ee09f61bd863e5c8306086 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 15:04:28 +0100 Subject: [PATCH 029/131] refactor(buffer_worker): rename function batch_reply_after_query to handle_async_batch_reply --- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 58c835533..1b837880e 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -52,7 +52,7 @@ -export([queue_item_marshaller/1, estimate_size/1]). --export([handle_async_reply/8, batch_reply_after_query/8]). +-export([handle_async_reply/8, handle_async_batch_reply/8]). -export([clear_disk_queue_dir/2]). @@ -923,7 +923,7 @@ apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, Re ?APPLY_RESOURCE( call_batch_query_async, begin - ReplyFun = fun ?MODULE:batch_reply_after_query/8, + ReplyFun = fun ?MODULE:handle_async_batch_reply/8, ReplyFunAndArgs = {ReplyFun, [self(), Id, Index, InflightTID, Ref, Batch, QueryOpts]}, Requests = [Request || ?QUERY(_ReplyTo, Request, _, _ExpireAt) <- Batch], IsRetriable = false, @@ -1000,7 +1000,7 @@ do_reply_after_query( do_ack(InflightTID, Ref, Id, Index, PostFn, Pid, QueryOpts) end. 
-batch_reply_after_query(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Result) -> +handle_async_batch_reply(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Result) -> ?tp( buffer_worker_reply_after_query_enter, #{batch_or_query => Batch, ref => Ref} From 578271ea3d8a3b3b3230cd0bbbe2a4b7294232b6 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 15:15:46 +0100 Subject: [PATCH 030/131] refactor: use lists:map instead of lc for safty --- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 1b837880e..026effcf8 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -73,9 +73,8 @@ -type id() :: binary(). -type index() :: pos_integer(). -type expire_at() :: infinity | integer(). --type queue_query() :: ?QUERY(from(), request(), HasBeenSent :: boolean(), expire_at()). +-type queue_query() :: ?QUERY(reply_fun(), request(), HasBeenSent :: boolean(), expire_at()). -type request() :: term(). --type from() :: pid() | reply_fun() | request_from(). -type request_from() :: undefined | gen_statem:from(). -type state() :: blocked | running. -type inflight_key() :: integer(). 
@@ -913,7 +912,7 @@ apply_query_fun(sync, Mod, Id, _Index, _Ref, [?QUERY(_, _, _, _) | _] = Batch, R ?tp(call_batch_query, #{ id => Id, mod => Mod, batch => Batch, res_st => ResSt, call_mode => sync }), - Requests = [Request || ?QUERY(_ReplyTo, Request, _, _ExpireAt) <- Batch], + Requests = lists:map(fun(?QUERY(_ReplyTo, Request, _, _ExpireAt)) -> Request end, Batch), ?APPLY_RESOURCE(call_batch_query, Mod:on_batch_query(Id, Requests, ResSt), Batch); apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, ResSt, QueryOpts) -> ?tp(call_batch_query_async, #{ @@ -925,7 +924,9 @@ apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, Re begin ReplyFun = fun ?MODULE:handle_async_batch_reply/8, ReplyFunAndArgs = {ReplyFun, [self(), Id, Index, InflightTID, Ref, Batch, QueryOpts]}, - Requests = [Request || ?QUERY(_ReplyTo, Request, _, _ExpireAt) <- Batch], + Requests = lists:map( + fun(?QUERY(_ReplyTo, Request, _, _ExpireAt)) -> Request end, Batch + ), IsRetriable = false, WorkerMRef = undefined, InflightItem = ?INFLIGHT_ITEM(Ref, Batch, IsRetriable, WorkerMRef), From 72f39b9b728bd8dbd4ddba8279039cf8a29c5b3d Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Fri, 27 Jan 2023 17:39:16 +0200 Subject: [PATCH 031/131] fix(docs): correct Redis conf field description --- apps/emqx_connector/i18n/emqx_connector_redis.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_connector/i18n/emqx_connector_redis.conf b/apps/emqx_connector/i18n/emqx_connector_redis.conf index e8e05d08f..e42e73498 100644 --- a/apps/emqx_connector/i18n/emqx_connector_redis.conf +++ b/apps/emqx_connector/i18n/emqx_connector_redis.conf @@ -69,7 +69,7 @@ The Redis default port 6379 is used if `[:Port]` is not specified. A Node list for Cluster to connect to. The nodes should be separated with commas, such as: `Node[,Node].` For each Node should be: The IPv4 or IPv6 address or the hostname to connect to. 
A host entry has the following form: `Host[:Port]`. -The MongoDB default port 27017 is used if `[:Port]` is not specified. +The Redis default port 6379 is used if `[:Port]` is not specified. """ zh: """ From fc38ea9571337c9ba7bf3e72c423f4f7bd1e1c6f Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 17:12:55 +0100 Subject: [PATCH 032/131] refactor(buffer_worker): do not keep request body in reply context the request body can be potentially very large the reply context is sent to the async call handler and kept in its memory until the async reply is received from bridge target service. this commit tries to minimize the size of the reply context by replacing the request body with `[]`. --- .../src/emqx_resource_buffer_worker.erl | 161 +++++++++++------- .../test/emqx_resource_SUITE.erl | 2 +- 2 files changed, 102 insertions(+), 61 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 026effcf8..355fb276c 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -52,7 +52,7 @@ -export([queue_item_marshaller/1, estimate_size/1]). --export([handle_async_reply/8, handle_async_batch_reply/8]). +-export([handle_async_reply/2, handle_async_batch_reply/2]). -export([clear_disk_queue_dir/2]). @@ -124,7 +124,7 @@ simple_sync_query(Id, Request) -> Index = undefined, QueryOpts = simple_query_opts(), emqx_resource_metrics:matched_inc(Id), - Ref = make_message_ref(), + Ref = make_request_ref(), Result = call_query(sync, Id, Index, Ref, ?SIMPLE_QUERY(Request), QueryOpts), _ = handle_query_result(Id, Result, _HasBeenSent = false), Result. 
@@ -135,7 +135,7 @@ simple_async_query(Id, Request) -> Index = undefined, QueryOpts = simple_query_opts(), emqx_resource_metrics:matched_inc(Id), - Ref = make_message_ref(), + Ref = make_request_ref(), Result = call_query(async, Id, Index, Ref, ?SIMPLE_QUERY(Request), QueryOpts), _ = handle_query_result(Id, Result, _HasBeenSent = false), Result. @@ -511,7 +511,7 @@ flush(Data0) -> buffer_worker_flush_potentially_partial, #{expired => Expired, not_expired => NotExpired} ), - Ref = make_message_ref(), + Ref = make_request_ref(), do_flush(Data2, #{ new_queue => Q1, is_batch => IsBatch, @@ -897,13 +897,21 @@ apply_query_fun(async, Mod, Id, Index, Ref, ?QUERY(_, Request, _, _) = Query, Re ?APPLY_RESOURCE( call_query_async, begin - ReplyFun = fun ?MODULE:handle_async_reply/8, - Args = [self(), Id, Index, InflightTID, Ref, Query, QueryOpts], + ReplyFun = fun ?MODULE:handle_async_reply/2, + ReplyContext = #{ + buffer_worker => self(), + resource_id => Id, + worker_index => Index, + inflight_tid => InflightTID, + request_ref => Ref, + query_opts => QueryOpts, + query => minimize(Query) + }, IsRetriable = false, WorkerMRef = undefined, InflightItem = ?INFLIGHT_ITEM(Ref, Query, IsRetriable, WorkerMRef), ok = inflight_append(InflightTID, InflightItem, Id, Index), - Result = Mod:on_query_async(Id, Request, {ReplyFun, Args}, ResSt), + Result = Mod:on_query_async(Id, Request, {ReplyFun, [ReplyContext]}, ResSt), {async_return, Result} end, Request @@ -922,8 +930,16 @@ apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, Re ?APPLY_RESOURCE( call_batch_query_async, begin - ReplyFun = fun ?MODULE:handle_async_batch_reply/8, - ReplyFunAndArgs = {ReplyFun, [self(), Id, Index, InflightTID, Ref, Batch, QueryOpts]}, + ReplyFun = fun ?MODULE:handle_async_batch_reply/2, + ReplyContext = #{ + buffer_worker => self(), + resource_id => Id, + worker_index => Index, + inflight_tid => InflightTID, + request_ref => Ref, + query_opts => QueryOpts, + batch => minimize(Batch) 
+ }, Requests = lists:map( fun(?QUERY(_ReplyTo, Request, _, _ExpireAt)) -> Request end, Batch ), @@ -931,20 +947,21 @@ apply_query_fun(async, Mod, Id, Index, Ref, [?QUERY(_, _, _, _) | _] = Batch, Re WorkerMRef = undefined, InflightItem = ?INFLIGHT_ITEM(Ref, Batch, IsRetriable, WorkerMRef), ok = inflight_append(InflightTID, InflightItem, Id, Index), - Result = Mod:on_batch_query_async(Id, Requests, ReplyFunAndArgs, ResSt), + Result = Mod:on_batch_query_async(Id, Requests, {ReplyFun, [ReplyContext]}, ResSt), {async_return, Result} end, Batch ). handle_async_reply( - Pid, - Id, - Index, - InflightTID, - Ref, - ?QUERY(_ReplyTo, _Request, _HasBeenSent, ExpireAt) = Query, - QueryOpts, + #{ + request_ref := Ref, + inflight_tid := InflightTID, + resource_id := Id, + worker_index := Index, + buffer_worker := Pid, + query := ?QUERY(_, _, _, ExpireAt) = Query + } = ReplyContext, Result ) -> ?tp( @@ -961,47 +978,55 @@ handle_async_reply( ?tp(buffer_worker_reply_after_query_expired, #{expired => [Query]}), ok; false -> - do_reply_after_query(Pid, Id, Index, InflightTID, Ref, Query, QueryOpts, Result) + do_handle_async_reply(ReplyContext, Result) end. -do_reply_after_query( - Pid, - Id, - Index, - InflightTID, - Ref, - ?QUERY(ReplyTo, _Request, HasBeenSent, _ExpireAt), - QueryOpts, +do_handle_async_reply( + #{ + query_opts := QueryOpts, + resource_id := Id, + request_ref := Ref, + worker_index := Index, + buffer_worker := Pid, + inflight_tid := InflightTID, + query := ?QUERY(ReplyTo, _, Sent, _ExpireAt) = Query + }, Result ) -> %% NOTE: 'inflight' is the count of messages that were sent async %% but received no ACK, NOT the number of messages queued in the %% inflight window. 
{Action, PostFn} = reply_caller_defer_metrics( - Id, ?REPLY(ReplyTo, HasBeenSent, Result), QueryOpts + Id, ?REPLY(ReplyTo, Sent, Result), QueryOpts ), + + ?tp(buffer_worker_reply_after_query, #{ + action => Action, + batch_or_query => [Query], + ref => Ref, + result => Result + }), + case Action of nack -> %% Keep retrying. - ?tp(buffer_worker_reply_after_query, #{ - action => Action, - batch_or_query => ?QUERY(ReplyTo, _Request, HasBeenSent, _ExpireAt), - ref => Ref, - result => Result - }), mark_inflight_as_retriable(InflightTID, Ref), ?MODULE:block(Pid); ack -> - ?tp(buffer_worker_reply_after_query, #{ - action => Action, - batch_or_query => ?QUERY(ReplyTo, _Request, HasBeenSent, _ExpireAt), - ref => Ref, - result => Result - }), do_ack(InflightTID, Ref, Id, Index, PostFn, Pid, QueryOpts) end. -handle_async_batch_reply(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Result) -> +handle_async_batch_reply( + #{ + buffer_worker := Pid, + resource_id := Id, + worker_index := Index, + inflight_tid := InflightTID, + request_ref := Ref, + batch := Batch + } = ReplyContext, + Result +) -> ?tp( buffer_worker_reply_after_query_enter, #{batch_or_query => Batch, ref => Ref} @@ -1020,12 +1045,21 @@ handle_async_batch_reply(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Res emqx_resource_metrics:late_reply_inc(Id, NumExpired), NumExpired > 0 andalso ?tp(buffer_worker_reply_after_query_expired, #{expired => Expired}), - do_batch_reply_after_query( - Pid, Id, Index, InflightTID, Ref, NotExpired, QueryOpts, Result - ) + do_handle_async_batch_reply(ReplyContext#{batch := NotExpired}, Result) end. 
-do_batch_reply_after_query(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, Result) -> +do_handle_async_batch_reply( + #{ + buffer_worker := Pid, + resource_id := Id, + worker_index := Index, + inflight_tid := InflightTID, + request_ref := Ref, + batch := Batch, + query_opts := QueryOpts + }, + Result +) -> ?tp( buffer_worker_reply_after_query_enter, #{batch_or_query => Batch, ref => Ref} @@ -1034,24 +1068,18 @@ do_batch_reply_after_query(Pid, Id, Index, InflightTID, Ref, Batch, QueryOpts, R %% but received no ACK, NOT the number of messages queued in the %% inflight window. {Action, PostFn} = batch_reply_caller_defer_metrics(Id, Result, Batch, QueryOpts), + ?tp(buffer_worker_reply_after_query, #{ + action => Action, + batch_or_query => Batch, + ref => Ref, + result => Result + }), case Action of nack -> %% Keep retrying. - ?tp(buffer_worker_reply_after_query, #{ - action => nack, - batch_or_query => Batch, - ref => Ref, - result => Result - }), mark_inflight_as_retriable(InflightTID, Ref), ?MODULE:block(Pid); ack -> - ?tp(buffer_worker_reply_after_query, #{ - action => ack, - batch_or_query => Batch, - ref => Ref, - result => Result - }), do_ack(InflightTID, Ref, Id, Index, PostFn, Pid, QueryOpts) end. @@ -1098,7 +1126,8 @@ append_queue(Id, Index, Q, Queries) -> emqx_resource_metrics:dropped_queue_full_inc(Id), ?SLOG(info, #{ msg => buffer_worker_overflow, - worker_id => Id, + resource_id => Id, + worker_index => Index, dropped => Dropped }), {Items2, Q1} @@ -1133,7 +1162,7 @@ inflight_new(InfltWinSZ, Id, Index) -> inflight_append(TableId, {?SIZE_REF, 0}, Id, Index), inflight_append(TableId, {?INITIAL_TIME_REF, erlang:system_time()}, Id, Index), inflight_append( - TableId, {?INITIAL_MONOTONIC_TIME_REF, make_message_ref()}, Id, Index + TableId, {?INITIAL_MONOTONIC_TIME_REF, make_request_ref()}, Id, Index ), TableId. @@ -1372,8 +1401,8 @@ cancel_flush_timer(St = #{tref := {TRef, _Ref}}) -> _ = erlang:cancel_timer(TRef), St#{tref => undefined}. 
--spec make_message_ref() -> inflight_key(). -make_message_ref() -> +-spec make_request_ref() -> inflight_key(). +make_request_ref() -> now_(). collect_requests(Acc, Limit) -> @@ -1459,3 +1488,15 @@ ensure_expire_at(#{timeout := TimeoutMS} = Opts) -> TimeoutNS = erlang:convert_time_unit(TimeoutMS, millisecond, nanosecond), ExpireAt = now_() + TimeoutNS, Opts#{expire_at => ExpireAt}. + +%% no need to keep the request for async reply handler +minimize(?QUERY(_, _, _, _) = Q) -> + do_minimize(Q); +minimize(L) when is_list(L) -> + lists:map(fun do_minimize/1, L). + +-ifdef(TEST). +do_minimize(?QUERY(_ReplyTo, _Req, _Sent, _ExpireAt) = Query) -> Query. +-else. +do_minimize(?QUERY(ReplyTo, _Req, Sent, ExpireAt)) -> ?QUERY(ReplyTo, [], Sent, ExpireAt). +-endif. diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 227b6fedc..fc201e048 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -2335,7 +2335,7 @@ assert_async_retry_fail_then_succeed_inflight(Trace) -> ct:pal(" ~p", [Trace]), ?assert( ?strict_causality( - #{?snk_kind := buffer_worker_reply_after_query, action := nack, ref := _Ref}, + #{?snk_kind := buffer_worker_reply_after_query, action := nack}, #{?snk_kind := buffer_worker_retry_inflight_failed, ref := _Ref}, Trace ) From 940b238ad9da3dfa0e012c4bad98fdaf62ce958b Mon Sep 17 00:00:00 2001 From: Antoine Bertin Date: Sun, 22 Jan 2023 18:00:15 +0100 Subject: [PATCH 033/131] fix: add clusterDomain in values --- deploy/charts/emqx/templates/configmap.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/charts/emqx/templates/configmap.yaml b/deploy/charts/emqx/templates/configmap.yaml index 5086f85f6..6bd815ca0 100644 --- a/deploy/charts/emqx/templates/configmap.yaml +++ b/deploy/charts/emqx/templates/configmap.yaml @@ -16,9 +16,9 @@ data: EMQX_CLUSTER__K8S__SERVICE_NAME: {{ include "emqx.fullname" . 
}}-headless EMQX_CLUSTER__K8S__NAMESPACE: {{ .Release.Namespace }} EMQX_CLUSTER__K8S__ADDRESS_TYPE: "hostname" - EMQX_CLUSTER__K8S__SUFFIX: "svc.cluster.local" + EMQX_CLUSTER__K8S__SUFFIX: "svc.{{ .Values.clusterDomain }}" {{- else if eq (.Values.emqxConfig.EMQX_CLUSTER__DISCOVERY_STRATEGY) "dns" }} - EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . }}-headless.{{ .Release.Namespace }}.svc.cluster.local" + EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" EMQX_CLUSTER__DNS__RECORD_TYPE: "srv" {{- end -}} {{- range $index, $value := .Values.emqxConfig }} From 297d33dbad4a2a8dded59b1633f0bf79de6a38eb Mon Sep 17 00:00:00 2001 From: Antoine Bertin Date: Sun, 22 Jan 2023 18:05:12 +0100 Subject: [PATCH 034/131] feat: add clusterDomain value --- deploy/charts/emqx/values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deploy/charts/emqx/values.yaml b/deploy/charts/emqx/values.yaml index 0423c8cdf..c737c8808 100644 --- a/deploy/charts/emqx/values.yaml +++ b/deploy/charts/emqx/values.yaml @@ -35,6 +35,8 @@ serviceAccount: ## Forces the recreation of pods during helm upgrades. This can be useful to update configuration values even if the container image did not change. 
recreatePods: false +clusterDomain: cluster.local + podAnnotations: {} # Pod deployment policy From 81e7f26ec6173bc1b42cdbf5213c32e5adabd8f0 Mon Sep 17 00:00:00 2001 From: Antoine Bertin Date: Fri, 27 Jan 2023 23:44:58 +0100 Subject: [PATCH 035/131] fix: add clusterDomain to enterprise chart --- deploy/charts/emqx-enterprise/templates/configmap.yaml | 4 ++-- deploy/charts/emqx-enterprise/values.yaml | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/deploy/charts/emqx-enterprise/templates/configmap.yaml b/deploy/charts/emqx-enterprise/templates/configmap.yaml index 5086f85f6..6bd815ca0 100644 --- a/deploy/charts/emqx-enterprise/templates/configmap.yaml +++ b/deploy/charts/emqx-enterprise/templates/configmap.yaml @@ -16,9 +16,9 @@ data: EMQX_CLUSTER__K8S__SERVICE_NAME: {{ include "emqx.fullname" . }}-headless EMQX_CLUSTER__K8S__NAMESPACE: {{ .Release.Namespace }} EMQX_CLUSTER__K8S__ADDRESS_TYPE: "hostname" - EMQX_CLUSTER__K8S__SUFFIX: "svc.cluster.local" + EMQX_CLUSTER__K8S__SUFFIX: "svc.{{ .Values.clusterDomain }}" {{- else if eq (.Values.emqxConfig.EMQX_CLUSTER__DISCOVERY_STRATEGY) "dns" }} - EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . }}-headless.{{ .Release.Namespace }}.svc.cluster.local" + EMQX_CLUSTER__DNS__NAME: "{{ include "emqx.fullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" EMQX_CLUSTER__DNS__RECORD_TYPE: "srv" {{- end -}} {{- range $index, $value := .Values.emqxConfig }} diff --git a/deploy/charts/emqx-enterprise/values.yaml b/deploy/charts/emqx-enterprise/values.yaml index b9507c5a0..3a607a71e 100644 --- a/deploy/charts/emqx-enterprise/values.yaml +++ b/deploy/charts/emqx-enterprise/values.yaml @@ -35,6 +35,8 @@ serviceAccount: ## Forces the recreation of pods during helm upgrades. This can be useful to update configuration values even if the container image did not change. 
recreatePods: false +clusterDomain: cluster.local + podAnnotations: {} # Pod deployment policy From d47941601d1d78cc4fe345299cf9a9df60c2fd83 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 27 Jan 2023 17:27:27 +0100 Subject: [PATCH 036/131] refactor(buffer_worker): rename trace points --- .../src/emqx_resource_buffer_worker.erl | 29 +++++++------------ .../test/emqx_resource_SUITE.erl | 16 +++++----- .../kafka/emqx_bridge_impl_kafka_producer.erl | 2 +- .../test/emqx_ee_bridge_influxdb_SUITE.erl | 4 +-- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 355fb276c..0ac627d29 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -960,13 +960,13 @@ handle_async_reply( resource_id := Id, worker_index := Index, buffer_worker := Pid, - query := ?QUERY(_, _, _, ExpireAt) = Query + query := ?QUERY(_, _, _, ExpireAt) = _Query } = ReplyContext, Result ) -> ?tp( - buffer_worker_reply_after_query_enter, - #{batch_or_query => [Query], ref => Ref} + handle_async_reply_enter, + #{batch_or_query => [_Query], ref => Ref} ), Now = now_(), case is_expired(ExpireAt, Now) of @@ -975,7 +975,7 @@ handle_async_reply( IsAcked = ack_inflight(InflightTID, Ref, Id, Index), IsAcked andalso emqx_resource_metrics:late_reply_inc(Id), IsFullBefore andalso ?MODULE:flush_worker(Pid), - ?tp(buffer_worker_reply_after_query_expired, #{expired => [Query]}), + ?tp(handle_async_reply_expired, #{expired => [_Query]}), ok; false -> do_handle_async_reply(ReplyContext, Result) @@ -989,7 +989,7 @@ do_handle_async_reply( worker_index := Index, buffer_worker := Pid, inflight_tid := InflightTID, - query := ?QUERY(ReplyTo, _, Sent, _ExpireAt) = Query + query := ?QUERY(ReplyTo, _, Sent, _ExpireAt) = _Query }, Result ) -> @@ -1000,9 +1000,9 @@ do_handle_async_reply( Id, ?REPLY(ReplyTo, Sent, 
Result), QueryOpts ), - ?tp(buffer_worker_reply_after_query, #{ + ?tp(handle_async_reply, #{ action => Action, - batch_or_query => [Query], + batch_or_query => [_Query], ref => Ref, result => Result }), @@ -1028,7 +1028,7 @@ handle_async_batch_reply( Result ) -> ?tp( - buffer_worker_reply_after_query_enter, + handle_async_reply_enter, #{batch_or_query => Batch, ref => Ref} ), Now = now_(), @@ -1038,13 +1038,13 @@ handle_async_batch_reply( IsAcked = ack_inflight(InflightTID, Ref, Id, Index), IsAcked andalso emqx_resource_metrics:late_reply_inc(Id), IsFullBefore andalso ?MODULE:flush_worker(Pid), - ?tp(buffer_worker_reply_after_query_expired, #{expired => Batch}), + ?tp(handle_async_reply_expired, #{expired => Batch}), ok; {NotExpired, Expired} -> NumExpired = length(Expired), emqx_resource_metrics:late_reply_inc(Id, NumExpired), NumExpired > 0 andalso - ?tp(buffer_worker_reply_after_query_expired, #{expired => Expired}), + ?tp(handle_async_reply_expired, #{expired => Expired}), do_handle_async_batch_reply(ReplyContext#{batch := NotExpired}, Result) end. @@ -1060,15 +1060,8 @@ do_handle_async_batch_reply( }, Result ) -> - ?tp( - buffer_worker_reply_after_query_enter, - #{batch_or_query => Batch, ref => Ref} - ), - %% NOTE: 'inflight' is the count of messages that were sent async - %% but received no ACK, NOT the number of messages queued in the - %% inflight window. 
{Action, PostFn} = batch_reply_caller_defer_metrics(Id, Result, Batch, QueryOpts), - ?tp(buffer_worker_reply_after_query, #{ + ?tp(handle_async_reply, #{ action => Action, batch_or_query => Batch, ref => Ref, diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index fc201e048..86336a38f 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -1718,7 +1718,7 @@ do_t_expiration_before_sending_partial_batch(QueryMode) -> async -> {ok, _} = ?block_until( #{ - ?snk_kind := buffer_worker_reply_after_query, + ?snk_kind := handle_async_reply, action := ack, batch_or_query := [{query, _, {inc_counter, 99}, _, _}] }, @@ -1849,7 +1849,7 @@ do_t_expiration_async_after_reply(IsBatch) -> ?force_ordering( #{?snk_kind := delay}, #{ - ?snk_kind := buffer_worker_reply_after_query_enter, + ?snk_kind := handle_async_reply_enter, batch_or_query := [{query, _, {inc_counter, 199}, _, _} | _] } ), @@ -1874,7 +1874,7 @@ do_t_expiration_async_after_reply(IsBatch) -> #{?snk_kind := buffer_worker_flush_potentially_partial}, 4 * TimeoutMS ), {ok, _} = ?block_until( - #{?snk_kind := buffer_worker_reply_after_query_expired}, 10 * TimeoutMS + #{?snk_kind := handle_async_reply_expired}, 10 * TimeoutMS ), unlink(Pid0), @@ -1888,7 +1888,7 @@ do_t_expiration_async_after_reply(IsBatch) -> expired := [{query, _, {inc_counter, 199}, _, _}] } ], - ?of_kind(buffer_worker_reply_after_query_expired, Trace) + ?of_kind(handle_async_reply_expired, Trace) ), wait_telemetry_event(success, #{n_events => 1, timeout => 4_000}), Metrics = tap_metrics(?LINE), @@ -1936,7 +1936,7 @@ t_expiration_batch_all_expired_after_reply(_Config) -> ?force_ordering( #{?snk_kind := delay}, #{ - ?snk_kind := buffer_worker_reply_after_query_enter, + ?snk_kind := handle_async_reply_enter, batch_or_query := [{query, _, {inc_counter, 199}, _, _} | _] } ), @@ -1955,7 +1955,7 @@ 
t_expiration_batch_all_expired_after_reply(_Config) -> end), {ok, _} = ?block_until( - #{?snk_kind := buffer_worker_reply_after_query_expired}, 10 * TimeoutMS + #{?snk_kind := handle_async_reply_expired}, 10 * TimeoutMS ), unlink(Pid0), @@ -1969,7 +1969,7 @@ t_expiration_batch_all_expired_after_reply(_Config) -> expired := [{query, _, {inc_counter, 199}, _, _}] } ], - ?of_kind(buffer_worker_reply_after_query_expired, Trace) + ?of_kind(handle_async_reply_expired, Trace) ), Metrics = tap_metrics(?LINE), ?assertMatch( @@ -2335,7 +2335,7 @@ assert_async_retry_fail_then_succeed_inflight(Trace) -> ct:pal(" ~p", [Trace]), ?assert( ?strict_causality( - #{?snk_kind := buffer_worker_reply_after_query, action := nack}, + #{?snk_kind := handle_async_reply, action := nack}, #{?snk_kind := buffer_worker_retry_inflight_failed, ref := _Ref}, Trace ) diff --git a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl index 18e27b775..1ac619626 100644 --- a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl +++ b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl @@ -227,7 +227,7 @@ render_timestamp(Template, Message) -> %% Wolff producer never gives up retrying %% so there can only be 'ok' results. 
on_kafka_ack(_Partition, Offset, {ReplyFn, Args}) when is_integer(Offset) -> - %% the ReplyFn is emqx_resource_worker:reply_after_query/8 + %% the ReplyFn is emqx_resource_worker:handle_async_reply/2 apply(ReplyFn, Args ++ [ok]); on_kafka_ack(_Partition, buffer_overflow_discarded, _Callback) -> %% wolff should bump the dropped_queue_full counter diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl index cd7f848c2..bbde88cc7 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_influxdb_SUITE.erl @@ -920,7 +920,7 @@ t_write_failure(Config) -> async -> ?wait_async_action( ?assertEqual(ok, send_message(Config, SentData)), - #{?snk_kind := buffer_worker_reply_after_query}, + #{?snk_kind := handle_async_reply}, 1_000 ) end @@ -938,7 +938,7 @@ t_write_failure(Config) -> #{got => Result} ); async -> - Trace = ?of_kind(buffer_worker_reply_after_query, Trace0), + Trace = ?of_kind(handle_async_reply, Trace0), ?assertMatch([#{action := nack} | _], Trace), [#{result := Result} | _] = Trace, ?assert( From 6cbad047cd1283f0c4b50975c28aaf1e063abde2 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 12:16:17 +0800 Subject: [PATCH 037/131] fix: don't log CONNECT twice when debug --- apps/emqx/src/emqx_connection.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index 5ed302a6f..714b077ca 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -550,6 +550,7 @@ handle_msg( }, handle_incoming(Packet, NState); handle_msg({incoming, Packet}, State) -> + ?TRACE("MQTT", "mqtt_packet_received", #{packet => Packet}), handle_incoming(Packet, State); handle_msg({outgoing, Packets}, State) -> handle_outgoing(Packets, State); @@ -783,7 +784,6 @@ parse_incoming(Data, Packets, State = #state{parse_state = 
ParseState}) -> handle_incoming(Packet, State) when is_record(Packet, mqtt_packet) -> ok = inc_incoming_stats(Packet), - ?TRACE("MQTT", "mqtt_packet_received", #{packet => Packet}), with_channel(handle_in, [Packet], State); handle_incoming(FrameError, State) -> with_channel(handle_in, [FrameError], State). From ce32ea7334da3cdb925d0c556f3e7d1a6dca98dc Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 12:18:22 +0800 Subject: [PATCH 038/131] feat: Make the log output format order fixed --- apps/emqx/include/logger.hrl | 4 +- apps/emqx/src/emqx_cm.erl | 2 +- apps/emqx/src/emqx_logger_textfmt.erl | 65 ++++++++++++++------------- apps/emqx/test/emqx_mqtt_SUITE.erl | 2 +- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/apps/emqx/include/logger.hrl b/apps/emqx/include/logger.hrl index e93aa46f4..27ffc6cc0 100644 --- a/apps/emqx/include/logger.hrl +++ b/apps/emqx/include/logger.hrl @@ -48,9 +48,9 @@ -define(TRACE(Level, Tag, Msg, Meta), begin case persistent_term:get(?TRACE_FILTER, []) of [] -> ok; - %% We can't bind filter list to a variablebecause we pollute the calling scope with it. + %% We can't bind filter list to a variable because we pollute the calling scope with it. %% We also don't want to wrap the macro body in a fun - %% beacause this adds overhead to the happy path. + %% because this adds overhead to the happy path. %% So evaluate `persistent_term:get` twice. _ -> emqx_trace:log(persistent_term:get(?TRACE_FILTER, []), Msg, (Meta)#{trace_tag => Tag}) end, diff --git a/apps/emqx/src/emqx_cm.erl b/apps/emqx/src/emqx_cm.erl index 66e9a2aee..77bc44eeb 100644 --- a/apps/emqx/src/emqx_cm.erl +++ b/apps/emqx/src/emqx_cm.erl @@ -152,7 +152,7 @@ start_link() -> insert_channel_info(ClientId, Info, Stats) -> Chan = {ClientId, self()}, true = ets:insert(?CHAN_INFO_TAB, {Chan, Info, Stats}), - ?tp(debug, insert_channel_info, #{client_id => ClientId}), + ?tp(debug, insert_channel_info, #{clientid => ClientId}), ok. 
%% @private diff --git a/apps/emqx/src/emqx_logger_textfmt.erl b/apps/emqx/src/emqx_logger_textfmt.erl index 3695929d9..c1d85f341 100644 --- a/apps/emqx/src/emqx_logger_textfmt.erl +++ b/apps/emqx/src/emqx_logger_textfmt.erl @@ -22,20 +22,49 @@ check_config(X) -> logger_formatter:check_config(X). -format(#{msg := {report, Report0}, meta := Meta} = Event, Config) when is_map(Report0) -> - Report1 = enrich_report_mfa(Report0, Meta), - Report2 = enrich_report_clientid(Report1, Meta), - Report3 = enrich_report_peername(Report2, Meta), - Report4 = enrich_report_topic(Report3, Meta), - logger_formatter:format(Event#{msg := {report, Report4}}, Config); +format(#{msg := {report, ReportMap}, meta := Meta} = Event, Config) when is_map(ReportMap) -> + Report = enrich_report(ReportMap, Meta), + logger_formatter:format(Event#{msg := {report, Report}}, Config); format(#{msg := {string, String}} = Event, Config) -> format(Event#{msg => {"~ts ", [String]}}, Config); +%% trace format(#{msg := Msg0, meta := Meta} = Event, Config) -> Msg1 = enrich_client_info(Msg0, Meta), Msg2 = enrich_mfa(Msg1, Meta), Msg3 = enrich_topic(Msg2, Meta), logger_formatter:format(Event#{msg := Msg3}, Config). +enrich_report(ReportRaw, Meta) -> + %% clientid and peername always in emqx_conn's process metadata. 
+ %% topic can be put in meta using ?SLOG/3, or put in msg's report by ?SLOG/2 + Topic = + case maps:get(topic, Meta, undefined) of + undefined -> maps:get(topic, ReportRaw, undefined); + Topic0 -> Topic0 + end, + ClientId = maps:get(clientid, Meta, undefined), + Peer = maps:get(peername, Meta, undefined), + MFA = maps:get(mfa, Meta, undefined), + Line = maps:get(line, Meta, undefined), + Msg = maps:get(msg, ReportRaw, undefined), + lists:foldl( + fun + ({_, undefined}, Acc) -> Acc; + (Item, Acc) -> [Item | Acc] + end, + maps:to_list(maps:without([topic, msg, clientid], ReportRaw)), + [ + {topic, try_format_unicode(Topic)}, + {clientid, try_format_unicode(ClientId)}, + {peername, Peer}, + {line, Line}, + {mfa, mfa(MFA)}, + {msg, Msg} + ] + ). + +try_format_unicode(undefined) -> + undefined; try_format_unicode(Char) -> List = try @@ -53,30 +82,6 @@ try_format_unicode(Char) -> _ -> List end. -enrich_report_mfa(Report, #{mfa := Mfa, line := Line}) -> - Report#{mfa => mfa(Mfa), line => Line}; -enrich_report_mfa(Report, _) -> - Report. - -enrich_report_clientid(Report, #{clientid := ClientId}) -> - Report#{clientid => try_format_unicode(ClientId)}; -enrich_report_clientid(Report, _) -> - Report. - -enrich_report_peername(Report, #{peername := Peername}) -> - Report#{peername => Peername}; -enrich_report_peername(Report, _) -> - Report. - -%% clientid and peername always in emqx_conn's process metadata. -%% topic can be put in meta using ?SLOG/3, or put in msg's report by ?SLOG/2 -enrich_report_topic(Report, #{topic := Topic}) -> - Report#{topic => try_format_unicode(Topic)}; -enrich_report_topic(Report = #{topic := Topic}, _) -> - Report#{topic => try_format_unicode(Topic)}; -enrich_report_topic(Report, _) -> - Report. 
- enrich_mfa({Fmt, Args}, #{mfa := Mfa, line := Line}) when is_list(Fmt) -> {Fmt ++ " mfa: ~ts line: ~w", Args ++ [mfa(Mfa), Line]}; enrich_mfa(Msg, _) -> diff --git a/apps/emqx/test/emqx_mqtt_SUITE.erl b/apps/emqx/test/emqx_mqtt_SUITE.erl index 287d7fdba..d0162b34b 100644 --- a/apps/emqx/test/emqx_mqtt_SUITE.erl +++ b/apps/emqx/test/emqx_mqtt_SUITE.erl @@ -237,7 +237,7 @@ do_async_set_keepalive() -> {ok, _} = ?block_until( #{ ?snk_kind := insert_channel_info, - client_id := ClientID + clientid := ClientID }, 2000, 100 From 25090563afe5715e70c0b730c0bcfd915ba5a399 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 12:28:51 +0800 Subject: [PATCH 039/131] chore: use brackets to wrap the mqtt packet when logging --- apps/emqx/src/emqx_packet.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx/src/emqx_packet.erl b/apps/emqx/src/emqx_packet.erl index c247419f0..9fa0b00da 100644 --- a/apps/emqx/src/emqx_packet.erl +++ b/apps/emqx/src/emqx_packet.erl @@ -477,8 +477,8 @@ format(Packet) -> format(Packet, emqx_trace_handler:payload_encode()). format(#mqtt_packet{header = Header, variable = Variable, payload = Payload}, PayloadEncode) -> HeaderIO = format_header(Header), case format_variable(Variable, Payload, PayloadEncode) of - "" -> HeaderIO; - VarIO -> [HeaderIO, ",", VarIO] + "" -> [HeaderIO, ")"]; + VarIO -> [HeaderIO, ", ", VarIO, ")"] end. format_header(#mqtt_packet_header{ @@ -487,14 +487,14 @@ format_header(#mqtt_packet_header{ qos = QoS, retain = Retain }) -> - io_lib:format("~ts(Q~p, R~p, D~p)", [type_name(Type), QoS, i(Retain), i(Dup)]). + io_lib:format("~ts(Q~p, R~p, D~p", [type_name(Type), QoS, i(Retain), i(Dup)]). format_variable(undefined, _, _) -> ""; format_variable(Variable, undefined, PayloadEncode) -> format_variable(Variable, PayloadEncode); format_variable(Variable, Payload, PayloadEncode) -> - [format_variable(Variable, PayloadEncode), ",", format_payload(Payload, PayloadEncode)]. 
+ [format_variable(Variable, PayloadEncode), ", ", format_payload(Payload, PayloadEncode)]. format_variable( #mqtt_packet_connect{ From 3d07271ea50bab0fdbfffa8193a2d41e512fec14 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 14:30:20 +0800 Subject: [PATCH 040/131] fix: crash when mfa not found --- apps/emqx/src/emqx_logger_textfmt.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/emqx/src/emqx_logger_textfmt.erl b/apps/emqx/src/emqx_logger_textfmt.erl index c1d85f341..fe67153ec 100644 --- a/apps/emqx/src/emqx_logger_textfmt.erl +++ b/apps/emqx/src/emqx_logger_textfmt.erl @@ -101,4 +101,5 @@ enrich_topic({Fmt, Args}, #{topic := Topic}) when is_list(Fmt) -> enrich_topic(Msg, _) -> Msg. +mfa(undefined) -> undefined; mfa({M, F, A}) -> atom_to_list(M) ++ ":" ++ atom_to_list(F) ++ "/" ++ integer_to_list(A). From b73d11675e754ea082d208e17a567541792b633e Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 15:05:49 +0800 Subject: [PATCH 041/131] chore: log the bad mqtt packet(frame error) --- apps/emqx/src/emqx_logger_textfmt.erl | 2 +- apps/emqx/src/emqx_packet.erl | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/apps/emqx/src/emqx_logger_textfmt.erl b/apps/emqx/src/emqx_logger_textfmt.erl index fe67153ec..fb27681b8 100644 --- a/apps/emqx/src/emqx_logger_textfmt.erl +++ b/apps/emqx/src/emqx_logger_textfmt.erl @@ -102,4 +102,4 @@ enrich_topic(Msg, _) -> Msg. mfa(undefined) -> undefined; -mfa({M, F, A}) -> atom_to_list(M) ++ ":" ++ atom_to_list(F) ++ "/" ++ integer_to_list(A). +mfa({M, F, A}) -> [atom_to_list(M), ":", atom_to_list(F), "/" ++ integer_to_list(A)]. 
diff --git a/apps/emqx/src/emqx_packet.erl b/apps/emqx/src/emqx_packet.erl index 9fa0b00da..32bd3df53 100644 --- a/apps/emqx/src/emqx_packet.erl +++ b/apps/emqx/src/emqx_packet.erl @@ -479,7 +479,11 @@ format(#mqtt_packet{header = Header, variable = Variable, payload = Payload}, Pa case format_variable(Variable, Payload, PayloadEncode) of "" -> [HeaderIO, ")"]; VarIO -> [HeaderIO, ", ", VarIO, ")"] - end. + end; +%% receive a frame error packet, such as {frame_error,frame_too_large} or +%% {frame_error,#{expected => <<"'MQTT' or 'MQIsdp'">>,hint => invalid_proto_name,received => <<"bad_name">>}} +format(FrameError, _PayloadEncode) -> + lists:flatten(io_lib:format("~tp", [FrameError])). format_header(#mqtt_packet_header{ type = Type, From bb636394e19f9b3a86a7c335caa52e449a996ab1 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 16:35:06 +0800 Subject: [PATCH 042/131] chore: add debug log for raw data --- apps/emqx/src/emqx_connection.erl | 6 ++++++ apps/emqx/src/emqx_ws_connection.erl | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index 714b077ca..5b783f2fe 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -732,6 +732,12 @@ handle_timeout(TRef, Msg, State) -> %% Parse incoming data -compile({inline, [when_bytes_in/3]}). 
when_bytes_in(Oct, Data, State) -> + ?SLOG(debug, #{ + msg => "raw_bin_received", + size => Oct, + bin => binary_to_list(binary:encode_hex(Data)), + type => "hex" + }), {Packets, NState} = parse_incoming(Data, [], State), Len = erlang:length(Packets), check_limiter( diff --git a/apps/emqx/src/emqx_ws_connection.erl b/apps/emqx/src/emqx_ws_connection.erl index 817c4b505..ead609ed8 100644 --- a/apps/emqx/src/emqx_ws_connection.erl +++ b/apps/emqx/src/emqx_ws_connection.erl @@ -399,6 +399,12 @@ get_peer_info(Type, Listener, Req, Opts) -> websocket_handle({binary, Data}, State) when is_list(Data) -> websocket_handle({binary, iolist_to_binary(Data)}, State); websocket_handle({binary, Data}, State) -> + ?SLOG(debug, #{ + msg => "raw_bin_received", + size => iolist_size(Data), + bin => binary_to_list(binary:encode_hex(Data)), + type => "hex" + }), State2 = ensure_stats_timer(State), {Packets, State3} = parse_incoming(Data, [], State2), LenMsg = erlang:length(Packets), @@ -437,6 +443,7 @@ websocket_info({incoming, Packet = ?CONNECT_PACKET(ConnPkt)}, State) -> NState = State#state{serialize = Serialize}, handle_incoming(Packet, cancel_idle_timer(NState)); websocket_info({incoming, Packet}, State) -> + ?TRACE("WS-MQTT", "mqtt_packet_received", #{packet => Packet}), handle_incoming(Packet, State); websocket_info({outgoing, Packets}, State) -> return(enqueue(Packets, State)); @@ -719,7 +726,6 @@ parse_incoming(Data, Packets, State = #state{parse_state = ParseState}) -> handle_incoming(Packet, State = #state{listener = {Type, Listener}}) when is_record(Packet, mqtt_packet) -> - ?TRACE("WS-MQTT", "mqtt_packet_received", #{packet => Packet}), ok = inc_incoming_stats(Packet), NState = case From 27881064dc8b8e77765435e1afa571f48c09f6bf Mon Sep 17 00:00:00 2001 From: Stefan Strigler Date: Mon, 30 Jan 2023 11:34:04 +0100 Subject: [PATCH 043/131] fix: increase dropped.queue_full by number of messages --- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 50534df4f..56aff94f6 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -1094,7 +1094,7 @@ append_queue(Id, Index, Q, Queries) -> {Q1, QAckRef, Items2} = replayq:pop(Q0, PopOpts), ok = replayq:ack(Q1, QAckRef), Dropped = length(Items2), - emqx_resource_metrics:dropped_queue_full_inc(Id), + emqx_resource_metrics:dropped_queue_full_inc(Id, Dropped), ?SLOG(info, #{ msg => buffer_worker_overflow, worker_id => Id, From 0c4134c4231992a8915bfb2d5f054c8a1a09126f Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 26 Jan 2023 12:30:48 +0100 Subject: [PATCH 044/131] test: add unit test case for redact function in http connector --- .../src/emqx_connector_http.erl | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/apps/emqx_connector/src/emqx_connector_http.erl b/apps/emqx_connector/src/emqx_connector_http.erl index 40df52d45..c99067bdc 100644 --- a/apps/emqx_connector/src/emqx_connector_http.erl +++ b/apps/emqx_connector/src/emqx_connector_http.erl @@ -592,3 +592,33 @@ is_sensitive_key(_) -> %% information (i.e., passwords) redact(Data) -> emqx_misc:redact(Data, fun is_sensitive_key/1). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). 
+ +redact_test_() -> + TestData1 = [ + {<<"content-type">>, <<"application/json">>}, + {<<"Authorization">>, <<"Basic YWxhZGRpbjpvcGVuc2VzYW1l">>} + ], + + TestData2 = #{ + headers => + [ + {[{str, <<"content-type">>}], [{str, <<"application/json">>}]}, + {[{str, <<"Authorization">>}], [{str, <<"Basic YWxhZGRpbjpvcGVuc2VzYW1l">>}]} + ] + }, + [ + ?_assert(is_sensitive_key(<<"Authorization">>)), + ?_assert(is_sensitive_key(<<"AuthoriZation">>)), + ?_assert(is_sensitive_key('AuthoriZation')), + ?_assert(is_sensitive_key(<<"PrOxy-authoRizaTion">>)), + ?_assert(is_sensitive_key('PrOxy-authoRizaTion')), + ?_assertNot(is_sensitive_key(<<"Something">>)), + ?_assertNot(is_sensitive_key(89)), + ?_assertNotEqual(TestData1, redact(TestData1)), + ?_assertNotEqual(TestData2, redact(TestData2)) + ]. + +-endif. From 33e011aff551b9e9efb187e590dd2fbb1537b0dc Mon Sep 17 00:00:00 2001 From: Erik Timan Date: Fri, 20 Jan 2023 13:48:35 +0100 Subject: [PATCH 045/131] fix(emqx_management): handle multiple routes in topics/{topic} API The topics/{topic} API endpoint would return 500 - Internal Error if a topic had multiple routes. This is now fixed by returning a list of routes. 
--- .../emqx_management/src/emqx_mgmt_api_topics.erl | 7 ++++--- .../test/emqx_mgmt_api_topics_SUITE.erl | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_api_topics.erl b/apps/emqx_management/src/emqx_mgmt_api_topics.erl index a64badd3a..4100269e5 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_topics.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_topics.erl @@ -75,7 +75,7 @@ schema("/topics/:topic") -> tags => ?TAGS, parameters => [topic_param(path)], responses => #{ - 200 => hoconsc:mk(hoconsc:ref(topic), #{}), + 200 => hoconsc:mk(hoconsc:array(hoconsc:ref(topic)), #{}), 404 => emqx_dashboard_swagger:error_codes(['TOPIC_NOT_FOUND'], <<"Topic not found">>) } @@ -130,8 +130,9 @@ lookup(#{topic := Topic}) -> case emqx_router:lookup_routes(Topic) of [] -> {404, #{code => ?TOPIC_NOT_FOUND, message => <<"Topic not found">>}}; - [Route] -> - {200, format(Route)} + Routes when is_list(Routes) -> + Formatted = [format(Route) || Route <- Routes], + {200, Formatted} end. 
%%%============================================================================================== diff --git a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl index dcea88d59..70bf1a780 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl @@ -72,8 +72,18 @@ t_nodes_api(_) -> ), %% get topics/:topic + %% We add another route here to ensure that the response handles + %% multiple routes for a single topic + DummyNode = 'dummy-node-name', + ok = emqx_router:add_route(Topic, DummyNode), RoutePath = emqx_mgmt_api_test_util:api_path(["topics", Topic]), {ok, RouteResponse} = emqx_mgmt_api_test_util:request_api(get, RoutePath), - RouteData = emqx_json:decode(RouteResponse, [return_maps]), - ?assertEqual(Topic, maps:get(<<"topic">>, RouteData)), - ?assertEqual(Node, maps:get(<<"node">>, RouteData)). + ok = emqx_router:delete_route(Topic, DummyNode), + + [ + #{<<"topic">> := Topic, <<"node">> := Node1}, + #{<<"topic">> := Topic, <<"node">> := Node2} + ] = emqx_json:decode(RouteResponse, [return_maps]), + + DummyNodeBin = atom_to_binary(DummyNode), + ?assertEqual(lists:usort([Node, DummyNodeBin]), lists:usort([Node1, Node2])). From 85d3c5cfd83ee980924c91a42edf2c388104298c Mon Sep 17 00:00:00 2001 From: Erik Timan Date: Fri, 20 Jan 2023 14:42:08 +0100 Subject: [PATCH 046/131] chore: update changes --- changes/v5.0.16/fix-9824.en.md | 1 + changes/v5.0.16/fix-9824.zh.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 changes/v5.0.16/fix-9824.en.md create mode 100644 changes/v5.0.16/fix-9824.zh.md diff --git a/changes/v5.0.16/fix-9824.en.md b/changes/v5.0.16/fix-9824.en.md new file mode 100644 index 000000000..29aa93264 --- /dev/null +++ b/changes/v5.0.16/fix-9824.en.md @@ -0,0 +1 @@ +The `topics/{topic}` API endpoint would return `500 - Internal Error` if a topic had multiple routes. 
This is fixed by returning a list of routes. diff --git a/changes/v5.0.16/fix-9824.zh.md b/changes/v5.0.16/fix-9824.zh.md new file mode 100644 index 000000000..143a39c16 --- /dev/null +++ b/changes/v5.0.16/fix-9824.zh.md @@ -0,0 +1 @@ +修复:当存在多个路由信息时,topics/{topic} 将会返回 500 - Internal Error 的问题,现在将会正确的返回路由信息列表。 From 03cabf6b26c786a1d643c025d2261e99773bd837 Mon Sep 17 00:00:00 2001 From: Erik Timan Date: Mon, 30 Jan 2023 08:51:40 +0100 Subject: [PATCH 047/131] chore: bump app VSN --- apps/emqx_management/src/emqx_management.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_management.app.src b/apps/emqx_management/src/emqx_management.app.src index ccb53dac4..158d65b6b 100644 --- a/apps/emqx_management/src/emqx_management.app.src +++ b/apps/emqx_management/src/emqx_management.app.src @@ -2,7 +2,7 @@ {application, emqx_management, [ {description, "EMQX Management API and CLI"}, % strict semver, bump manually! - {vsn, "5.0.12"}, + {vsn, "5.0.13"}, {modules, []}, {registered, [emqx_management_sup]}, {applications, [kernel, stdlib, emqx_plugins, minirest, emqx]}, From 5b3a77e3c7f44a54e2a7a17ca8764eede76dde11 Mon Sep 17 00:00:00 2001 From: Erik Timan Date: Mon, 30 Jan 2023 14:02:27 +0100 Subject: [PATCH 048/131] test(emqx_management): fix flaky route handling in get topic test This reworks a test case to use a second slave node. This ensures that an added route is permanently in the routing table. The old version reverted the routing table quickly since the node name given wasn't a real node. 
--- .../test/emqx_mgmt_api_topics_SUITE.erl | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl index 70bf1a780..8f9b224ef 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_topics_SUITE.erl @@ -19,18 +19,25 @@ -compile(nowarn_export_all). -include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). + +-define(ROUTE_TAB, emqx_route). all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> emqx_mgmt_api_test_util:init_suite(), - Config. + Slave = emqx_common_test_helpers:start_slave(some_node, []), + [{slave, Slave} | Config]. -end_per_suite(_) -> +end_per_suite(Config) -> + Slave = ?config(slave, Config), + emqx_common_test_helpers:stop_slave(Slave), + mria:clear_table(?ROUTE_TAB), emqx_mgmt_api_test_util:end_suite(). -t_nodes_api(_) -> +t_nodes_api(Config) -> Node = atom_to_binary(node(), utf8), Topic = <<"test_topic">>, {ok, Client} = emqtt:start_link(#{ @@ -74,16 +81,15 @@ t_nodes_api(_) -> %% get topics/:topic %% We add another route here to ensure that the response handles %% multiple routes for a single topic - DummyNode = 'dummy-node-name', - ok = emqx_router:add_route(Topic, DummyNode), + Slave = ?config(slave, Config), + ok = emqx_router:add_route(Topic, Slave), RoutePath = emqx_mgmt_api_test_util:api_path(["topics", Topic]), {ok, RouteResponse} = emqx_mgmt_api_test_util:request_api(get, RoutePath), - ok = emqx_router:delete_route(Topic, DummyNode), + ok = emqx_router:delete_route(Topic, Slave), [ #{<<"topic">> := Topic, <<"node">> := Node1}, #{<<"topic">> := Topic, <<"node">> := Node2} ] = emqx_json:decode(RouteResponse, [return_maps]), - DummyNodeBin = atom_to_binary(DummyNode), - ?assertEqual(lists:usort([Node, DummyNodeBin]), lists:usort([Node1, Node2])). 
+ ?assertEqual(lists:usort([Node, atom_to_binary(Slave)]), lists:usort([Node1, Node2])). From 35c429ef1d43c02ac5315fc3e453db5abbc567e9 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 30 Jan 2023 14:49:56 +0300 Subject: [PATCH 049/131] refactor: drop a couple of unused macros / includes --- apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl index 870f9acfc..afe173985 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl @@ -38,12 +38,6 @@ ]). -include_lib("emqx/include/logger.hrl"). --include_lib("emqx/include/emqx_mqtt.hrl"). - --define(ACK_REF(ClientPid, PktId), {ClientPid, PktId}). - -%% Messages towards ack collector process --define(REF_IDS(Ref, Ids), {Ref, Ids}). %%-------------------------------------------------------------------- %% emqx_bridge_connect callbacks From 4d146c521b76a284a672a5605f18dd89e5554572 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 30 Jan 2023 14:51:09 +0300 Subject: [PATCH 050/131] fix(mqtt-bridge): ensure proper feedback on async forwards So that buffer worker would notice a connection loss in time, and recycle inflight messages subsequently. 
--- .../src/emqx_connector_mqtt.erl | 11 +--------- .../src/mqtt/emqx_connector_mqtt_mod.erl | 5 +++++ .../src/mqtt/emqx_connector_mqtt_worker.erl | 21 +++++++++++-------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/apps/emqx_connector/src/emqx_connector_mqtt.erl b/apps/emqx_connector/src/emqx_connector_mqtt.erl index 71ed81dda..585122539 100644 --- a/apps/emqx_connector/src/emqx_connector_mqtt.erl +++ b/apps/emqx_connector/src/emqx_connector_mqtt.erl @@ -198,10 +198,7 @@ on_query_async( #{name := InstanceId} ) -> ?TRACE("QUERY", "async_send_msg_to_remote_node", #{message => Msg, connector => InstanceId}), - %% this is a cast, currently. - ok = emqx_connector_mqtt_worker:send_to_remote_async(InstanceId, Msg, {ReplyFun, Args}), - WorkerPid = get_worker_pid(InstanceId), - {ok, WorkerPid}. + emqx_connector_mqtt_worker:send_to_remote_async(InstanceId, Msg, {ReplyFun, Args}). on_get_status(_InstId, #{name := InstanceId}) -> case emqx_connector_mqtt_worker:status(InstanceId) of @@ -215,12 +212,6 @@ ensure_mqtt_worker_started(InstanceId, BridgeConf) -> {error, Reason} -> {error, Reason} end. -%% mqtt workers, when created and called via bridge callbacks, are -%% registered. --spec get_worker_pid(atom()) -> pid(). -get_worker_pid(InstanceId) -> - whereis(InstanceId). - make_sub_confs(EmptyMap, _Conf, _) when map_size(EmptyMap) == 0 -> undefined; make_sub_confs(undefined, _Conf, _) -> diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl index afe173985..6acbe3bb4 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl @@ -26,6 +26,8 @@ ping/1 ]). +-export([info/2]). + -export([ ensure_subscribed/3, ensure_unsubscribed/2 @@ -90,6 +92,9 @@ ping(undefined) -> ping(#{client_pid := Pid}) -> emqtt:ping(Pid). +info(pid, #{client_pid := Pid}) -> + Pid. 
+ ensure_subscribed(#{client_pid := Pid, subscriptions := Subs} = Conn, Topic, QoS) when is_pid(Pid) -> diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index 00b45789e..776d2d8d9 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -168,9 +168,9 @@ send_to_remote(Name, Msg) -> gen_statem:call(name(Name), {send_to_remote, Msg}). send_to_remote_async(Pid, Msg, Callback) when is_pid(Pid) -> - gen_statem:cast(Pid, {send_to_remote_async, Msg, Callback}); + gen_statem:call(Pid, {send_to_remote_async, Msg, Callback}); send_to_remote_async(Name, Msg, Callback) -> - gen_statem:cast(name(Name), {send_to_remote_async, Msg, Callback}). + gen_statem:call(name(Name), {send_to_remote_async, Msg, Callback}). %% @doc Return all forwards (local subscriptions). -spec get_forwards(id()) -> [topic()]. @@ -270,12 +270,14 @@ maybe_destroy_session(_State) -> idle({call, From}, ensure_started, State) -> case do_connect(State) of {ok, State1} -> - {next_state, connected, State1, [{reply, From, ok}, {state_timeout, 0, connected}]}; + {next_state, connected, State1, {reply, From, ok}}; {error, Reason, _State} -> {keep_state_and_data, {reply, From, {error, Reason}}} end; idle({call, From}, {send_to_remote, _}, _State) -> {keep_state_and_data, {reply, From, {error, {recoverable_error, not_connected}}}}; +idle({call, From}, {send_to_remote_async, _, _}, _State) -> + {keep_state_and_data, {reply, From, {error, {recoverable_error, not_connected}}}}; %% @doc Standing by for manual start. 
idle(info, idle, #{start_type := manual}) -> keep_state_and_data; @@ -290,14 +292,11 @@ idle(Type, Content, State) -> connecting(#{reconnect_interval := ReconnectDelayMs} = State) -> case do_connect(State) of {ok, State1} -> - {next_state, connected, State1, {state_timeout, 0, connected}}; + {next_state, connected, State1}; _ -> {keep_state_and_data, {state_timeout, ReconnectDelayMs, reconnect}} end. -connected(state_timeout, connected, State) -> - %% nothing to do - {keep_state, State}; connected({call, From}, {send_to_remote, Msg}, State) -> case do_send(State, Msg) of {ok, NState} -> @@ -305,9 +304,13 @@ connected({call, From}, {send_to_remote, Msg}, State) -> {error, Reason} -> {keep_state_and_data, {reply, From, {error, Reason}}} end; -connected(cast, {send_to_remote_async, Msg, Callback}, State) -> +connected( + {call, From}, + {send_to_remote_async, Msg, Callback}, + State = #{connection := Connection} +) -> _ = do_send_async(State, Msg, Callback), - {keep_state, State}; + {keep_state, State, {reply, From, {ok, emqx_connector_mqtt_mod:info(pid, Connection)}}}; connected( info, {disconnected, Conn, Reason}, From e7ef53558055963b28a5d55b88636ee38274437b Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Tue, 31 Jan 2023 09:48:56 +0100 Subject: [PATCH 051/131] docs: add change log entry for webhook Authorization header leak --- changes/v5.0.16/fix-9839.en.md | 1 + changes/v5.0.16/fix-9839.zh.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 changes/v5.0.16/fix-9839.en.md create mode 100644 changes/v5.0.16/fix-9839.zh.md diff --git a/changes/v5.0.16/fix-9839.en.md b/changes/v5.0.16/fix-9839.en.md new file mode 100644 index 000000000..9962b6338 --- /dev/null +++ b/changes/v5.0.16/fix-9839.en.md @@ -0,0 +1 @@ +Make sure that the content of an Authorization header that users have specified for a webhook bridge is not printed to log files. 
diff --git a/changes/v5.0.16/fix-9839.zh.md b/changes/v5.0.16/fix-9839.zh.md new file mode 100644 index 000000000..d9e1e0ad8 --- /dev/null +++ b/changes/v5.0.16/fix-9839.zh.md @@ -0,0 +1 @@ +确保用户为webhook-bridge指定的Authorization-HTTP-header的内容不会被打印到日志文件。 From e7b8df6ba9591abd15d89debe703c072af2e939c Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 12:46:37 +0100 Subject: [PATCH 052/131] chore(bin/emqx): hide upgrade/downgrade from usage info --- bin/emqx | 12 ++++++------ bin/install_upgrade.escript | 21 ++++++--------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/bin/emqx b/bin/emqx index ebf10bada..2c2609b36 100755 --- a/bin/emqx +++ b/bin/emqx @@ -159,7 +159,7 @@ usage() { echo "Evaluate an Erlang expression in the EMQX node, even on Elixir node" ;; versions) - echo "List installed EMQX versions and their status" + echo "List installed EMQX release versions and their status" ;; unpack) echo "Usage: $REL_NAME unpack [VERSION]" @@ -217,12 +217,12 @@ usage() { echo " ctl: Administration commands, execute '$REL_NAME ctl help' for more details" echo '' echo "More:" - echo " Shell attach: remote_console | attach" - echo " Up/Down-grade: upgrade | downgrade | install | uninstall" - echo " Install info: ertspath | root_dir" - echo " Runtime info: pid | ping | versions" + echo " Shell attach: remote_console | attach" +# echo " Up/Down-grade: upgrade | downgrade | install | uninstall | versions" # TODO enable when supported + echo " Install Info: ertspath | root_dir" + echo " Runtime Status: pid | ping" echo " Validate Config: check_config" - echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl" + echo " Advanced: console_clean | escript | rpc | rpcterms | eval | eval-erl" echo '' echo "Execute '$REL_NAME COMMAND help' for more information" ;; diff --git a/bin/install_upgrade.escript b/bin/install_upgrade.escript index f7f340f31..3e39c787b 100755 --- a/bin/install_upgrade.escript +++ 
b/bin/install_upgrade.escript @@ -18,27 +18,18 @@ main([Command0, DistInfoStr | CommandArgs]) -> Opts = parse_arguments(CommandArgs), %% invoke the command passed as argument F = case Command0 of - %% "install" -> fun(A, B) -> install(A, B) end; - %% "unpack" -> fun(A, B) -> unpack(A, B) end; - %% "upgrade" -> fun(A, B) -> upgrade(A, B) end; - %% "downgrade" -> fun(A, B) -> downgrade(A, B) end; - %% "uninstall" -> fun(A, B) -> uninstall(A, B) end; - "versions" -> fun(A, B) -> versions(A, B) end; - _ -> fun fail_upgrade/2 + "install" -> fun(A, B) -> install(A, B) end; + "unpack" -> fun(A, B) -> unpack(A, B) end; + "upgrade" -> fun(A, B) -> upgrade(A, B) end; + "downgrade" -> fun(A, B) -> downgrade(A, B) end; + "uninstall" -> fun(A, B) -> uninstall(A, B) end; + "versions" -> fun(A, B) -> versions(A, B) end end, F(DistInfo, Opts); main(Args) -> ?INFO("unknown args: ~p", [Args]), erlang:halt(1). -%% temporary block for hot-upgrades; next release will just remove -%% this and the new script version shall be used instead of this -%% current version. -%% TODO: always deny relup for macos (unsupported) -fail_upgrade(_DistInfo, _Opts) -> - ?ERROR("Unsupported upgrade path", []), - erlang:halt(1). 
- unpack({RelName, NameTypeArg, NodeName, Cookie}, Opts) -> TargetNode = start_distribution(NodeName, NameTypeArg, Cookie), Version = proplists:get_value(version, Opts), From f6dafc20eaa8c462e74712168ee38a7b608deff4 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 14:20:34 +0100 Subject: [PATCH 053/131] feat: export API to read cluster and local overrides --- apps/emqx/src/emqx_config.erl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx_config.erl b/apps/emqx/src/emqx_config.erl index 204e32a2b..117043911 100644 --- a/apps/emqx/src/emqx_config.erl +++ b/apps/emqx/src/emqx_config.erl @@ -24,6 +24,7 @@ init_load/2, init_load/3, read_override_conf/1, + read_override_confs/0, delete_override_conf_files/0, check_config/2, fill_defaults/1, @@ -326,9 +327,7 @@ init_load(SchemaMod, RawConf, Opts) when is_map(RawConf) -> ok = save_schema_mod_and_names(SchemaMod), %% Merge environment variable overrides on top RawConfWithEnvs = merge_envs(SchemaMod, RawConf), - ClusterOverrides = read_override_conf(#{override_to => cluster}), - LocalOverrides = read_override_conf(#{override_to => local}), - Overrides = hocon:deep_merge(ClusterOverrides, LocalOverrides), + Overrides = read_override_confs(), RawConfWithOverrides = hocon:deep_merge(RawConfWithEnvs, Overrides), RootNames = get_root_names(), RawConfAll = raw_conf_with_default(SchemaMod, RootNames, RawConfWithOverrides, Opts), @@ -337,6 +336,12 @@ init_load(SchemaMod, RawConf, Opts) when is_map(RawConf) -> save_to_app_env(AppEnvs), ok = save_to_config_map(CheckedConf, RawConfAll). +%% @doc Read merged cluster + local overrides. +read_override_confs() -> + ClusterOverrides = read_override_conf(#{override_to => cluster}), + LocalOverrides = read_override_conf(#{override_to => local}), + hocon:deep_merge(ClusterOverrides, LocalOverrides). + %% keep the raw and non-raw conf has the same keys to make update raw conf easier. 
raw_conf_with_default(SchemaMod, RootNames, RawConf, #{raw_with_default := true}) -> Fun = fun(Name, Acc) -> @@ -599,8 +604,16 @@ load_hocon_file(FileName, LoadType) -> case filelib:is_regular(FileName) of true -> Opts = #{include_dirs => include_dirs(), format => LoadType}, - {ok, Raw0} = hocon:load(FileName, Opts), - Raw0; + case hocon:load(FileName, Opts) of + {ok, Raw0} -> + Raw0; + {error, Reason} -> + throw(#{ + msg => failed_to_load_conf, + reason => Reason, + file => FileName + }) + end; false -> #{} end. From fce1e74c3d45e2561b8bafebc3aaee225609b1b3 Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Thu, 26 Jan 2023 00:37:12 +0200 Subject: [PATCH 054/131] fix(connector): fix redis cluster resource recovery --- .ci/docker-compose-file/.env | 2 +- .ci/docker-compose-file/Makefile.local | 6 +- .../docker-compose-redis-cluster-tcp.yaml | 56 +++++++- .../docker-compose-redis-cluster-tls.yaml | 61 +++++++-- .../docker-compose-redis-sentinel-tcp.yaml | 38 +++++- .../docker-compose-redis-sentinel-tls.yaml | 44 +++++- .ci/docker-compose-file/redis/.gitignore | 3 - .../redis/cluster-tcp/redis.conf | 18 +++ .../redis/cluster-tls/redis.conf | 28 ++++ .ci/docker-compose-file/redis/redis-tls.conf | 12 -- .ci/docker-compose-file/redis/redis.conf | 6 - .ci/docker-compose-file/redis/redis.sh | 126 ------------------ .../redis/sentinel-tcp/master.conf | 14 ++ .../redis/sentinel-tcp/sentinel-base.conf | 7 + .../redis/sentinel-tcp/slave.conf | 17 +++ .../redis/sentinel-tls/master.conf | 20 +++ .../redis/sentinel-tls/sentinel-base.conf | 14 ++ .../redis/sentinel-tls/slave.conf | 24 ++++ .github/workflows/run_test_cases.yaml | 10 +- apps/emqx_connector/docker-ct | 1 + apps/emqx_connector/rebar.config | 12 +- .../src/emqx_connector_redis.erl | 20 +-- .../test/emqx_connector_redis_SUITE.erl | 6 +- .../test/emqx_ee_bridge_redis_SUITE.erl | 5 +- mix.exs | 4 +- rebar.config | 2 +- 26 files changed, 340 insertions(+), 216 deletions(-) delete mode 100644 
.ci/docker-compose-file/redis/.gitignore create mode 100644 .ci/docker-compose-file/redis/cluster-tcp/redis.conf create mode 100644 .ci/docker-compose-file/redis/cluster-tls/redis.conf delete mode 100644 .ci/docker-compose-file/redis/redis-tls.conf delete mode 100644 .ci/docker-compose-file/redis/redis.conf delete mode 100755 .ci/docker-compose-file/redis/redis.sh create mode 100644 .ci/docker-compose-file/redis/sentinel-tcp/master.conf create mode 100644 .ci/docker-compose-file/redis/sentinel-tcp/sentinel-base.conf create mode 100644 .ci/docker-compose-file/redis/sentinel-tcp/slave.conf create mode 100644 .ci/docker-compose-file/redis/sentinel-tls/master.conf create mode 100644 .ci/docker-compose-file/redis/sentinel-tls/sentinel-base.conf create mode 100644 .ci/docker-compose-file/redis/sentinel-tls/slave.conf diff --git a/.ci/docker-compose-file/.env b/.ci/docker-compose-file/.env index bd925e224..e5564efb7 100644 --- a/.ci/docker-compose-file/.env +++ b/.ci/docker-compose-file/.env @@ -1,5 +1,5 @@ MYSQL_TAG=8 -REDIS_TAG=6 +REDIS_TAG=7.0 MONGO_TAG=5 PGSQL_TAG=13 LDAP_TAG=2.4.50 diff --git a/.ci/docker-compose-file/Makefile.local b/.ci/docker-compose-file/Makefile.local index 2cf0802ce..9c12255e4 100644 --- a/.ci/docker-compose-file/Makefile.local +++ b/.ci/docker-compose-file/Makefile.local @@ -13,10 +13,10 @@ help: up: env \ MYSQL_TAG=8 \ - REDIS_TAG=6 \ + REDIS_TAG=7.0 \ MONGO_TAG=5 \ PGSQL_TAG=13 \ - docker compose \ + docker-compose \ -f .ci/docker-compose-file/docker-compose.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tcp.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tls.yaml \ @@ -34,7 +34,7 @@ up: up -d --build --remove-orphans down: - docker compose \ + docker-compose \ -f .ci/docker-compose-file/docker-compose.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tcp.yaml \ -f .ci/docker-compose-file/docker-compose-mongo-single-tls.yaml \ diff --git a/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml 
b/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml index 9c03fc65e..f44a71e14 100644 --- a/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-cluster-tcp.yaml @@ -1,11 +1,57 @@ version: '3.9' - services: - redis_cluster: + + redis-cluster-1: &redis-node + container_name: redis-cluster-1 image: redis:${REDIS_TAG} - container_name: redis-cluster volumes: - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node cluster && tail -f /var/log/redis-server.log" + - ./redis/cluster-tcp:/usr/local/etc/redis + command: redis-server /usr/local/etc/redis/redis.conf networks: - emqx_bridge + + + redis-cluster-2: + <<: *redis-node + container_name: redis-cluster-2 + + redis-cluster-3: + <<: *redis-node + container_name: redis-cluster-3 + + redis-cluster-4: + <<: *redis-node + container_name: redis-cluster-4 + + redis-cluster-5: + <<: *redis-node + container_name: redis-cluster-5 + + redis-cluster-6: + <<: *redis-node + container_name: redis-cluster-6 + + redis-cluster-create: + <<: *redis-node + container_name: redis-cluster-create + command: > + redis-cli + --cluster create + redis-cluster-1:6379 + redis-cluster-2:6379 + redis-cluster-3:6379 + redis-cluster-4:6379 + redis-cluster-5:6379 + redis-cluster-6:6379 + --cluster-replicas 1 + --cluster-yes + --pass "public" + --no-auth-warning + depends_on: + - redis-cluster-1 + - redis-cluster-2 + - redis-cluster-3 + - redis-cluster-4 + - redis-cluster-5 + - redis-cluster-6 + diff --git a/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml b/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml index bfbf1a4a3..988620acb 100644 --- a/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-cluster-tls.yaml @@ -1,14 +1,59 @@ version: '3.9' - services: - redis_cluster_tls: - container_name: redis-cluster-tls + + redis-cluster-tls-1: &redis-node + container_name: 
redis-cluster-tls-1 image: redis:${REDIS_TAG} volumes: - - ../../apps/emqx/etc/certs/cacert.pem:/etc/certs/ca.crt - - ../../apps/emqx/etc/certs/cert.pem:/etc/certs/redis.crt - - ../../apps/emqx/etc/certs/key.pem:/etc/certs/redis.key - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node cluster --tls-enabled && tail -f /var/log/redis-server.log" + - ./redis/cluster-tls:/usr/local/etc/redis + - ../../apps/emqx/etc/certs:/etc/certs + command: redis-server /usr/local/etc/redis/redis.conf networks: - emqx_bridge + + redis-cluster-tls-2: + <<: *redis-node + container_name: redis-cluster-tls-2 + + redis-cluster-tls-3: + <<: *redis-node + container_name: redis-cluster-tls-3 + + redis-cluster-tls-4: + <<: *redis-node + container_name: redis-cluster-tls-4 + + redis-cluster-tls-5: + <<: *redis-node + container_name: redis-cluster-tls-5 + + redis-cluster-tls-6: + <<: *redis-node + container_name: redis-cluster-tls-6 + + redis-cluster-tls-create: + <<: *redis-node + container_name: redis-cluster-tls-create + command: > + redis-cli + --cluster create + redis-cluster-tls-1:6389 + redis-cluster-tls-2:6389 + redis-cluster-tls-3:6389 + redis-cluster-tls-4:6389 + redis-cluster-tls-5:6389 + redis-cluster-tls-6:6389 + --cluster-replicas 1 + --cluster-yes + --pass "public" + --no-auth-warning + --tls + --insecure + depends_on: + - redis-cluster-tls-1 + - redis-cluster-tls-2 + - redis-cluster-tls-3 + - redis-cluster-tls-4 + - redis-cluster-tls-5 + - redis-cluster-tls-6 + diff --git a/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml b/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml index 07c6cfb0a..d395edd2b 100644 --- a/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-sentinel-tcp.yaml @@ -1,11 +1,41 @@ -version: '3.9' +version: "3" services: - redis_sentinel_server: + + redis-sentinel-master: + container_name: redis-sentinel-master + image: redis:${REDIS_TAG} + 
volumes: + - ./redis/sentinel-tcp:/usr/local/etc/redis + command: redis-server /usr/local/etc/redis/master.conf + networks: + - emqx_bridge + + redis-sentinel-slave: + container_name: redis-sentinel-slave + image: redis:${REDIS_TAG} + volumes: + - ./redis/sentinel-tcp:/usr/local/etc/redis + command: redis-server /usr/local/etc/redis/slave.conf + networks: + - emqx_bridge + depends_on: + - redis-sentinel-master + + redis-sentinel: container_name: redis-sentinel image: redis:${REDIS_TAG} volumes: - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node sentinel && tail -f /var/log/redis-server.log" + - ./redis/sentinel-tcp/sentinel-base.conf:/usr/local/etc/redis/sentinel-base.conf + depends_on: + - redis-sentinel-master + - redis-sentinel-slave + command: > + bash -c "cp -f /usr/local/etc/redis/sentinel-base.conf /usr/local/etc/redis/sentinel.conf && + redis-sentinel /usr/local/etc/redis/sentinel.conf" networks: - emqx_bridge + + + + diff --git a/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml b/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml index b9eaefa9c..d883e2992 100644 --- a/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml +++ b/.ci/docker-compose-file/docker-compose-redis-sentinel-tls.yaml @@ -1,14 +1,44 @@ -version: '3.9' +version: "3" services: - redis_sentinel_server_tls: + + redis-sentinel-tls-master: + container_name: redis-sentinel-tls-master + image: redis:${REDIS_TAG} + volumes: + - ./redis/sentinel-tls:/usr/local/etc/redis + - ../../apps/emqx/etc/certs:/etc/certs + command: redis-server /usr/local/etc/redis/master.conf + networks: + - emqx_bridge + + redis-sentinel-tls-slave: + container_name: redis-sentinel-tls-slave + image: redis:${REDIS_TAG} + volumes: + - ./redis/sentinel-tls:/usr/local/etc/redis + - ../../apps/emqx/etc/certs:/etc/certs + command: redis-server /usr/local/etc/redis/slave.conf + networks: + - emqx_bridge + depends_on: + - redis-sentinel-tls-master + + 
redis-sentinel-tls: container_name: redis-sentinel-tls image: redis:${REDIS_TAG} volumes: - - ../../apps/emqx/etc/certs/cacert.pem:/etc/certs/ca.crt - - ../../apps/emqx/etc/certs/cert.pem:/etc/certs/redis.crt - - ../../apps/emqx/etc/certs/key.pem:/etc/certs/redis.key - - ./redis/:/data/conf - command: bash -c "/bin/bash /data/conf/redis.sh --node sentinel --tls-enabled && tail -f /var/log/redis-server.log" + - ./redis/sentinel-tls/sentinel-base.conf:/usr/local/etc/redis/sentinel-base.conf + - ../../apps/emqx/etc/certs:/etc/certs + depends_on: + - redis-sentinel-tls-master + - redis-sentinel-tls-slave + command: > + bash -c "cp -f /usr/local/etc/redis/sentinel-base.conf /usr/local/etc/redis/sentinel.conf && + redis-sentinel /usr/local/etc/redis/sentinel.conf" networks: - emqx_bridge + + + + diff --git a/.ci/docker-compose-file/redis/.gitignore b/.ci/docker-compose-file/redis/.gitignore deleted file mode 100644 index 23ffe8469..000000000 --- a/.ci/docker-compose-file/redis/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -r700?i.log -nodes.700?.conf -*.rdb diff --git a/.ci/docker-compose-file/redis/cluster-tcp/redis.conf b/.ci/docker-compose-file/redis/cluster-tcp/redis.conf new file mode 100644 index 000000000..79a0d8a73 --- /dev/null +++ b/.ci/docker-compose-file/redis/cluster-tcp/redis.conf @@ -0,0 +1,18 @@ +bind :: 0.0.0.0 +port 6379 +requirepass public + +cluster-enabled yes + +masterauth public + +protected-mode no +daemonize no + +loglevel notice +logfile "" + +always-show-logo no +save "" +appendonly no + diff --git a/.ci/docker-compose-file/redis/cluster-tls/redis.conf b/.ci/docker-compose-file/redis/cluster-tls/redis.conf new file mode 100644 index 000000000..3020f46a7 --- /dev/null +++ b/.ci/docker-compose-file/redis/cluster-tls/redis.conf @@ -0,0 +1,28 @@ +bind :: 0.0.0.0 +port 6379 +requirepass public + +cluster-enabled yes + +masterauth public + +tls-port 6389 +tls-cert-file /etc/certs/cert.pem +tls-key-file /etc/certs/key.pem +tls-ca-cert-file 
/etc/certs/cacert.pem +tls-auth-clients no + +tls-replication yes +tls-cluster yes + + +protected-mode no +daemonize no + +loglevel notice +logfile "" + +always-show-logo no +save "" +appendonly no + diff --git a/.ci/docker-compose-file/redis/redis-tls.conf b/.ci/docker-compose-file/redis/redis-tls.conf deleted file mode 100644 index c503dc2e8..000000000 --- a/.ci/docker-compose-file/redis/redis-tls.conf +++ /dev/null @@ -1,12 +0,0 @@ -daemonize yes -bind 0.0.0.0 :: -logfile /var/log/redis-server.log -protected-mode no -requirepass public -masterauth public - -tls-cert-file /etc/certs/redis.crt -tls-key-file /etc/certs/redis.key -tls-ca-cert-file /etc/certs/ca.crt -tls-replication yes -tls-cluster yes diff --git a/.ci/docker-compose-file/redis/redis.conf b/.ci/docker-compose-file/redis/redis.conf deleted file mode 100644 index 484d9abf9..000000000 --- a/.ci/docker-compose-file/redis/redis.conf +++ /dev/null @@ -1,6 +0,0 @@ -daemonize yes -bind 0.0.0.0 :: -logfile /var/log/redis-server.log -protected-mode no -requirepass public -masterauth public diff --git a/.ci/docker-compose-file/redis/redis.sh b/.ci/docker-compose-file/redis/redis.sh deleted file mode 100755 index be6462249..000000000 --- a/.ci/docker-compose-file/redis/redis.sh +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash - -set -x - -LOCAL_IP=$(hostname -i | grep -oE '((25[0-5]|(2[0-4]|1[0-9]|[1-9]|)[0-9])\.){3}(25[0-5]|(2[0-4]|1[0-9]|[1-9]|)[0-9])' | head -n 1) - -node=single -tls=false -while [[ $# -gt 0 ]] -do -key="$1" - -case $key in - -n|--node) - node="$2" - shift # past argument - shift # past value - ;; - --tls-enabled) - tls=true - shift # past argument - ;; - *) - shift # past argument - ;; -esac -done - -rm -f \ - /data/conf/r7000i.log \ - /data/conf/r7001i.log \ - /data/conf/r7002i.log \ - /data/conf/nodes.7000.conf \ - /data/conf/nodes.7001.conf \ - /data/conf/nodes.7002.conf - -if [ "$node" = "cluster" ]; then - if $tls; then - redis-server /data/conf/redis-tls.conf --port 7000 
--cluster-config-file /data/conf/nodes.7000.conf \ - --tls-port 8000 --cluster-enabled yes - redis-server /data/conf/redis-tls.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --tls-port 8001 --cluster-enabled yes - redis-server /data/conf/redis-tls.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --tls-port 8002 --cluster-enabled yes - else - redis-server /data/conf/redis.conf --port 7000 --cluster-config-file /data/conf/nodes.7000.conf \ - --cluster-enabled yes - redis-server /data/conf/redis.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --cluster-enabled yes - redis-server /data/conf/redis.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --cluster-enabled yes - fi -elif [ "$node" = "sentinel" ]; then - if $tls; then - redis-server /data/conf/redis-tls.conf --port 7000 --cluster-config-file /data/conf/nodes.7000.conf \ - --tls-port 8000 --cluster-enabled no - redis-server /data/conf/redis-tls.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --tls-port 8001 --cluster-enabled no --slaveof "$LOCAL_IP" 8000 - redis-server /data/conf/redis-tls.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --tls-port 8002 --cluster-enabled no --slaveof "$LOCAL_IP" 8000 - - else - redis-server /data/conf/redis.conf --port 7000 --cluster-config-file /data/conf/nodes.7000.conf \ - --cluster-enabled no - redis-server /data/conf/redis.conf --port 7001 --cluster-config-file /data/conf/nodes.7001.conf \ - --cluster-enabled no --slaveof "$LOCAL_IP" 7000 - redis-server /data/conf/redis.conf --port 7002 --cluster-config-file /data/conf/nodes.7002.conf \ - --cluster-enabled no --slaveof "$LOCAL_IP" 7000 - fi -fi - -REDIS_LOAD_FLG=true - -while $REDIS_LOAD_FLG; -do - sleep 1 - redis-cli --pass public --no-auth-warning -p 7000 info 1> /data/conf/r7000i.log 2> /dev/null - if ! 
[ -s /data/conf/r7000i.log ]; then - continue - fi - redis-cli --pass public --no-auth-warning -p 7001 info 1> /data/conf/r7001i.log 2> /dev/null - if ! [ -s /data/conf/r7001i.log ]; then - continue - fi - redis-cli --pass public --no-auth-warning -p 7002 info 1> /data/conf/r7002i.log 2> /dev/null; - if ! [ -s /data/conf/r7002i.log ]; then - continue - fi - if [ "$node" = "cluster" ] ; then - if $tls; then - yes "yes" | redis-cli --cluster create "$LOCAL_IP:8000" "$LOCAL_IP:8001" "$LOCAL_IP:8002" \ - --pass public --no-auth-warning \ - --tls true --cacert /etc/certs/ca.crt \ - --cert /etc/certs/redis.crt --key /etc/certs/redis.key - else - yes "yes" | redis-cli --cluster create "$LOCAL_IP:7000" "$LOCAL_IP:7001" "$LOCAL_IP:7002" \ - --pass public --no-auth-warning - fi - elif [ "$node" = "sentinel" ]; then - tee /_sentinel.conf>/dev/null << EOF -port 26379 -bind 0.0.0.0 :: -daemonize yes -logfile /var/log/redis-server.log -dir /tmp -EOF - if $tls; then - cat >>/_sentinel.conf<>/_sentinel.conf< [{ssl, false}] end ++ [{sentinel, maps:get(sentinel, Config, undefined)}], - PoolName = emqx_plugin_libs_pool:pool_name(InstId), + PoolName = InstId, State = #{poolname => PoolName, type => Type}, case Type of cluster -> @@ -225,26 +225,10 @@ is_unrecoverable_error({error, <<"ERR unknown command ", _/binary>>}) -> is_unrecoverable_error(_) -> false. -extract_eredis_cluster_workers(PoolName) -> - lists:flatten([ - gen_server:call(PoolPid, get_all_workers) - || PoolPid <- eredis_cluster_monitor:get_all_pools(PoolName) - ]). - -eredis_cluster_workers_exist_and_are_connected(Workers) -> - length(Workers) > 0 andalso - lists:all( - fun({_, Pid, _, _}) -> - eredis_cluster_pool_worker:is_connected(Pid) =:= true - end, - Workers - ). 
- on_get_status(_InstId, #{type := cluster, poolname := PoolName}) -> case eredis_cluster:pool_exists(PoolName) of true -> - Workers = extract_eredis_cluster_workers(PoolName), - Health = eredis_cluster_workers_exist_and_are_connected(Workers), + Health = eredis_cluster:ping_all(PoolName), status_result(Health); false -> disconnected diff --git a/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl b/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl index 87d2b8e21..3a134ad35 100644 --- a/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl +++ b/apps/emqx_connector/test/emqx_connector_redis_SUITE.erl @@ -27,6 +27,8 @@ -define(REDIS_SINGLE_PORT, 6379). -define(REDIS_SENTINEL_HOST, "redis-sentinel"). -define(REDIS_SENTINEL_PORT, 26379). +-define(REDIS_CLUSTER_HOST, "redis-cluster-1"). +-define(REDIS_CLUSTER_PORT, 6379). -define(REDIS_RESOURCE_MOD, emqx_connector_redis). all() -> @@ -203,8 +205,8 @@ redis_config_base(Type, ServerKey) -> MaybeSentinel = "", MaybeDatabase = " database = 1\n"; "cluster" -> - Host = ?REDIS_SINGLE_HOST, - Port = ?REDIS_SINGLE_PORT, + Host = ?REDIS_CLUSTER_HOST, + Port = ?REDIS_CLUSTER_PORT, MaybeSentinel = "", MaybeDatabase = "" end, diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl index 2b67787b2..901e74d10 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl @@ -479,12 +479,13 @@ redis_connect_configs() -> }, redis_cluster => #{ tcp => #{ - <<"servers">> => <<"redis-cluster:7000,redis-cluster:7001,redis-cluster:7002">>, + <<"servers">> => + <<"redis-cluster-1:6379,redis-cluster-2:6379,redis-cluster-3:6379">>, <<"redis_type">> => <<"cluster">> }, tls => #{ <<"servers">> => - <<"redis-cluster-tls:8000,redis-cluster-tls:8001,redis-cluster-tls:8002">>, + <<"redis-cluster-tls-1:6389,redis-cluster-tls-2:6389,redis-cluster-tls-3:6389">>, <<"redis_type">> => 
<<"cluster">>, <<"ssl">> => redis_connect_ssl_opts(redis_cluster) } diff --git a/mix.exs b/mix.exs index 315212fbe..8e0844aaa 100644 --- a/mix.exs +++ b/mix.exs @@ -57,7 +57,7 @@ defmodule EMQXUmbrella.MixProject do {:gen_rpc, github: "emqx/gen_rpc", tag: "2.8.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.7", override: true}, {:minirest, github: "emqx/minirest", tag: "1.3.7", override: true}, - {:ecpool, github: "emqx/ecpool", tag: "0.5.2", override: true}, + {:ecpool, github: "emqx/ecpool", tag: "0.5.3", override: true}, {:replayq, github: "emqx/replayq", tag: "0.3.6", override: true}, {:pbkdf2, github: "emqx/erlang-pbkdf2", tag: "2.0.4", override: true}, {:emqtt, github: "emqx/emqtt", tag: "1.7.0-rc.2", override: true}, @@ -76,8 +76,6 @@ defmodule EMQXUmbrella.MixProject do {:gun, github: "emqx/gun", tag: "1.3.9", override: true}, # in conflict by emqx_connector and system_monitor {:epgsql, github: "emqx/epgsql", tag: "4.7.0.1", override: true}, - # in conflict by mongodb and eredis_cluster - {:poolboy, github: "emqx/poolboy", tag: "1.5.2", override: true}, # in conflict by emqx and observer_cli {:recon, github: "ferd/recon", tag: "2.5.1", override: true}, {:jsx, github: "talentdeficit/jsx", tag: "v3.1.0", override: true}, diff --git a/rebar.config b/rebar.config index a1b4df520..7e0b50653 100644 --- a/rebar.config +++ b/rebar.config @@ -59,7 +59,7 @@ , {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}} , {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.7"}}} , {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.7"}}} - , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.2"}}} + , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.3"}}} , {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.6"}}} , {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}} , {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.7.0-rc.2"}}} From 
d0c10b59aa1527dfa537cef34559232f97ceaaa1 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 30 Jan 2023 18:35:17 +0300 Subject: [PATCH 055/131] feat(mqtt-bridge): avoid middleman process Instead, supervise `emqtt` client process directly. --- .../test/emqx_bridge_api_SUITE.erl | 2 +- .../test/emqx_bridge_mqtt_SUITE.erl | 8 +- .../src/emqx_connector_mqtt.erl | 78 ++- .../src/mqtt/emqx_connector_mqtt_mod.erl | 235 ------- .../src/mqtt/emqx_connector_mqtt_worker.erl | 627 ++++++++---------- .../test/emqx_connector_mqtt_tests.erl | 60 -- .../test/emqx_connector_mqtt_worker_tests.erl | 101 --- 7 files changed, 338 insertions(+), 773 deletions(-) delete mode 100644 apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl delete mode 100644 apps/emqx_connector/test/emqx_connector_mqtt_tests.erl delete mode 100644 apps/emqx_connector/test/emqx_connector_mqtt_worker_tests.erl diff --git a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl index 6b0b3a941..5cb78d3ba 100644 --- a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl @@ -640,7 +640,7 @@ t_bridges_probe(Config) -> ?assertMatch( #{ <<"code">> := <<"TEST_FAILED">>, - <<"message">> := <<"#{reason => econnrefused", _/binary>> + <<"message">> := <<"econnrefused">> }, jsx:decode(ConnRefused) ), diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index cd5a17184..6e3bf77ee 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -825,15 +825,15 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF, - %% to make it reconnect quickly - <<"reconnect_interval">> => <<"1s">>, <<"resource_opts">> => #{ <<"worker_pool_size">> => 2, <<"query_mode">> => <<"sync">>, %% using a long time so we can test recovery 
<<"request_timeout">> => <<"15s">>, %% to make it check the healthy quickly - <<"health_check_interval">> => <<"0.5s">> + <<"health_check_interval">> => <<"0.5s">>, + %% to make it reconnect quickly + <<"auto_restart_interval">> => <<"1s">> } } ), @@ -911,7 +911,7 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> Decoded1 = jsx:decode(BridgeStr1), DecodedMetrics1 = jsx:decode(BridgeMetricsStr1), ?assertMatch( - Status when (Status == <<"connected">> orelse Status == <<"connecting">>), + Status when (Status == <<"connecting">> orelse Status == <<"disconnected">>), maps:get(<<"status">>, Decoded1) ), %% matched >= 3 because of possible retries. diff --git a/apps/emqx_connector/src/emqx_connector_mqtt.erl b/apps/emqx_connector/src/emqx_connector_mqtt.erl index 585122539..462bac0b8 100644 --- a/apps/emqx_connector/src/emqx_connector_mqtt.erl +++ b/apps/emqx_connector/src/emqx_connector_mqtt.erl @@ -105,16 +105,15 @@ init([]) -> {ok, {SupFlag, []}}. bridge_spec(Config) -> + {Name, NConfig} = maps:take(name, Config), #{ - id => maps:get(name, Config), - start => {emqx_connector_mqtt_worker, start_link, [Config]}, - restart => permanent, - shutdown => 5000, - type => worker, - modules => [emqx_connector_mqtt_worker] + id => Name, + start => {emqx_connector_mqtt_worker, start_link, [Name, NConfig]}, + restart => temporary, + shutdown => 5000 }. --spec bridges() -> [{node(), map()}]. +-spec bridges() -> [{_Name, _Status}]. bridges() -> [ {Name, emqx_connector_mqtt_worker:status(Name)} @@ -144,8 +143,7 @@ on_message_received(Msg, HookPoint, ResId) -> %% =================================================================== callback_mode() -> async_if_possible. 
-on_start(InstId, Conf) -> - InstanceId = binary_to_atom(InstId, utf8), +on_start(InstanceId, Conf) -> ?SLOG(info, #{ msg => "starting_mqtt_connector", connector => InstanceId, @@ -154,8 +152,8 @@ on_start(InstId, Conf) -> BasicConf = basic_config(Conf), BridgeConf = BasicConf#{ name => InstanceId, - clientid => clientid(InstId, Conf), - subscriptions => make_sub_confs(maps:get(ingress, Conf, undefined), Conf, InstId), + clientid => clientid(InstanceId, Conf), + subscriptions => make_sub_confs(maps:get(ingress, Conf, undefined), Conf, InstanceId), forwards => make_forward_confs(maps:get(egress, Conf, undefined)) }, case ?MODULE:create_bridge(BridgeConf) of @@ -189,35 +187,49 @@ on_stop(_InstId, #{name := InstanceId}) -> on_query(_InstId, {send_message, Msg}, #{name := InstanceId}) -> ?TRACE("QUERY", "send_msg_to_remote_node", #{message => Msg, connector => InstanceId}), - emqx_connector_mqtt_worker:send_to_remote(InstanceId, Msg). - -on_query_async( - _InstId, - {send_message, Msg}, - {ReplyFun, Args}, - #{name := InstanceId} -) -> - ?TRACE("QUERY", "async_send_msg_to_remote_node", #{message => Msg, connector => InstanceId}), - emqx_connector_mqtt_worker:send_to_remote_async(InstanceId, Msg, {ReplyFun, Args}). - -on_get_status(_InstId, #{name := InstanceId}) -> - case emqx_connector_mqtt_worker:status(InstanceId) of - connected -> connected; - _ -> connecting + case emqx_connector_mqtt_worker:send_to_remote(InstanceId, Msg) of + ok -> + ok; + {error, Reason} -> + classify_error(Reason) end. +on_query_async(_InstId, {send_message, Msg}, Callback, #{name := InstanceId}) -> + ?TRACE("QUERY", "async_send_msg_to_remote_node", #{message => Msg, connector => InstanceId}), + case emqx_connector_mqtt_worker:send_to_remote_async(InstanceId, Msg, Callback) of + ok -> + % TODO this is racy + {ok, emqx_connector_mqtt_worker:pid(InstanceId)}; + {error, Reason} -> + classify_error(Reason) + end. 
+ +on_get_status(_InstId, #{name := InstanceId}) -> + emqx_connector_mqtt_worker:status(InstanceId). + +classify_error(disconnected = Reason) -> + {error, {recoverable_error, Reason}}; +classify_error({disconnected, _RC, _} = Reason) -> + {error, {recoverable_error, Reason}}; +classify_error({shutdown, _} = Reason) -> + {error, {recoverable_error, Reason}}; +classify_error(Reason) -> + {error, {unrecoverable_error, Reason}}. + ensure_mqtt_worker_started(InstanceId, BridgeConf) -> - case emqx_connector_mqtt_worker:ensure_started(InstanceId) of - ok -> {ok, #{name => InstanceId, bridge_conf => BridgeConf}}; - {error, Reason} -> {error, Reason} + case emqx_connector_mqtt_worker:connect(InstanceId) of + {ok, Properties} -> + {ok, #{name => InstanceId, config => BridgeConf, props => Properties}}; + {error, Reason} -> + {error, Reason} end. make_sub_confs(EmptyMap, _Conf, _) when map_size(EmptyMap) == 0 -> undefined; make_sub_confs(undefined, _Conf, _) -> undefined; -make_sub_confs(SubRemoteConf, Conf, InstId) -> - ResId = emqx_resource_manager:manager_id_to_resource_id(InstId), +make_sub_confs(SubRemoteConf, Conf, InstanceId) -> + ResId = emqx_resource_manager:manager_id_to_resource_id(InstanceId), case maps:find(hookpoint, Conf) of error -> error({no_hookpoint_provided, Conf}); @@ -251,7 +263,6 @@ basic_config( %% 30s connect_timeout => 30, auto_reconnect => true, - reconnect_interval => ?AUTO_RECONNECT_INTERVAL, proto_ver => ProtoVer, %% Opening bridge_mode will form a non-standard mqtt connection message. %% A load balancing server (such as haproxy) is often set up before the emqx broker server. @@ -264,8 +275,7 @@ basic_config( retry_interval => RetryIntv, max_inflight => MaxInflight, ssl => EnableSsl, - ssl_opts => maps:to_list(maps:remove(enable, Ssl)), - if_record_metrics => true + ssl_opts => maps:to_list(maps:remove(enable, Ssl)) }, maybe_put_fields([username, password], Conf, BasicConf). 
diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl deleted file mode 100644 index 6acbe3bb4..000000000 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_mod.erl +++ /dev/null @@ -1,235 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - -%% @doc This module implements EMQX Bridge transport layer on top of MQTT protocol - --module(emqx_connector_mqtt_mod). - --export([ - start/1, - send/2, - send_async/3, - stop/1, - ping/1 -]). - --export([info/2]). - --export([ - ensure_subscribed/3, - ensure_unsubscribed/2 -]). - -%% callbacks for emqtt --export([ - handle_publish/3, - handle_disconnected/2 -]). - --include_lib("emqx/include/logger.hrl"). 
- -%%-------------------------------------------------------------------- -%% emqx_bridge_connect callbacks -%%-------------------------------------------------------------------- - -start(Config) -> - Parent = self(), - ServerStr = iolist_to_binary(maps:get(server, Config)), - {Server, Port} = emqx_connector_mqtt_schema:parse_server(ServerStr), - Mountpoint = maps:get(receive_mountpoint, Config, undefined), - Subscriptions = maps:get(subscriptions, Config, undefined), - Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Subscriptions), - Handlers = make_hdlr(Parent, Vars, #{server => ServerStr}), - Config1 = Config#{ - msg_handler => Handlers, - host => Server, - port => Port, - force_ping => true, - proto_ver => maps:get(proto_ver, Config, v4) - }, - case emqtt:start_link(process_config(Config1)) of - {ok, Pid} -> - case emqtt:connect(Pid) of - {ok, _} -> - try - ok = sub_remote_topics(Pid, Subscriptions), - {ok, #{client_pid => Pid, subscriptions => Subscriptions}} - catch - throw:Reason -> - ok = stop(#{client_pid => Pid}), - {error, error_reason(Reason, ServerStr)} - end; - {error, Reason} -> - ok = stop(#{client_pid => Pid}), - {error, error_reason(Reason, ServerStr)} - end; - {error, Reason} -> - {error, error_reason(Reason, ServerStr)} - end. - -error_reason(Reason, ServerStr) -> - #{reason => Reason, server => ServerStr}. - -stop(#{client_pid := Pid}) -> - safe_stop(Pid, fun() -> emqtt:stop(Pid) end, 1000), - ok. - -ping(undefined) -> - pang; -ping(#{client_pid := Pid}) -> - emqtt:ping(Pid). - -info(pid, #{client_pid := Pid}) -> - Pid. - -ensure_subscribed(#{client_pid := Pid, subscriptions := Subs} = Conn, Topic, QoS) when - is_pid(Pid) --> - case emqtt:subscribe(Pid, Topic, QoS) of - {ok, _, _} -> Conn#{subscriptions => [{Topic, QoS} | Subs]}; - Error -> {error, Error} - end; -ensure_subscribed(_Conn, _Topic, _QoS) -> - %% return ok for now - %% next re-connect should should call start with new topic added to config - ok. 
- -ensure_unsubscribed(#{client_pid := Pid, subscriptions := Subs} = Conn, Topic) when is_pid(Pid) -> - case emqtt:unsubscribe(Pid, Topic) of - {ok, _, _} -> Conn#{subscriptions => lists:keydelete(Topic, 1, Subs)}; - Error -> {error, Error} - end; -ensure_unsubscribed(Conn, _) -> - %% return ok for now - %% next re-connect should should call start with this topic deleted from config - Conn. - -safe_stop(Pid, StopF, Timeout) -> - MRef = monitor(process, Pid), - unlink(Pid), - try - StopF() - catch - _:_ -> - ok - end, - receive - {'DOWN', MRef, _, _, _} -> - ok - after Timeout -> - exit(Pid, kill) - end. - -send(#{client_pid := ClientPid}, Msg) -> - emqtt:publish(ClientPid, Msg). - -send_async(#{client_pid := ClientPid}, Msg, Callback) -> - emqtt:publish_async(ClientPid, Msg, infinity, Callback). - -handle_publish(Msg, undefined, _Opts) -> - ?SLOG(error, #{ - msg => - "cannot_publish_to_local_broker_as" - "_'ingress'_is_not_configured", - message => Msg - }); -handle_publish(#{properties := Props} = Msg0, Vars, Opts) -> - Msg = format_msg_received(Msg0, Opts), - ?SLOG(debug, #{ - msg => "publish_to_local_broker", - message => Msg, - vars => Vars - }), - case Vars of - #{on_message_received := {Mod, Func, Args}} -> - _ = erlang:apply(Mod, Func, [Msg | Args]); - _ -> - ok - end, - maybe_publish_to_local_broker(Msg, Vars, Props). - -handle_disconnected(Reason, Parent) -> - Parent ! {disconnected, self(), Reason}. - -make_hdlr(Parent, Vars, Opts) -> - #{ - publish => {fun ?MODULE:handle_publish/3, [Vars, Opts]}, - disconnected => {fun ?MODULE:handle_disconnected/2, [Parent]} - }. - -sub_remote_topics(_ClientPid, undefined) -> - ok; -sub_remote_topics(ClientPid, #{remote := #{topic := FromTopic, qos := QoS}}) -> - case emqtt:subscribe(ClientPid, FromTopic, QoS) of - {ok, _, _} -> ok; - Error -> throw(Error) - end. - -process_config(Config) -> - maps:without([conn_type, address, receive_mountpoint, subscriptions, name], Config). 
- -maybe_publish_to_local_broker(Msg, Vars, Props) -> - case emqx_map_lib:deep_get([local, topic], Vars, undefined) of - %% local topic is not set, discard it - undefined -> ok; - _ -> emqx_broker:publish(emqx_connector_mqtt_msg:to_broker_msg(Msg, Vars, Props)) - end. - -format_msg_received( - #{ - dup := Dup, - payload := Payload, - properties := Props, - qos := QoS, - retain := Retain, - topic := Topic - }, - #{server := Server} -) -> - #{ - id => emqx_guid:to_hexstr(emqx_guid:gen()), - server => Server, - payload => Payload, - topic => Topic, - qos => QoS, - dup => Dup, - retain => Retain, - pub_props => printable_maps(Props), - message_received_at => erlang:system_time(millisecond) - }. - -printable_maps(undefined) -> - #{}; -printable_maps(Headers) -> - maps:fold( - fun - ('User-Property', V0, AccIn) when is_list(V0) -> - AccIn#{ - 'User-Property' => maps:from_list(V0), - 'User-Property-Pairs' => [ - #{ - key => Key, - value => Value - } - || {Key, Value} <- V0 - ] - }; - (K, V0, AccIn) -> - AccIn#{K => V0} - end, - #{}, - Headers - ). diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index 776d2d8d9..85261a063 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -60,172 +60,252 @@ %% * Local messages are all normalised to QoS-1 when exporting to remote -module(emqx_connector_mqtt_worker). --behaviour(gen_statem). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("emqx/include/logger.hrl"). %% APIs -export([ - start_link/1, - stop/1 -]). - -%% gen_statem callbacks --export([ - terminate/3, - code_change/4, - init/1, - callback_mode/0 -]). - -%% state functions --export([ - idle/3, - connected/3 + start_link/2, + stop/1, + pid/1 ]). %% management APIs -export([ - ensure_started/1, - ensure_stopped/1, + connect/1, status/1, ping/1, send_to_remote/2, send_to_remote_async/3 ]). 
--export([get_forwards/1]). - --export([get_subscriptions/1]). +-export([handle_publish/3]). +-export([handle_disconnect/1]). -export_type([ config/0, ack_ref/0 ]). --type id() :: atom() | string() | pid(). --type qos() :: emqx_types:qos(). +-type name() :: term(). +% -type qos() :: emqx_types:qos(). -type config() :: map(). -type ack_ref() :: term(). --type topic() :: emqx_types:topic(). +% -type topic() :: emqx_types:topic(). -include_lib("emqx/include/logger.hrl"). -include_lib("emqx/include/emqx_mqtt.hrl"). -%% same as default in-flight limit for emqtt --define(DEFAULT_INFLIGHT_SIZE, 32). --define(DEFAULT_RECONNECT_DELAY_MS, timer:seconds(5)). +-define(REF(Name), {via, gproc, ?NAME(Name)}). +-define(NAME(Name), {n, l, Name}). %% @doc Start a bridge worker. Supported configs: -%% start_type: 'manual' (default) or 'auto', when manual, bridge will stay -%% at 'idle' state until a manual call to start it. -%% connect_module: The module which implements emqx_bridge_connect behaviour -%% and work as message batch transport layer -%% reconnect_interval: Delay in milli-seconds for the bridge worker to retry -%% in case of transportation failure. -%% max_inflight: Max number of batches allowed to send-ahead before receiving -%% confirmation from remote node/cluster %% mountpoint: The topic mount point for messages sent to remote node/cluster %% `undefined', `<<>>' or `""' to disable %% forwards: Local topics to subscribe. %% %% Find more connection specific configs in the callback modules %% of emqx_bridge_connect behaviour. -start_link(Opts) when is_list(Opts) -> - start_link(maps:from_list(Opts)); -start_link(Opts) -> - case maps:get(name, Opts, undefined) of - undefined -> - gen_statem:start_link(?MODULE, Opts, []); - Name -> - Name1 = name(Name), - gen_statem:start_link({local, Name1}, ?MODULE, Opts#{name => Name1}, []) +-spec start_link(name(), map()) -> + {ok, pid()} | {error, _Reason}. 
+start_link(Name, BridgeOpts) -> + ?SLOG(debug, #{ + msg => "client_starting", + name => Name, + options => BridgeOpts + }), + Conf = init_config(BridgeOpts), + Options = mk_client_options(Conf, BridgeOpts), + case emqtt:start_link(Options) of + {ok, Pid} -> + true = gproc:reg_other(?NAME(Name), Pid, Conf), + {ok, Pid}; + {error, Reason} = Error -> + ?SLOG(error, #{ + msg => "client_start_failed", + config => emqx_misc:redact(BridgeOpts), + reason => Reason + }), + Error end. -ensure_started(Name) -> - gen_statem:call(name(Name), ensure_started). - -%% @doc Manually stop bridge worker. State idempotency ensured. -ensure_stopped(Name) -> - gen_statem:call(name(Name), ensure_stopped, 5000). - -stop(Pid) -> gen_statem:stop(Pid). - -status(Pid) when is_pid(Pid) -> - gen_statem:call(Pid, status); -status(Name) -> - gen_statem:call(name(Name), status). - -ping(Pid) when is_pid(Pid) -> - gen_statem:call(Pid, ping); -ping(Name) -> - gen_statem:call(name(Name), ping). - -send_to_remote(Pid, Msg) when is_pid(Pid) -> - gen_statem:call(Pid, {send_to_remote, Msg}); -send_to_remote(Name, Msg) -> - gen_statem:call(name(Name), {send_to_remote, Msg}). - -send_to_remote_async(Pid, Msg, Callback) when is_pid(Pid) -> - gen_statem:call(Pid, {send_to_remote_async, Msg, Callback}); -send_to_remote_async(Name, Msg, Callback) -> - gen_statem:call(name(Name), {send_to_remote_async, Msg, Callback}). - -%% @doc Return all forwards (local subscriptions). --spec get_forwards(id()) -> [topic()]. -get_forwards(Name) -> gen_statem:call(name(Name), get_forwards, timer:seconds(1000)). - -%% @doc Return all subscriptions (subscription over mqtt connection to remote broker). --spec get_subscriptions(id()) -> [{emqx_types:topic(), qos()}]. -get_subscriptions(Name) -> gen_statem:call(name(Name), get_subscriptions). - -callback_mode() -> [state_functions]. - -%% @doc Config should be a map(). 
-init(#{name := Name} = ConnectOpts) -> - ?SLOG(debug, #{ - msg => "starting_bridge_worker", - name => Name - }), - erlang:process_flag(trap_exit, true), - State = init_state(ConnectOpts), - self() ! idle, - {ok, idle, State#{ - connect_opts => pre_process_opts(ConnectOpts) - }}. - -init_state(Opts) -> - ReconnDelayMs = maps:get(reconnect_interval, Opts, ?DEFAULT_RECONNECT_DELAY_MS), - StartType = maps:get(start_type, Opts, manual), +init_config(Opts) -> Mountpoint = maps:get(forward_mountpoint, Opts, undefined), - MaxInflightSize = maps:get(max_inflight, Opts, ?DEFAULT_INFLIGHT_SIZE), - Name = maps:get(name, Opts, undefined), + Subscriptions = maps:get(subscriptions, Opts, undefined), + Forwards = maps:get(forwards, Opts, undefined), #{ - start_type => StartType, - reconnect_interval => ReconnDelayMs, mountpoint => format_mountpoint(Mountpoint), - max_inflight => MaxInflightSize, - connection => undefined, - name => Name + subscriptions => pre_process_subscriptions(Subscriptions), + forwards => pre_process_forwards(Forwards) }. -pre_process_opts(#{subscriptions := InConf, forwards := OutConf} = ConnectOpts) -> - ConnectOpts#{ - subscriptions => pre_process_in_out(in, InConf), - forwards => pre_process_in_out(out, OutConf) +mk_client_options(Conf, BridgeOpts) -> + Server = iolist_to_binary(maps:get(server, BridgeOpts)), + HostPort = emqx_connector_mqtt_schema:parse_server(Server), + Mountpoint = maps:get(receive_mountpoint, BridgeOpts, undefined), + Subscriptions = maps:get(subscriptions, Conf), + Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Subscriptions), + Opts = maps:without( + [ + address, + auto_reconnect, + conn_type, + mountpoint, + forwards, + receive_mountpoint, + subscriptions + ], + BridgeOpts + ), + Opts#{ + msg_handler => mk_client_event_handler(Vars, #{server => Server}), + hosts => [HostPort], + force_ping => true, + proto_ver => maps:get(proto_ver, BridgeOpts, v4) }. 
-pre_process_in_out(_, undefined) -> +mk_client_event_handler(Vars, Opts) when Vars /= undefined -> + #{ + publish => {fun ?MODULE:handle_publish/3, [Vars, Opts]}, + disconnected => {fun ?MODULE:handle_disconnect/1, []} + }; +mk_client_event_handler(undefined, _Opts) -> + undefined. + +connect(Name) -> + #{subscriptions := Subscriptions} = get_config(Name), + case emqtt:connect(pid(Name)) of + {ok, Properties} -> + case subscribe_remote_topics(Name, Subscriptions) of + ok -> + {ok, Properties}; + {ok, _, _RCs} -> + {ok, Properties}; + {error, Reason} = Error -> + ?SLOG(error, #{ + msg => "client_subscribe_failed", + subscriptions => Subscriptions, + reason => Reason + }), + Error + end; + {error, Reason} = Error -> + ?SLOG(error, #{ + msg => "client_connect_failed", + reason => Reason + }), + Error + end. + +subscribe_remote_topics(Ref, #{remote := #{topic := FromTopic, qos := QoS}}) -> + emqtt:subscribe(ref(Ref), FromTopic, QoS); +subscribe_remote_topics(_Ref, undefined) -> + ok. + +stop(Ref) -> + emqtt:stop(ref(Ref)). + +pid(Name) -> + gproc:lookup_pid(?NAME(Name)). + +status(Ref) -> + trycall( + fun() -> + Info = emqtt:info(ref(Ref)), + case proplists:get_value(socket, Info) of + Socket when Socket /= undefined -> + connected; + undefined -> + connecting + end + end, + #{noproc => disconnected} + ). + +ping(Ref) -> + emqtt:ping(ref(Ref)). + +send_to_remote(Name, MsgIn) -> + trycall( + fun() -> do_send(Name, export_msg(Name, MsgIn)) end, + #{ + badarg => {error, disconnected}, + noproc => {error, disconnected} + } + ). 
+ +do_send(Name, {true, Msg}) -> + case emqtt:publish(pid(Name), Msg) of + ok -> + ok; + {ok, #{reason_code := RC}} when + RC =:= ?RC_SUCCESS; + RC =:= ?RC_NO_MATCHING_SUBSCRIBERS + -> + ok; + {ok, #{reason_code := RC, reason_code_name := Reason}} -> + ?SLOG(warning, #{ + msg => "remote_publish_failed", + message => Msg, + reason_code => RC, + reason_code_name => Reason + }), + {error, Reason}; + {error, Reason} -> + ?SLOG(info, #{ + msg => "client_failed", + reason => Reason + }), + {error, Reason} + end; +do_send(_Name, false) -> + ok. + +send_to_remote_async(Name, MsgIn, Callback) -> + trycall( + fun() -> do_send_async(Name, export_msg(Name, MsgIn), Callback) end, + #{badarg => {error, disconnected}} + ). + +do_send_async(Name, {true, Msg}, Callback) -> + emqtt:publish_async(pid(Name), Msg, _Timeout = infinity, Callback); +do_send_async(_Name, false, _Callback) -> + ok. + +ref(Pid) when is_pid(Pid) -> + Pid; +ref(Term) -> + ?REF(Term). + +trycall(Fun, Else) -> + try + Fun() + catch + error:badarg -> + maps:get(badarg, Else); + exit:{noproc, _} -> + maps:get(noproc, Else) + end. + +format_mountpoint(undefined) -> undefined; -pre_process_in_out(in, #{local := LC} = Conf) when is_map(Conf) -> +format_mountpoint(Prefix) -> + binary:replace(iolist_to_binary(Prefix), <<"${node}">>, atom_to_binary(node(), utf8)). + +pre_process_subscriptions(undefined) -> + undefined; +pre_process_subscriptions(#{local := LC} = Conf) when is_map(Conf) -> Conf#{local => pre_process_in_out_common(LC)}; -pre_process_in_out(in, Conf) when is_map(Conf) -> +pre_process_subscriptions(Conf) when is_map(Conf) -> %% have no 'local' field in the config + undefined. 
+ +pre_process_forwards(undefined) -> undefined; -pre_process_in_out(out, #{remote := RC} = Conf) when is_map(Conf) -> +pre_process_forwards(#{remote := RC} = Conf) when is_map(Conf) -> Conf#{remote => pre_process_in_out_common(RC)}; -pre_process_in_out(out, Conf) when is_map(Conf) -> +pre_process_forwards(Conf) when is_map(Conf) -> %% have no 'remote' field in the config undefined. @@ -245,241 +325,112 @@ pre_process_conf(Key, Conf) -> Conf#{Key => Val} end. -code_change(_Vsn, State, Data, _Extra) -> - {ok, State, Data}. +get_config(Name) -> + gproc:lookup_value(?NAME(Name)). -terminate(_Reason, _StateName, State) -> - _ = disconnect(State), - maybe_destroy_session(State). +export_msg(Name, Msg) -> + case get_config(Name) of + #{forwards := Forwards = #{}, mountpoint := Mountpoint} -> + {true, export_msg(Mountpoint, Forwards, Msg)}; + #{forwards := undefined} -> + ?SLOG(error, #{ + msg => "forwarding_unavailable", + message => Msg, + reason => "egress is not configured" + }), + false + end. -maybe_destroy_session(#{connect_opts := ConnectOpts = #{clean_start := false}} = State) -> - try - %% Destroy session if clean_start is not set. - %% Ignore any crashes, just refresh the clean_start = true. - _ = do_connect(State#{connect_opts => ConnectOpts#{clean_start => true}}), - _ = disconnect(State), - ok - catch - _:_ -> +export_msg(Mountpoint, Forwards, Msg) -> + Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Forwards), + emqx_connector_mqtt_msg:to_remote_msg(Msg, Vars). + +%% + +handle_publish(#{properties := Props} = MsgIn, Vars, Opts) -> + Msg = import_msg(MsgIn, Opts), + ?SLOG(debug, #{ + msg => "publish_local", + message => Msg, + vars => Vars + }), + case Vars of + #{on_message_received := {Mod, Func, Args}} -> + _ = erlang:apply(Mod, Func, [Msg | Args]); + _ -> ok - end; -maybe_destroy_session(_State) -> + end, + maybe_publish_local(Msg, Vars, Props). + +handle_disconnect(_Reason) -> ok. 
-%% ensure_started will be deprecated in the future -idle({call, From}, ensure_started, State) -> - case do_connect(State) of - {ok, State1} -> - {next_state, connected, State1, {reply, From, ok}}; - {error, Reason, _State} -> - {keep_state_and_data, {reply, From, {error, Reason}}} - end; -idle({call, From}, {send_to_remote, _}, _State) -> - {keep_state_and_data, {reply, From, {error, {recoverable_error, not_connected}}}}; -idle({call, From}, {send_to_remote_async, _, _}, _State) -> - {keep_state_and_data, {reply, From, {error, {recoverable_error, not_connected}}}}; -%% @doc Standing by for manual start. -idle(info, idle, #{start_type := manual}) -> - keep_state_and_data; -%% @doc Standing by for auto start. -idle(info, idle, #{start_type := auto} = State) -> - connecting(State); -idle(state_timeout, reconnect, State) -> - connecting(State); -idle(Type, Content, State) -> - common(idle, Type, Content, State). - -connecting(#{reconnect_interval := ReconnectDelayMs} = State) -> - case do_connect(State) of - {ok, State1} -> - {next_state, connected, State1}; +maybe_publish_local(Msg, Vars, Props) -> + case emqx_map_lib:deep_get([local, topic], Vars, undefined) of + %% local topic is not set, discard it + undefined -> + ok; _ -> - {keep_state_and_data, {state_timeout, ReconnectDelayMs, reconnect}} + emqx_broker:publish(emqx_connector_mqtt_msg:to_broker_msg(Msg, Vars, Props)) end. 
-connected({call, From}, {send_to_remote, Msg}, State) -> - case do_send(State, Msg) of - {ok, NState} -> - {keep_state, NState, {reply, From, ok}}; - {error, Reason} -> - {keep_state_and_data, {reply, From, {error, Reason}}} - end; -connected( - {call, From}, - {send_to_remote_async, Msg, Callback}, - State = #{connection := Connection} -) -> - _ = do_send_async(State, Msg, Callback), - {keep_state, State, {reply, From, {ok, emqx_connector_mqtt_mod:info(pid, Connection)}}}; -connected( - info, - {disconnected, Conn, Reason}, - #{connection := Connection, name := Name, reconnect_interval := ReconnectDelayMs} = State -) -> - ?tp(info, disconnected, #{name => Name, reason => Reason}), - case Conn =:= maps:get(client_pid, Connection, undefined) of - true -> - {next_state, idle, State#{connection => undefined}, - {state_timeout, ReconnectDelayMs, reconnect}}; - false -> - keep_state_and_data - end; -connected(Type, Content, State) -> - common(connected, Type, Content, State). - -%% Common handlers -common(StateName, {call, From}, status, _State) -> - {keep_state_and_data, {reply, From, StateName}}; -common(_StateName, {call, From}, ping, #{connection := Conn} = _State) -> - Reply = emqx_connector_mqtt_mod:ping(Conn), - {keep_state_and_data, {reply, From, Reply}}; -common(_StateName, {call, From}, ensure_stopped, #{connection := undefined} = _State) -> - {keep_state_and_data, {reply, From, ok}}; -common(_StateName, {call, From}, ensure_stopped, #{connection := Conn} = State) -> - Reply = emqx_connector_mqtt_mod:stop(Conn), - {next_state, idle, State#{connection => undefined}, {reply, From, Reply}}; -common(_StateName, {call, From}, get_forwards, #{connect_opts := #{forwards := Forwards}}) -> - {keep_state_and_data, {reply, From, Forwards}}; -common(_StateName, {call, From}, get_subscriptions, #{connection := Connection}) -> - {keep_state_and_data, {reply, From, maps:get(subscriptions, Connection, #{})}}; -common(_StateName, {call, From}, Req, _State) -> - 
{keep_state_and_data, {reply, From, {error, {unsupported_request, Req}}}}; -common(_StateName, info, {'EXIT', _, _}, State) -> - {keep_state, State}; -common(StateName, Type, Content, #{name := Name} = State) -> - ?SLOG(error, #{ - msg => "bridge_discarded_event", - name => Name, - type => Type, - state_name => StateName, - content => Content - }), - {keep_state, State}. - -do_connect( +import_msg( #{ - connect_opts := ConnectOpts, - name := Name - } = State -) -> - case emqx_connector_mqtt_mod:start(ConnectOpts) of - {ok, Conn} -> - ?tp(info, connected, #{name => Name}), - {ok, State#{connection => Conn}}; - {error, Reason} -> - ConnectOpts1 = obfuscate(ConnectOpts), - ?SLOG(error, #{ - msg => "failed_to_connect", - config => ConnectOpts1, - reason => Reason - }), - {error, Reason, State} - end. - -do_send(#{connect_opts := #{forwards := undefined}}, Msg) -> - ?SLOG(error, #{ - msg => - "cannot_forward_messages_to_remote_broker" - "_as_'egress'_is_not_configured", - messages => Msg - }); -do_send( - #{ - connection := Connection, - mountpoint := Mountpoint, - connect_opts := #{forwards := Forwards} - } = State, - Msg -) -> - Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Forwards), - ExportMsg = emqx_connector_mqtt_msg:to_remote_msg(Msg, Vars), - ?SLOG(debug, #{ - msg => "publish_to_remote_broker", - message => Msg, - vars => Vars - }), - case emqx_connector_mqtt_mod:send(Connection, ExportMsg) of - ok -> - {ok, State}; - {ok, #{reason_code := RC}} when - RC =:= ?RC_SUCCESS; - RC =:= ?RC_NO_MATCHING_SUBSCRIBERS - -> - {ok, State}; - {ok, #{reason_code := RC, reason_code_name := RCN}} -> - ?SLOG(warning, #{ - msg => "publish_to_remote_node_falied", - message => Msg, - reason_code => RC, - reason_code_name => RCN - }), - {error, RCN}; - {error, Reason} -> - ?SLOG(info, #{ - msg => "mqtt_bridge_produce_failed", - reason => Reason - }), - {error, Reason} - end. 
- -do_send_async(#{connect_opts := #{forwards := undefined}}, Msg, _Callback) -> - %% TODO: eval callback with undefined error - ?SLOG(error, #{ - msg => - "cannot_forward_messages_to_remote_broker" - "_as_'egress'_is_not_configured", - messages => Msg - }); -do_send_async( - #{ - connection := Connection, - mountpoint := Mountpoint, - connect_opts := #{forwards := Forwards} + dup := Dup, + payload := Payload, + properties := Props, + qos := QoS, + retain := Retain, + topic := Topic }, - Msg, - Callback + #{server := Server} ) -> - Vars = emqx_connector_mqtt_msg:make_pub_vars(Mountpoint, Forwards), - ExportMsg = emqx_connector_mqtt_msg:to_remote_msg(Msg, Vars), - ?SLOG(debug, #{ - msg => "publish_to_remote_broker", - message => Msg, - vars => Vars - }), - emqx_connector_mqtt_mod:send_async(Connection, ExportMsg, Callback). + #{ + id => emqx_guid:to_hexstr(emqx_guid:gen()), + server => Server, + payload => Payload, + topic => Topic, + qos => QoS, + dup => Dup, + retain => Retain, + pub_props => printable_maps(Props), + message_received_at => erlang:system_time(millisecond) + }. -disconnect(#{connection := Conn} = State) when Conn =/= undefined -> - emqx_connector_mqtt_mod:stop(Conn), - State#{connection => undefined}; -disconnect(State) -> - State. - -format_mountpoint(undefined) -> - undefined; -format_mountpoint(Prefix) -> - binary:replace(iolist_to_binary(Prefix), <<"${node}">>, atom_to_binary(node(), utf8)). - -name(Id) -> list_to_atom(str(Id)). - -obfuscate(Map) -> +printable_maps(undefined) -> + #{}; +printable_maps(Headers) -> maps:fold( - fun(K, V, Acc) -> - case is_sensitive(K) of - true -> [{K, '***'} | Acc]; - false -> [{K, V} | Acc] - end + fun + ('User-Property', V0, AccIn) when is_list(V0) -> + AccIn#{ + 'User-Property' => maps:from_list(V0), + 'User-Property-Pairs' => [ + #{ + key => Key, + value => Value + } + || {Key, Value} <- V0 + ] + }; + (K, V0, AccIn) -> + AccIn#{K => V0} end, - [], - Map + #{}, + Headers ). 
-is_sensitive(password) -> true; -is_sensitive(ssl_opts) -> true; -is_sensitive(_) -> false. - -str(A) when is_atom(A) -> - atom_to_list(A); -str(B) when is_binary(B) -> - binary_to_list(B); -str(S) when is_list(S) -> - S. +%% TODO +% maybe_destroy_session(#{connect_opts := ConnectOpts = #{clean_start := false}} = State) -> +% try +% %% Destroy session if clean_start is not set. +% %% Ignore any crashes, just refresh the clean_start = true. +% _ = do_connect(State#{connect_opts => ConnectOpts#{clean_start => true}}), +% _ = disconnect(State), +% ok +% catch +% _:_ -> +% ok +% end; +% maybe_destroy_session(_State) -> +% ok. diff --git a/apps/emqx_connector/test/emqx_connector_mqtt_tests.erl b/apps/emqx_connector/test/emqx_connector_mqtt_tests.erl deleted file mode 100644 index 88c8b5218..000000000 --- a/apps/emqx_connector/test/emqx_connector_mqtt_tests.erl +++ /dev/null @@ -1,60 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - --module(emqx_connector_mqtt_tests). - --include_lib("eunit/include/eunit.hrl"). --include_lib("emqx/include/emqx_mqtt.hrl"). 
- -send_and_ack_test() -> - %% delegate from gen_rpc to rpc for unit test - meck:new(emqtt, [passthrough, no_history]), - meck:expect( - emqtt, - start_link, - 1, - fun(_) -> - {ok, spawn_link(fun() -> ok end)} - end - ), - meck:expect(emqtt, connect, 1, {ok, dummy}), - meck:expect( - emqtt, - stop, - 1, - fun(Pid) -> Pid ! stop end - ), - meck:expect( - emqtt, - publish, - 2, - fun(Client, Msg) -> - Client ! {publish, Msg}, - %% as packet id - {ok, Msg} - end - ), - try - Max = 1, - Batch = lists:seq(1, Max), - {ok, Conn} = emqx_connector_mqtt_mod:start(#{server => "127.0.0.1:1883"}), - %% return last packet id as batch reference - {ok, _AckRef} = emqx_connector_mqtt_mod:send(Conn, Batch), - - ok = emqx_connector_mqtt_mod:stop(Conn) - after - meck:unload(emqtt) - end. diff --git a/apps/emqx_connector/test/emqx_connector_mqtt_worker_tests.erl b/apps/emqx_connector/test/emqx_connector_mqtt_worker_tests.erl deleted file mode 100644 index 49bff7bbc..000000000 --- a/apps/emqx_connector/test/emqx_connector_mqtt_worker_tests.erl +++ /dev/null @@ -1,101 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - --module(emqx_connector_mqtt_worker_tests). - --include_lib("eunit/include/eunit.hrl"). --include_lib("emqx/include/emqx.hrl"). 
--include_lib("emqx/include/emqx_mqtt.hrl"). - --define(BRIDGE_NAME, test). --define(BRIDGE_REG_NAME, emqx_connector_mqtt_worker_test). --define(WAIT(PATTERN, TIMEOUT), - receive - PATTERN -> - ok - after TIMEOUT -> - error(timeout) - end -). - --export([start/1, send/2, stop/1]). - -start(#{connect_result := Result, test_pid := Pid, test_ref := Ref}) -> - case is_pid(Pid) of - true -> Pid ! {connection_start_attempt, Ref}; - false -> ok - end, - Result. - -send(SendFun, Batch) when is_function(SendFun, 2) -> - SendFun(Batch). - -stop(_Pid) -> ok. - -%% connect first, disconnect, then connect again -disturbance_test() -> - meck:new(emqx_connector_mqtt_mod, [passthrough, no_history]), - meck:expect(emqx_connector_mqtt_mod, start, 1, fun(Conf) -> start(Conf) end), - meck:expect(emqx_connector_mqtt_mod, send, 2, fun(SendFun, Batch) -> send(SendFun, Batch) end), - meck:expect(emqx_connector_mqtt_mod, stop, 1, fun(Pid) -> stop(Pid) end), - try - emqx_metrics:start_link(), - Ref = make_ref(), - TestPid = self(), - Config = make_config(Ref, TestPid, {ok, #{client_pid => TestPid}}), - {ok, Pid} = emqx_connector_mqtt_worker:start_link(Config#{name => bridge_disturbance}), - ?assertEqual(Pid, whereis(bridge_disturbance)), - ?WAIT({connection_start_attempt, Ref}, 1000), - Pid ! {disconnected, TestPid, test}, - ?WAIT({connection_start_attempt, Ref}, 1000), - emqx_metrics:stop(), - ok = emqx_connector_mqtt_worker:stop(Pid) - after - meck:unload(emqx_connector_mqtt_mod) - end. 
- -manual_start_stop_test() -> - meck:new(emqx_connector_mqtt_mod, [passthrough, no_history]), - meck:expect(emqx_connector_mqtt_mod, start, 1, fun(Conf) -> start(Conf) end), - meck:expect(emqx_connector_mqtt_mod, send, 2, fun(SendFun, Batch) -> send(SendFun, Batch) end), - meck:expect(emqx_connector_mqtt_mod, stop, 1, fun(Pid) -> stop(Pid) end), - try - emqx_metrics:start_link(), - Ref = make_ref(), - TestPid = self(), - BridgeName = manual_start_stop, - Config0 = make_config(Ref, TestPid, {ok, #{client_pid => TestPid}}), - Config = Config0#{start_type := manual}, - {ok, Pid} = emqx_connector_mqtt_worker:start_link(Config#{name => BridgeName}), - %% call ensure_started again should yield the same result - ok = emqx_connector_mqtt_worker:ensure_started(BridgeName), - emqx_connector_mqtt_worker:ensure_stopped(BridgeName), - emqx_metrics:stop(), - ok = emqx_connector_mqtt_worker:stop(Pid) - after - meck:unload(emqx_connector_mqtt_mod) - end. - -make_config(Ref, TestPid, Result) -> - #{ - start_type => auto, - subscriptions => undefined, - forwards => undefined, - reconnect_interval => 50, - test_pid => TestPid, - test_ref => Ref, - connect_result => Result - }. From c76311c9c306c8ede80b54691caedb52a9d5442c Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 31 Jan 2023 14:17:45 +0300 Subject: [PATCH 056/131] fix(buffer): count inflight batches properly --- .../src/emqx_resource_buffer_worker.erl | 9 +++--- .../test/emqx_resource_SUITE.erl | 32 +++++++++++-------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 50534df4f..c5395c8df 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -1121,6 +1121,10 @@ append_queue(Id, Index, Q, Queries) -> -define(INITIAL_TIME_REF, initial_time). -define(INITIAL_MONOTONIC_TIME_REF, initial_monotonic_time). 
+%% NOTE +%% There are 4 metadata rows in an inflight table, keyed by atoms declared above. ☝ +-define(INFLIGHT_META_ROWS, 4). + inflight_new(InfltWinSZ, Id, Index) -> TableId = ets:new( emqx_resource_buffer_worker_inflight_tab, @@ -1181,12 +1185,9 @@ is_inflight_full(InflightTID) -> Size >= MaxSize. inflight_num_batches(InflightTID) -> - %% Note: we subtract 2 because there're 2 metadata rows that hold - %% the maximum size value and the number of messages. - MetadataRowCount = 2, case ets:info(InflightTID, size) of undefined -> 0; - Size -> max(0, Size - MetadataRowCount) + Size -> max(0, Size - ?INFLIGHT_META_ROWS) end. inflight_num_msgs(InflightTID) -> diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 227b6fedc..27101d1cc 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -411,7 +411,8 @@ t_query_counter_async_inflight(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(WindowSize, ReqOpts), + %% one more so that inflight would be already full upon last query + inc_counter_in_parallel(WindowSize + 1, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 1_000 ), @@ -445,9 +446,9 @@ t_query_counter_async_inflight(_) -> %% all responses should be received after the resource is resumed. {ok, SRef0} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - %% +1 because the tmp_query above will be retried and succeed + %% +2 because the tmp_query above will be retried and succeed %% this time. 
- WindowSize + 1, + WindowSize + 2, _Timeout0 = 10_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), @@ -475,7 +476,7 @@ t_query_counter_async_inflight(_) -> fun(Trace) -> QueryTrace = ?of_kind(call_query_async, Trace), ?assertMatch([#{query := {query, _, {inc_counter, _}, _, _}} | _], QueryTrace), - ?assertEqual(WindowSize + Num, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), + ?assertEqual(WindowSize + Num + 1, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), ok end @@ -487,7 +488,8 @@ t_query_counter_async_inflight(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(WindowSize, ReqOpts), + %% one more so that inflight would be already full upon last query + inc_counter_in_parallel(WindowSize + 1, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 1_000 ), @@ -500,10 +502,10 @@ t_query_counter_async_inflight(_) -> %% this will block the resource_worker ok = emqx_resource:query(?ID, {inc_counter, 4}), - Sent = WindowSize + Num + WindowSize, + Sent = WindowSize + 1 + Num + WindowSize + 1, {ok, SRef1} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - WindowSize, + WindowSize + 1, _Timeout0 = 10_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), @@ -593,7 +595,8 @@ t_query_counter_async_inflight_batch(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(NumMsgs, ReqOpts), + %% a batch more so that inflight would be already full upon last query + inc_counter_in_parallel(NumMsgs + BatchSize, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 5_000 ), @@ -652,9 +655,9 @@ t_query_counter_async_inflight_batch(_) -> %% all responses should be received after the resource is resumed. 
{ok, SRef0} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - %% +1 because the tmp_query above will be retried and succeed + %% +2 because the tmp_query above will be retried and succeed %% this time. - WindowSize + 1, + WindowSize + 2, 10_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), @@ -664,7 +667,7 @@ t_query_counter_async_inflight_batch(_) -> %% take it again from the table; this time, it should have %% succeeded. ?assertMatch([{tmp_query, ok}], ets:take(Tab0, tmp_query)), - ?assertEqual(NumMsgs, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), + ?assertEqual(NumMsgs + BatchSize, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), %% send async query, this time everything should be ok. @@ -691,7 +694,7 @@ t_query_counter_async_inflight_batch(_) -> end ), ?assertEqual( - NumMsgs + NumMsgs1, + NumMsgs + BatchSize + NumMsgs1, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)} ), @@ -703,7 +706,8 @@ t_query_counter_async_inflight_batch(_) -> ?check_trace( {_, {ok, _}} = ?wait_async_action( - inc_counter_in_parallel(NumMsgs, ReqOpts), + %% a batch more so that inflight would be already full upon last query + inc_counter_in_parallel(NumMsgs + BatchSize, ReqOpts), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 5_000 ), @@ -719,7 +723,7 @@ t_query_counter_async_inflight_batch(_) -> %% this will block the resource_worker ok = emqx_resource:query(?ID, {inc_counter, 1}), - Sent = NumMsgs + NumMsgs1 + NumMsgs, + Sent = NumMsgs + BatchSize + NumMsgs1 + NumMsgs, {ok, SRef1} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), WindowSize, From c5a7cd5acd38440d1be63dd5690ea688bbe5232f Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 31 Jan 2023 15:20:46 +0300 Subject: [PATCH 057/131] fix(mqtt-bridge): drop unused configuration parameter --- .../src/schema/emqx_bridge_compatible_config.erl | 1 - 
.../test/emqx_bridge_compatible_config_tests.erl | 2 -- apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl | 6 ------ 3 files changed, 9 deletions(-) diff --git a/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl b/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl index 862b5e188..1e55d0c0e 100644 --- a/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl +++ b/apps/emqx_bridge/src/schema/emqx_bridge_compatible_config.erl @@ -72,7 +72,6 @@ up(#{<<"connector">> := Connector} = Config) -> Cn(proto_ver, <<"v4">>), Cn(server, undefined), Cn(retry_interval, <<"15s">>), - Cn(reconnect_interval, <<"15s">>), Cn(ssl, default_ssl()), {enable, Enable}, {resource_opts, default_resource_opts()}, diff --git a/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl b/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl index a2671a40e..36dd6324a 100644 --- a/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl +++ b/apps/emqx_bridge/test/emqx_bridge_compatible_config_tests.erl @@ -224,7 +224,6 @@ bridges { mode = \"cluster_shareload\" password = \"\" proto_ver = \"v5\" - reconnect_interval = \"15s\" replayq {offload = false, seg_bytes = \"100MB\"} retry_interval = \"12s\" server = \"localhost:1883\" @@ -257,7 +256,6 @@ bridges { mode = \"cluster_shareload\" password = \"\" proto_ver = \"v4\" - reconnect_interval = \"15s\" replayq {offload = false, seg_bytes = \"100MB\"} retry_interval = \"44s\" server = \"localhost:1883\" diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl index be462fcc1..6ea609cc6 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl @@ -72,12 +72,6 @@ fields("server_configs") -> )}, {server, emqx_schema:servers_sc(#{desc => ?DESC("server")}, ?MQTT_HOST_OPTS)}, {clientid_prefix, mk(binary(), #{required => false, desc => 
?DESC("clientid_prefix")})}, - {reconnect_interval, - mk_duration( - "Reconnect interval. Delay for the MQTT bridge to retry establishing the connection " - "in case of transportation failure.", - #{default => "15s"} - )}, {proto_ver, mk( hoconsc:enum([v3, v4, v5]), From b0ac924ca91b9fe5801d73406d5a15f33276b316 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 14:40:06 +0100 Subject: [PATCH 058/131] refactor: less copy-paste --- apps/emqx/src/emqx_schema.erl | 12 ++++-------- apps/emqx_conf/src/emqx_conf_schema.erl | 15 ++------------- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 8f016f684..48bd206c9 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -1815,16 +1815,12 @@ desc(_) -> %% utils -spec conf_get(string() | [string()], hocon:config()) -> term(). conf_get(Key, Conf) -> - V = hocon_maps:get(Key, Conf), - case is_binary(V) of - true -> - binary_to_list(V); - false -> - V - end. + ensure_list(hocon_maps:get(Key, Conf)). conf_get(Key, Conf, Default) -> - V = hocon_maps:get(Key, Conf, Default), + ensure_list(hocon_maps:get(Key, Conf, Default)). + +ensure_list(V) -> case is_binary(V) of true -> binary_to_list(V); diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index bc9154933..12f5a00ec 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1308,11 +1308,9 @@ crash_dump_file_default() -> %% utils -spec conf_get(string() | [string()], hocon:config()) -> term(). -conf_get(Key, Conf) -> - ensure_list(hocon_maps:get(Key, Conf)). +conf_get(Key, Conf) -> emqx_schema:conf_get(Key, Conf). -conf_get(Key, Conf, Default) -> - ensure_list(hocon_maps:get(Key, Conf, Default)). +conf_get(Key, Conf, Default) -> emqx_schema:conf_get(Key, Conf, Default). filter(Opts) -> [{K, V} || {K, V} <- Opts, V =/= undefined]. 
@@ -1376,15 +1374,6 @@ to_atom(Str) when is_list(Str) -> to_atom(Bin) when is_binary(Bin) -> binary_to_atom(Bin, utf8). --spec ensure_list(binary() | list(char())) -> list(char()). -ensure_list(V) -> - case is_binary(V) of - true -> - binary_to_list(V); - false -> - V - end. - roots(Module) -> lists:map(fun({_BinName, Root}) -> Root end, hocon_schema:roots(Module)). From 605d9972e467b5b341ca283731bbee781196fc2a Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 18:13:13 +0100 Subject: [PATCH 059/131] fix(config): avoid calling emqx:update_config/2 for logger refresh Prior to this fix, whatever is configured for loggers is persisted to cluster-override.conf. This may cause trouble for users who change the boot mode. For example if the node is once started in console mode, the logging config is persisted, with console enabled, but file disabled. Then if the user decides to start in daemon mode, all the logs will silently go to erlang.log.N and emqx.log.N will be empty. After this fix, only changes really made into cluster-override.conf will take effect. --- apps/emqx/src/config/emqx_config_logger.erl | 241 +++++++++++++++++--- apps/emqx_conf/src/emqx_conf_schema.erl | 140 +----------- 2 files changed, 218 insertions(+), 163 deletions(-) diff --git a/apps/emqx/src/config/emqx_config_logger.erl b/apps/emqx/src/config/emqx_config_logger.erl index 4b46e43d6..4d787ea0c 100644 --- a/apps/emqx/src/config/emqx_config_logger.erl +++ b/apps/emqx/src/config/emqx_config_logger.erl @@ -18,6 +18,7 @@ -behaviour(emqx_config_handler). %% API +-export([tr_handlers/1, tr_level/1]). -export([add_handler/0, remove_handler/0, refresh_config/0]). -export([post_config_update/5]). @@ -37,38 +38,224 @@ remove_handler() -> %% so we need to refresh the logger config after this node starts. %% It will not affect the logger config when cluster-override.conf is unchanged. refresh_config() -> - case emqx:get_raw_config(?LOG, undefined) of - %% no logger config when CT is running.
- undefined -> - ok; - Log -> - {ok, _} = emqx:update_config(?LOG, Log), - ok - end. + Overrides = emqx_config:read_override_confs(), + refresh_config(Overrides). -post_config_update(?LOG, _Req, _NewConf, _OldConf, AppEnvs) -> - Kernel = proplists:get_value(kernel, AppEnvs), - NewHandlers = proplists:get_value(logger, Kernel, []), - Level = proplists:get_value(logger_level, Kernel, warning), - ok = update_log_handlers(NewHandlers), - ok = emqx_logger:set_primary_log_level(Level), - application:set_env(kernel, logger_level, Level), - ok; +refresh_config(#{<<"log">> := _}) -> + %% read the checked config + LogConfig = emqx:get_config(?LOG, undefined), + Conf = #{log => LogConfig}, + ok = do_refresh_config(Conf); +refresh_config(_) -> + %% No config override found for 'log', do nothing + %% because the 'kernel' app should already be configured + %% from the base configs. i.e. emqx.conf + env vars + ok. + +%% this call is shared between initial config refresh at boot +%% and dynamic config update from HTTP API +do_refresh_config(Conf) -> + Handlers = tr_handlers(Conf), + ok = update_log_handlers(Handlers), + Level = tr_level(Conf), + ok = maybe_update_log_level(Level), + ok. + +post_config_update(?LOG, _Req, NewConf, _OldConf, _AppEnvs) -> + ok = do_refresh_config(#{log => NewConf}); post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) -> ok. +maybe_update_log_level(NewLevel) -> + OldLevel = application:get_env(kernel, logger_level, warning), + case OldLevel =:= NewLevel of + true -> + %% no change + ok; + false -> + log_to_console("Config override: log level is set to '~p'~n", [NewLevel]) + end. + +log_to_console(Fmt, Args) -> + io:format(standard_error, Fmt, Args). 
+ update_log_handlers(NewHandlers) -> OldHandlers = application:get_env(kernel, logger, []), - lists:foreach( - fun({handler, HandlerId, _Mod, _Conf}) -> - logger:remove_handler(HandlerId) + NewHandlersIds = lists:map(fun({handler, Id, _Mod, _Conf}) -> Id end, NewHandlers), + OldHandlersIds = lists:map(fun({handler, Id, _Mod, _Conf}) -> Id end, OldHandlers), + Removes = lists:map(fun(Id) -> {removed, Id} end, OldHandlersIds -- NewHandlersIds), + MapFn = fun({handler, Id, Mod, Conf} = Handler) -> + case lists:keyfind(Id, 2, OldHandlers) of + {handler, Id, Mod, Conf} -> + %% no change + false; + {handler, Id, _Mod, _Conf} -> + {true, {updated, Handler}}; + false -> + {true, {enabled, Handler}} + end + end, + AddsAndUpdates = lists:filtermap(MapFn, NewHandlers), + lists:foreach(fun update_log_handler/1, Removes ++ AddsAndUpdates), + _ = application:set_env(kernel, logger, NewHandlers), + ok. + +update_log_handler({removed, Id}) -> + log_to_console("Config override: ~s is removed~n", [id_for_log(Id)]), + logger:remove_handler(Id); +update_log_handler({Action, {handler, Id, Mod, Conf}}) -> + log_to_console("Config override: ~s is ~p~n", [id_for_log(Id), Action]), + % may return {error, {not_found, Id}} + _ = logger:remove_handler(Id), + ok = logger:add_handler(Id, Mod, Conf). + +id_for_log(console) -> "log.console_handler"; +id_for_log(Other) -> "log.file_handlers." ++ atom_to_list(Other). + +atom(Id) when is_binary(Id) -> binary_to_atom(Id, utf8); +atom(Id) when is_atom(Id) -> Id. + +%% @doc Translate raw config to app-env compatible log handler configs list. +tr_handlers(Conf) -> + %% mute the default handler + tr_console_handler(Conf) ++ + tr_file_handlers(Conf).
+ +%% For the default logger that outputs to console +tr_console_handler(Conf) -> + case conf_get("log.console_handler.enable", Conf) of + true -> + ConsoleConf = conf_get("log.console_handler", Conf), + [ + {handler, console, logger_std_h, #{ + level => conf_get("log.console_handler.level", Conf), + config => (log_handler_conf(ConsoleConf))#{type => standard_io}, + formatter => log_formatter(ConsoleConf), + filters => log_filter(ConsoleConf) + }} + ]; + false -> + [] + end. + +%% For the file logger +tr_file_handlers(Conf) -> + Handlers = logger_file_handlers(Conf), + lists:map(fun tr_file_handler/1, Handlers). + +tr_file_handler({HandlerName, SubConf}) -> + {handler, atom(HandlerName), logger_disk_log_h, #{ + level => conf_get("level", SubConf), + config => (log_handler_conf(SubConf))#{ + type => + case conf_get("rotation.enable", SubConf) of + true -> wrap; + _ -> halt + end, + file => conf_get("file", SubConf), + max_no_files => conf_get("rotation.count", SubConf), + max_no_bytes => conf_get("max_size", SubConf) + }, + formatter => log_formatter(SubConf), + filters => log_filter(SubConf), + filesync_repeat_interval => no_repeat + }}. + +logger_file_handlers(Conf) -> + Handlers = maps:to_list(conf_get("log.file_handlers", Conf, #{})), + lists:filter( + fun({_Name, Opts}) -> + B = conf_get("enable", Opts), + true = is_boolean(B), + B end, - OldHandlers -- NewHandlers - ), - lists:foreach( - fun({handler, HandlerId, Mod, Conf}) -> - logger:add_handler(HandlerId, Mod, Conf) + Handlers + ). + +conf_get(Key, Conf) -> emqx_schema:conf_get(Key, Conf). +conf_get(Key, Conf, Default) -> emqx_schema:conf_get(Key, Conf, Default). 
+ +log_handler_conf(Conf) -> + SycModeQlen = conf_get("sync_mode_qlen", Conf), + DropModeQlen = conf_get("drop_mode_qlen", Conf), + FlushQlen = conf_get("flush_qlen", Conf), + Overkill = conf_get("overload_kill", Conf), + BurstLimit = conf_get("burst_limit", Conf), + #{ + sync_mode_qlen => SycModeQlen, + drop_mode_qlen => DropModeQlen, + flush_qlen => FlushQlen, + overload_kill_enable => conf_get("enable", Overkill), + overload_kill_qlen => conf_get("qlen", Overkill), + overload_kill_mem_size => conf_get("mem_size", Overkill), + overload_kill_restart_after => conf_get("restart_after", Overkill), + burst_limit_enable => conf_get("enable", BurstLimit), + burst_limit_max_count => conf_get("max_count", BurstLimit), + burst_limit_window_time => conf_get("window_time", BurstLimit) + }. + +log_formatter(Conf) -> + CharsLimit = + case conf_get("chars_limit", Conf) of + unlimited -> unlimited; + V when V > 0 -> V end, - NewHandlers -- OldHandlers - ), - application:set_env(kernel, logger, NewHandlers). + TimeOffSet = + case conf_get("time_offset", Conf) of + "system" -> ""; + "utc" -> 0; + OffSetStr -> OffSetStr + end, + SingleLine = conf_get("single_line", Conf), + Depth = conf_get("max_depth", Conf), + do_formatter(conf_get("formatter", Conf), CharsLimit, SingleLine, TimeOffSet, Depth). + +%% helpers +do_formatter(json, CharsLimit, SingleLine, TimeOffSet, Depth) -> + {emqx_logger_jsonfmt, #{ + chars_limit => CharsLimit, + single_line => SingleLine, + time_offset => TimeOffSet, + depth => Depth + }}; +do_formatter(text, CharsLimit, SingleLine, TimeOffSet, Depth) -> + {emqx_logger_textfmt, #{ + template => [time, " [", level, "] ", msg, "\n"], + chars_limit => CharsLimit, + single_line => SingleLine, + time_offset => TimeOffSet, + depth => Depth + }}. + +log_filter(Conf) -> + case conf_get("supervisor_reports", Conf) of + error -> [{drop_progress_reports, {fun logger_filters:progress/2, stop}}]; + progress -> [] + end. 
+ +tr_level(Conf) -> + ConsoleLevel = conf_get("log.console_handler.level", Conf, undefined), + FileLevels = [ + conf_get("level", SubConf) + || {_, SubConf} <- + logger_file_handlers(Conf) + ], + case FileLevels ++ [ConsoleLevel || ConsoleLevel =/= undefined] of + %% warning is the default level we should use + [] -> warning; + Levels -> least_severe_log_level(Levels) + end. + +least_severe_log_level(Levels) -> + hd(sort_log_levels(Levels)). + +sort_log_levels(Levels) -> + lists:sort( + fun(A, B) -> + case logger:compare_levels(A, B) of + R when R == lt; R == eq -> true; + gt -> false + end + end, + Levels + ). diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index 12f5a00ec..9793e00d0 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -993,7 +993,7 @@ translation("ekka") -> translation("kernel") -> [ {"logger_level", fun tr_logger_level/1}, - {"logger", fun tr_logger/1}, + {"logger", fun tr_logger_handlers/1}, {"error_logger", fun(_) -> silent end} ]; translation("emqx") -> @@ -1065,70 +1065,10 @@ tr_cluster_discovery(Conf) -> -spec tr_logger_level(hocon:config()) -> logger:level(). tr_logger_level(Conf) -> - ConsoleLevel = conf_get("log.console_handler.level", Conf, undefined), - FileLevels = [ - conf_get("level", SubConf) - || {_, SubConf} <- - logger_file_handlers(Conf) - ], - case FileLevels ++ [ConsoleLevel || ConsoleLevel =/= undefined] of - %% warning is the default level we should use - [] -> warning; - Levels -> least_severe_log_level(Levels) - end. + emqx_config_logger:tr_level(Conf). -logger_file_handlers(Conf) -> - Handlers = maps:to_list(conf_get("log.file_handlers", Conf, #{})), - lists:filter( - fun({_Name, Opts}) -> - B = conf_get("enable", Opts), - true = is_boolean(B), - B - end, - Handlers - ). 
- -tr_logger(Conf) -> - %% For the default logger that outputs to console - ConsoleHandler = - case conf_get("log.console_handler.enable", Conf) of - true -> - ConsoleConf = conf_get("log.console_handler", Conf), - [ - {handler, console, logger_std_h, #{ - level => conf_get("log.console_handler.level", Conf), - config => (log_handler_conf(ConsoleConf))#{type => standard_io}, - formatter => log_formatter(ConsoleConf), - filters => log_filter(ConsoleConf) - }} - ]; - false -> - [] - end, - %% For the file logger - FileHandlers = - [ - begin - {handler, to_atom(HandlerName), logger_disk_log_h, #{ - level => conf_get("level", SubConf), - config => (log_handler_conf(SubConf))#{ - type => - case conf_get("rotation.enable", SubConf) of - true -> wrap; - _ -> halt - end, - file => conf_get("file", SubConf), - max_no_files => conf_get("rotation.count", SubConf), - max_no_bytes => conf_get("max_size", SubConf) - }, - formatter => log_formatter(SubConf), - filters => log_filter(SubConf), - filesync_repeat_interval => no_repeat - }} - end - || {HandlerName, SubConf} <- logger_file_handlers(Conf) - ], - [{handler, default, undefined}] ++ ConsoleHandler ++ FileHandlers. +tr_logger_handlers(Conf) -> + emqx_config_logger:tr_handlers(Conf). log_handler_common_confs(Enable) -> [ @@ -1225,78 +1165,6 @@ log_handler_common_confs(Enable) -> )} ]. 
-log_handler_conf(Conf) -> - SycModeQlen = conf_get("sync_mode_qlen", Conf), - DropModeQlen = conf_get("drop_mode_qlen", Conf), - FlushQlen = conf_get("flush_qlen", Conf), - Overkill = conf_get("overload_kill", Conf), - BurstLimit = conf_get("burst_limit", Conf), - #{ - sync_mode_qlen => SycModeQlen, - drop_mode_qlen => DropModeQlen, - flush_qlen => FlushQlen, - overload_kill_enable => conf_get("enable", Overkill), - overload_kill_qlen => conf_get("qlen", Overkill), - overload_kill_mem_size => conf_get("mem_size", Overkill), - overload_kill_restart_after => conf_get("restart_after", Overkill), - burst_limit_enable => conf_get("enable", BurstLimit), - burst_limit_max_count => conf_get("max_count", BurstLimit), - burst_limit_window_time => conf_get("window_time", BurstLimit) - }. - -log_formatter(Conf) -> - CharsLimit = - case conf_get("chars_limit", Conf) of - unlimited -> unlimited; - V when V > 0 -> V - end, - TimeOffSet = - case conf_get("time_offset", Conf) of - "system" -> ""; - "utc" -> 0; - OffSetStr -> OffSetStr - end, - SingleLine = conf_get("single_line", Conf), - Depth = conf_get("max_depth", Conf), - do_formatter(conf_get("formatter", Conf), CharsLimit, SingleLine, TimeOffSet, Depth). - -%% helpers -do_formatter(json, CharsLimit, SingleLine, TimeOffSet, Depth) -> - {emqx_logger_jsonfmt, #{ - chars_limit => CharsLimit, - single_line => SingleLine, - time_offset => TimeOffSet, - depth => Depth - }}; -do_formatter(text, CharsLimit, SingleLine, TimeOffSet, Depth) -> - {emqx_logger_textfmt, #{ - template => [time, " [", level, "] ", msg, "\n"], - chars_limit => CharsLimit, - single_line => SingleLine, - time_offset => TimeOffSet, - depth => Depth - }}. - -log_filter(Conf) -> - case conf_get("supervisor_reports", Conf) of - error -> [{drop_progress_reports, {fun logger_filters:progress/2, stop}}]; - progress -> [] - end. - -least_severe_log_level(Levels) -> - hd(sort_log_levels(Levels)). 
- -sort_log_levels(Levels) -> - lists:sort( - fun(A, B) -> - case logger:compare_levels(A, B) of - R when R == lt; R == eq -> true; - gt -> false - end - end, - Levels - ). - crash_dump_file_default() -> case os:getenv("RUNNER_LOG_DIR") of false -> From ddfbc0a19f489219902f26af228c5f2de0799db9 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 18:33:10 +0100 Subject: [PATCH 060/131] refactor: refresh logger config before starting listeners --- apps/emqx/src/emqx_app.erl | 1 + apps/emqx/src/emqx_config.erl | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_app.erl b/apps/emqx/src/emqx_app.erl index 6188d8030..c56cdb520 100644 --- a/apps/emqx/src/emqx_app.erl +++ b/apps/emqx/src/emqx_app.erl @@ -42,6 +42,7 @@ start(_Type, _Args) -> ok = maybe_load_config(), ok = emqx_persistent_session:init_db_backend(), + ok = emqx_config_logger:refresh_config(), ok = maybe_start_quicer(), ok = emqx_bpapi:start(), wait_boot_shards(), diff --git a/apps/emqx/src/emqx_config.erl b/apps/emqx/src/emqx_config.erl index 117043911..6d706316c 100644 --- a/apps/emqx/src/emqx_config.erl +++ b/apps/emqx/src/emqx_config.erl @@ -592,7 +592,6 @@ save_to_override_conf(RawConf, Opts) -> add_handlers() -> ok = emqx_config_logger:add_handler(), emqx_sys_mon:add_handler(), - emqx_config_logger:refresh_config(), ok. 
remove_handlers() -> From 0c80c31c9ecf4d37b2c207129df9665ef7acac59 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 20:43:59 +0100 Subject: [PATCH 061/131] test: delete log override from influxdb bridge test script --- scripts/test/influx/influx-bridge.conf | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scripts/test/influx/influx-bridge.conf b/scripts/test/influx/influx-bridge.conf index 31ddeaf79..3b5bb9f9f 100644 --- a/scripts/test/influx/influx-bridge.conf +++ b/scripts/test/influx/influx-bridge.conf @@ -34,16 +34,6 @@ bridges { } } } -log { - console_handler {enable = true, level = "warning"} - file_handlers { - default { - enable = false - file = "log/emqx.log" - level = "warning" - } - } -} rule_engine { ignore_sys_message = true jq_function_default_timeout = "10s" From 7e8253e3af5a0c1dc97252ce11f4bd65dfcaa614 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 20:45:02 +0100 Subject: [PATCH 062/131] chore: bump version to e5.0.0-rc.3 --- apps/emqx/include/emqx_release.hrl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index d3e0cd3b0..e3429aef1 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -35,7 +35,7 @@ -define(EMQX_RELEASE_CE, "5.0.15"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.0.0-rc.2"). +-define(EMQX_RELEASE_EE, "5.0.0-rc.3"). %% the HTTP API version -define(EMQX_API_VERSION, "5.0"). 
From 2d67bb3fb67cb8544dd401699d93ad01d67f59b2 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Sun, 29 Jan 2023 10:25:28 +0800 Subject: [PATCH 063/131] fix: /api/nodes is timeout if emqx in high load --- apps/emqx/src/emqx_os_mon.erl | 8 +- apps/emqx/src/emqx_vm.erl | 11 ++- apps/emqx/src/emqx_vm_mon.erl | 7 +- apps/emqx_management/src/emqx_mgmt.erl | 2 +- apps/emqx_management/src/emqx_mgmt_sup.erl | 13 ++- .../src/emqx_mgmt_sys_memory.erl | 79 +++++++++++++++++++ 6 files changed, 109 insertions(+), 11 deletions(-) create mode 100644 apps/emqx_management/src/emqx_mgmt_sys_memory.erl diff --git a/apps/emqx/src/emqx_os_mon.erl b/apps/emqx/src/emqx_os_mon.erl index a06f56a4c..5c6987ea0 100644 --- a/apps/emqx/src/emqx_os_mon.erl +++ b/apps/emqx/src/emqx_os_mon.erl @@ -130,8 +130,10 @@ handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = Stat handle_info({timeout, _Timer, cpu_check}, State) -> CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100, CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100, - case emqx_vm:cpu_util() of - 0 -> + CPUVal = emqx_vm:cpu_util(), + case CPUVal of + %% 0 or 0.0 + Busy when Busy == 0 -> ok; Busy when Busy > CPUHighWatermark -> _ = emqx_alarm:activate( @@ -236,5 +238,5 @@ do_update_mem_alarm_status(HWM0) -> ok. usage_msg(Usage, What) -> - %% devide by 1.0 to ensure float point number + %% divide by 1.0 to ensure float point number iolist_to_binary(io_lib:format("~.2f% ~p usage", [Usage / 1.0, What])). diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index cf1a9dc08..fc94a461a 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -232,8 +232,10 @@ mem_info() -> Free = proplists:get_value(free_memory, Dataset), [{total_memory, Total}, {used_memory, Total - Free}]. -ftos(F) -> - io_lib:format("~.2f", [F / 1.0]). +ftos(F) when is_float(F) -> + float_to_binary(F, [{decimals, 2}]); +ftos(F) when is_integer(F) -> + ftos(F / 1.0). 
%%%% erlang vm scheduler_usage fun copied from recon scheduler_usage(Interval) when is_integer(Interval) -> @@ -391,11 +393,12 @@ cpu_util() -> compat_windows(Fun) -> case os:type() of {win32, nt} -> - 0; + 0.0; _Type -> case catch Fun() of + Val when is_float(Val) -> floor(Val * 100) / 100; Val when is_number(Val) -> Val; - _Error -> 0 + _Error -> 0.0 end end. diff --git a/apps/emqx/src/emqx_vm_mon.erl b/apps/emqx/src/emqx_vm_mon.erl index 5447e94e9..1327a1bb0 100644 --- a/apps/emqx/src/emqx_vm_mon.erl +++ b/apps/emqx/src/emqx_vm_mon.erl @@ -63,7 +63,7 @@ handle_info({timeout, _Timer, check}, State) -> ProcessCount = erlang:system_info(process_count), case ProcessCount / erlang:system_info(process_limit) of Percent when Percent > ProcHighWatermark -> - Usage = io_lib:format("~p%", [Percent * 100]), + Usage = usage(Percent), Message = [Usage, " process usage"], emqx_alarm:activate( too_many_processes, @@ -75,7 +75,7 @@ handle_info({timeout, _Timer, check}, State) -> Message ); Percent when Percent < ProcLowWatermark -> - Usage = io_lib:format("~p%", [Percent * 100]), + Usage = usage(Percent), Message = [Usage, " process usage"], emqx_alarm:ensure_deactivated( too_many_processes, @@ -108,3 +108,6 @@ code_change(_OldVsn, State, _Extra) -> start_check_timer() -> Interval = emqx:get_config([sysmon, vm, process_check_interval]), emqx_misc:start_timer(Interval, check). + +usage(Percent) -> + integer_to_list(floor(Percent * 100)) ++ "%". diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 6b38e8ca0..09adde8bd 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -150,7 +150,7 @@ node_info() -> get_sys_memory() -> case os:type() of {unix, linux} -> - load_ctl:get_sys_memory(); + emqx_mgmt_sys_memory:get_sys_memory(); _ -> {0, 0} end. 
diff --git a/apps/emqx_management/src/emqx_mgmt_sup.erl b/apps/emqx_management/src/emqx_mgmt_sup.erl index 329532fa1..fa49c02a6 100644 --- a/apps/emqx_management/src/emqx_mgmt_sup.erl +++ b/apps/emqx_management/src/emqx_mgmt_sup.erl @@ -26,4 +26,15 @@ start_link() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). init([]) -> - {ok, {{one_for_one, 1, 5}, []}}. + LC = child_spec(emqx_mgmt_sys_memory, 5000, worker), + {ok, {{one_for_one, 1, 5}, [LC]}}. + +child_spec(Mod, Shutdown, Type) -> + #{ + id => Mod, + start => {Mod, start_link, []}, + restart => permanent, + shutdown => Shutdown, + type => Type, + modules => [Mod] + }. diff --git a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl new file mode 100644 index 000000000..d393caabe --- /dev/null +++ b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl @@ -0,0 +1,79 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_mgmt_sys_memory). + +-behaviour(gen_server). +-define(SYS_MEMORY_CACHE_KEY, ?MODULE). +-define(TIMEOUT, 3000). + +-export([start_link/0, get_sys_memory/0, get_sys_memory/1]). +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). 
+ +get_sys_memory() -> + get_sys_memory(?TIMEOUT). + +get_sys_memory(Timeout) -> + try + gen_server:call(?MODULE, get_sys_memory, Timeout) + catch + exit:{timeout, _} -> + get_memory_from_cache() + end. + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + {ok, #{last_time => 0}}. + +handle_call(get_sys_memory, _From, State = #{last_time := LastTime}) -> + Now = erlang:system_time(millisecond), + case Now - LastTime >= ?TIMEOUT of + true -> + Memory = load_ctl:get_sys_memory(), + persistent_term:put(?SYS_MEMORY_CACHE_KEY, Memory), + {reply, Memory, State#{last_time => Now}}; + false -> + {reply, get_memory_from_cache(), State} + end; +handle_call(_Request, _From, State = #{}) -> + {reply, ok, State}. + +handle_cast(_Request, State = #{}) -> + {noreply, State}. + +handle_info(_Info, State = #{}) -> + {noreply, State}. + +terminate(_Reason, _State = #{}) -> + ok. + +code_change(_OldVsn, State = #{}, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +get_memory_from_cache() -> + persistent_term:get(?SYS_MEMORY_CACHE_KEY, {0, 0}). From 5783127c3017da8a5855daf8ad37763aae3f52e7 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Sun, 29 Jan 2023 11:01:43 +0800 Subject: [PATCH 064/131] test: cpu_sup:load mock test --- apps/emqx/src/emqx_vm.erl | 4 +--- apps/emqx/test/emqx_vm_SUITE.erl | 16 +++++++++++++++- .../test/emqx_mgmt_api_alarms_SUITE.erl | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index fc94a461a..c1096f611 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -233,9 +233,7 @@ mem_info() -> [{total_memory, Total}, {used_memory, Total - Free}]. ftos(F) when is_float(F) -> - float_to_binary(F, [{decimals, 2}]); -ftos(F) when is_integer(F) -> - ftos(F / 1.0). 
+ float_to_binary(F, [{decimals, 2}]). %%%% erlang vm scheduler_usage fun copied from recon scheduler_usage(Interval) when is_integer(Interval) -> diff --git a/apps/emqx/test/emqx_vm_SUITE.erl b/apps/emqx/test/emqx_vm_SUITE.erl index f9809361b..9115c5ab4 100644 --- a/apps/emqx/test/emqx_vm_SUITE.erl +++ b/apps/emqx/test/emqx_vm_SUITE.erl @@ -24,7 +24,21 @@ all() -> emqx_common_test_helpers:all(?MODULE). t_load(_Config) -> - ?assertMatch([{load1, _}, {load5, _}, {load15, _}], emqx_vm:loads()). + lists:foreach( + fun(Avg, Int) -> + emqx_common_test_helpers:with_mock( + cpu_sup, + Avg, + fun() -> Int end, + fun() -> + Load = proplists:get_value(Avg, emqx_vm:loads()), + ?assertEqual(Int / 1.0, Load) + end + ), + ?assertMatch([{load1, _}, {load5, _}, {load15, _}], emqx_vm:loads()) + end, + [{load1, 1}, {load5, 5}, {load15, 15}] + ). t_systeminfo(_Config) -> ?assertEqual( diff --git a/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl index adff41214..2c61651bf 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_alarms_SUITE.erl @@ -40,6 +40,9 @@ t_alarms_api(_) -> get_alarms(1, true), get_alarms(1, false). +t_alarm_cpu(_) -> + ok. + t_delete_alarms_api(_) -> Path = emqx_mgmt_api_test_util:api_path(["alarms"]), {ok, _} = emqx_mgmt_api_test_util:request_api(delete, Path), From 6162f90610758c14e3d280869660e70e2857ccdd Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Sat, 28 Jan 2023 11:53:54 +0800 Subject: [PATCH 065/131] fix: don't crash when OTP_VERSION file is missing --- apps/emqx/src/emqx_vm.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index c1096f611..50582a2cc 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -400,7 +400,7 @@ compat_windows(Fun) -> end end. -%% @doc Return on which Eralng/OTP the current vm is running. 
+%% @doc Return on which Erlang/OTP the current vm is running. %% NOTE: This API reads a file, do not use it in critical code paths. get_otp_version() -> read_otp_version(). @@ -417,6 +417,8 @@ read_otp_version() -> %% running tests etc. OtpMajor = erlang:system_info(otp_release), OtpVsnFile = filename:join([ReleasesDir, OtpMajor, "OTP_VERSION"]), - {ok, Vsn} = file:read_file(OtpVsnFile), - Vsn + case file:read_file(OtpVsnFile) of + {ok, Vsn} -> Vsn; + {error, enoent} -> list_to_binary(OtpMajor) + end end. From 0b19be074c7856c9f52a53364f05145b65eda5ed Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Sat, 28 Jan 2023 12:07:50 +0800 Subject: [PATCH 066/131] feat: cache OTP_VERSION in persistent_term --- apps/emqx/src/emqx_vm.erl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index 50582a2cc..7da49016d 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -401,9 +401,19 @@ compat_windows(Fun) -> end. %% @doc Return on which Erlang/OTP the current vm is running. -%% NOTE: This API reads a file, do not use it in critical code paths. +%% The dashboard's /api/nodes endpoint will call this function frequently. +%% We should avoid reading the file every time. +%% The OTP version never changes at runtime except when erts is upgraded, +%% so we cache it in a persistent term for performance. get_otp_version() -> - read_otp_version(). + case persistent_term:get(emqx_otp_version, undefined) of + undefined -> + OtpVsn = read_otp_version(), + persistent_term:put(emqx_otp_version, OtpVsn), + OtpVsn; + OtpVsn when is_binary(OtpVsn) -> + OtpVsn + end.
read_otp_version() -> ReleasesDir = filename:join([code:root_dir(), "releases"]), From b6e6315b5076c39a2c394623651fdfc44dc3b1a9 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 11:53:49 +0800 Subject: [PATCH 067/131] feat: change loads from string to float --- apps/emqx/src/emqx_alarm.erl | 5 +- apps/emqx/src/emqx_os_mon.erl | 34 +++-- apps/emqx/src/emqx_vm.erl | 12 +- apps/emqx/test/emqx_os_mon_SUITE.erl | 142 ++++++++++++++++-- apps/emqx/test/emqx_vm_SUITE.erl | 15 +- apps/emqx/test/emqx_vm_mon_SUITE.erl | 26 +++- apps/emqx_management/src/emqx_mgmt.erl | 2 +- .../src/emqx_mgmt_api_nodes.erl | 12 +- .../src/emqx_mgmt_sys_memory.erl | 2 +- .../test/emqx_mgmt_api_nodes_SUITE.erl | 4 +- 10 files changed, 199 insertions(+), 55 deletions(-) diff --git a/apps/emqx/src/emqx_alarm.erl b/apps/emqx/src/emqx_alarm.erl index 209715a85..84c40ef2a 100644 --- a/apps/emqx/src/emqx_alarm.erl +++ b/apps/emqx/src/emqx_alarm.erl @@ -325,19 +325,20 @@ deactivate_alarm( false -> ok end, + Now = erlang:system_time(microsecond), HistoryAlarm = make_deactivated_alarm( ActivateAt, Name, Details0, Msg0, - erlang:system_time(microsecond) + Now ), DeActAlarm = make_deactivated_alarm( ActivateAt, Name, Details, normalize_message(Name, iolist_to_binary(Message)), - erlang:system_time(microsecond) + Now ), mria:dirty_write(?DEACTIVATED_ALARM, HistoryAlarm), mria:dirty_delete(?ACTIVATED_ALARM, Name), diff --git a/apps/emqx/src/emqx_os_mon.erl b/apps/emqx/src/emqx_os_mon.erl index 5c6987ea0..c5ce35bf9 100644 --- a/apps/emqx/src/emqx_os_mon.erl +++ b/apps/emqx/src/emqx_os_mon.erl @@ -93,9 +93,9 @@ init([]) -> %% memsup is not reliable, ignore memsup:set_sysmem_high_watermark(1.0), SysHW = init_os_monitor(), - _ = start_mem_check_timer(), - _ = start_cpu_check_timer(), - {ok, #{sysmem_high_watermark => SysHW}}. + MemRef = start_mem_check_timer(), + CpuRef = start_cpu_check_timer(), + {ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}. 
init_os_monitor() -> init_os_monitor(emqx:get_config([sysmon, os])). @@ -125,8 +125,8 @@ handle_cast(Msg, State) -> handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = State) -> ok = update_mem_alarm_status(HWM), - ok = start_mem_check_timer(), - {noreply, State}; + Ref = start_mem_check_timer(), + {noreply, State#{mem_time_ref => Ref}}; handle_info({timeout, _Timer, cpu_check}, State) -> CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100, CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100, @@ -158,11 +158,14 @@ handle_info({timeout, _Timer, cpu_check}, State) -> _Busy -> ok end, - ok = start_cpu_check_timer(), - {noreply, State}; -handle_info({monitor_conf_update, OS}, _State) -> + Ref = start_cpu_check_timer(), + {noreply, State#{cpu_time_ref => Ref}}; +handle_info({monitor_conf_update, OS}, State) -> + cancel_outdated_timer(State), SysHW = init_os_monitor(OS), - {noreply, #{sysmem_high_watermark => SysHW}}; + MemRef = start_mem_check_timer(), + CpuRef = start_cpu_check_timer(), + {noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}; handle_info(Info, State) -> ?SLOG(error, #{msg => "unexpected_info", info => Info}), {noreply, State}. @@ -176,11 +179,15 @@ code_change(_OldVsn, State, _Extra) -> %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- +cancel_outdated_timer(#{mem_time_ref := MemRef, cpu_time_ref := CpuRef}) -> + emqx_misc:cancel_timer(MemRef), + emqx_misc:cancel_timer(CpuRef), + ok. start_cpu_check_timer() -> Interval = emqx:get_config([sysmon, os, cpu_check_interval]), case erlang:system_info(system_architecture) of - "x86_64-pc-linux-musl" -> ok; + "x86_64-pc-linux-musl" -> undefined; _ -> start_timer(Interval, cpu_check) end. 
@@ -193,12 +200,11 @@ start_mem_check_timer() -> true -> start_timer(Interval, mem_check); false -> - ok + undefined end. start_timer(Interval, Msg) -> - _ = emqx_misc:start_timer(Interval, Msg), - ok. + emqx_misc:start_timer(Interval, Msg). update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 -> ?SLOG(warning, #{msg => "discarded_out_of_range_mem_alarm_threshold", value => HWM}), @@ -225,7 +231,7 @@ do_update_mem_alarm_status(HWM0) -> }, usage_msg(Usage, mem) ); - _ -> + false -> ok = emqx_alarm:ensure_deactivated( high_system_memory_usage, #{ diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index 7da49016d..f80d18a3a 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -175,9 +175,9 @@ schedulers() -> loads() -> [ - {load1, ftos(avg1() / 256)}, - {load5, ftos(avg5() / 256)}, - {load15, ftos(avg15() / 256)} + {load1, load(avg1())}, + {load5, load(avg5())}, + {load15, load(avg15())} ]. system_info_keys() -> ?SYSTEM_INFO_KEYS. @@ -232,9 +232,6 @@ mem_info() -> Free = proplists:get_value(free_memory, Dataset), [{total_memory, Total}, {used_memory, Total - Free}]. -ftos(F) when is_float(F) -> - float_to_binary(F, [{decimals, 2}]). - %%%% erlang vm scheduler_usage fun copied from recon scheduler_usage(Interval) when is_integer(Interval) -> %% We start and stop the scheduler_wall_time system flag @@ -400,6 +397,9 @@ compat_windows(Fun) -> end end. +load(Avg) -> + floor((Avg / 256) * 100) / 100. + %% @doc Return on which Erlang/OTP the current vm is running. %% The dashboard's /api/nodes endpoint will call this function frequently. %% we should avoid reading file every time. diff --git a/apps/emqx/test/emqx_os_mon_SUITE.erl b/apps/emqx/test/emqx_os_mon_SUITE.erl index 8729bbdb6..0c5a1f261 100644 --- a/apps/emqx/test/emqx_os_mon_SUITE.erl +++ b/apps/emqx/test/emqx_os_mon_SUITE.erl @@ -25,25 +25,44 @@ all() -> emqx_common_test_helpers:all(?MODULE). 
init_per_suite(Config) -> emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps( - [], - fun - (emqx) -> - application:set_env(emqx, os_mon, [ - {cpu_check_interval, 1}, - {cpu_high_watermark, 5}, - {cpu_low_watermark, 80}, - {procmem_high_watermark, 5} - ]); - (_) -> - ok - end - ), + emqx_common_test_helpers:start_apps([]), Config. end_per_suite(_Config) -> emqx_common_test_helpers:stop_apps([]). +init_per_testcase(t_cpu_check_alarm, Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + SysMon = emqx_config:get([sysmon, os], #{}), + emqx_config:put([sysmon, os], SysMon#{ + cpu_high_watermark => 0.9, + cpu_low_watermark => 0, + %% 200ms + cpu_check_interval => 200 + }), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + Config; +init_per_testcase(t_sys_mem_check_alarm, Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + SysMon = emqx_config:get([sysmon, os], #{}), + emqx_config:put([sysmon, os], SysMon#{ + sysmem_high_watermark => 0.51, + %% 200ms + mem_check_interval => 200 + }), + ok = meck:new(os, [non_strict, no_link, no_history, passthrough, unstick]), + ok = meck:expect(os, type, fun() -> {unix, linux} end), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + Config; +init_per_testcase(_, Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + Config. + t_api(_) -> ?assertEqual(60000, emqx_os_mon:get_mem_check_interval()), ?assertEqual(ok, emqx_os_mon:set_mem_check_interval(30000)), @@ -67,3 +86,98 @@ t_api(_) -> emqx_os_mon ! ignored, gen_server:stop(emqx_os_mon), ok. 
+ +t_sys_mem_check_alarm(_) -> + emqx_config:put([sysmon, os, mem_check_interval], 200), + emqx_os_mon:update(emqx_config:get([sysmon, os])), + Mem = 0.52345, + Usage = floor(Mem * 10000) / 100, + emqx_common_test_helpers:with_mock( + load_ctl, + get_memory_usage, + fun() -> Mem end, + fun() -> + timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), + ?assert( + emqx_vm_mon_SUITE:is_existing( + high_system_memory_usage, emqx_alarm:get_alarms(activated) + ), + #{ + load_ctl_memory => load_ctl:get_memory_usage(), + config => emqx_config:get([sysmon, os]), + process => sys:get_state(emqx_os_mon), + alarms => Alarms + } + ), + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 51.0, usage := RealUsage}, + message := Msg, + name := high_system_memory_usage + } + ] = + lists:filter( + fun + (#{name := high_system_memory_usage}) -> true; + (_) -> false + end, + Alarms + ), + ?assert(RealUsage >= Usage, {RealUsage, Usage}), + ?assert(is_binary(Msg)), + emqx_config:put([sysmon, os, sysmem_high_watermark], 0.99999), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + timer:sleep(600), + Activated = emqx_alarm:get_alarms(activated), + ?assertNot( + emqx_vm_mon_SUITE:is_existing(high_system_memory_usage, Activated), + #{activated => Activated, process_state => sys:get_state(emqx_os_mon)} + ) + end + ). 
+ +t_cpu_check_alarm(_) -> + CpuUtil = 90.12345, + Usage = floor(CpuUtil * 100) / 100, + emqx_common_test_helpers:with_mock( + cpu_sup, + util, + fun() -> CpuUtil end, + fun() -> + timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), + ?assert( + emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) + ), + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 90.0, low_watermark := 0, usage := RealUsage}, + message := Msg, + name := high_cpu_usage + } + ] = + lists:filter( + fun + (#{name := high_cpu_usage}) -> true; + (_) -> false + end, + Alarms + ), + ?assert(RealUsage >= Usage, {RealUsage, Usage}), + ?assert(is_binary(Msg)), + emqx_config:put([sysmon, os, cpu_high_watermark], 1), + emqx_config:put([sysmon, os, cpu_low_watermark], 0.96), + timer:sleep(500), + ?assertNot( + emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) + ) + end + ). diff --git a/apps/emqx/test/emqx_vm_SUITE.erl b/apps/emqx/test/emqx_vm_SUITE.erl index 9115c5ab4..35f37a41e 100644 --- a/apps/emqx/test/emqx_vm_SUITE.erl +++ b/apps/emqx/test/emqx_vm_SUITE.erl @@ -25,19 +25,22 @@ all() -> emqx_common_test_helpers:all(?MODULE). t_load(_Config) -> lists:foreach( - fun(Avg, Int) -> + fun({Avg, LoadKey, Int}) -> emqx_common_test_helpers:with_mock( cpu_sup, Avg, fun() -> Int end, fun() -> - Load = proplists:get_value(Avg, emqx_vm:loads()), - ?assertEqual(Int / 1.0, Load) + Load = proplists:get_value(LoadKey, emqx_vm:loads()), + ?assertEqual(Int / 256, Load) end - ), - ?assertMatch([{load1, _}, {load5, _}, {load15, _}], emqx_vm:loads()) + ) end, - [{load1, 1}, {load5, 5}, {load15, 15}] + [{avg1, load1, 0}, {avg5, load5, 128}, {avg15, load15, 256}] + ), + ?assertMatch( + [{load1, _}, {load5, _}, {load15, _}], + emqx_vm:loads() ). 
t_systeminfo(_Config) -> diff --git a/apps/emqx/test/emqx_vm_mon_SUITE.erl b/apps/emqx/test/emqx_vm_mon_SUITE.erl index 140a00010..ceeffafb5 100644 --- a/apps/emqx/test/emqx_vm_mon_SUITE.erl +++ b/apps/emqx/test/emqx_vm_mon_SUITE.erl @@ -23,13 +23,13 @@ all() -> emqx_common_test_helpers:all(?MODULE). -init_per_testcase(t_alarms, Config) -> +init_per_testcase(t_too_many_processes_alarm, Config) -> emqx_common_test_helpers:boot_modules(all), emqx_common_test_helpers:start_apps([]), emqx_config:put([sysmon, vm], #{ process_high_watermark => 0, process_low_watermark => 0, - %% 1s + %% 100ms process_check_interval => 100 }), ok = supervisor:terminate_child(emqx_sys_sup, emqx_vm_mon), @@ -43,9 +43,29 @@ init_per_testcase(_, Config) -> end_per_testcase(_, _Config) -> emqx_common_test_helpers:stop_apps([]). -t_alarms(_) -> +t_too_many_processes_alarm(_) -> timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), ?assert(is_existing(too_many_processes, emqx_alarm:get_alarms(activated))), + ?assertMatch( + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 0, low_watermark := 0, usage := "0%"}, + message := <<"0% process usage">>, + name := too_many_processes + } + ], + lists:filter( + fun + (#{name := too_many_processes}) -> true; + (_) -> false + end, + Alarms + ) + ), emqx_config:put([sysmon, vm, process_high_watermark], 70), emqx_config:put([sysmon, vm, process_low_watermark], 60), timer:sleep(500), diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 09adde8bd..f794ef01d 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -126,7 +126,7 @@ lookup_node(Node) -> node_info() -> {UsedRatio, Total} = get_sys_memory(), - Info = maps:from_list([{K, list_to_binary(V)} || {K, V} <- emqx_vm:loads()]), + Info = maps:from_list(emqx_vm:loads()), BrokerInfo = emqx_sys:info(), Info#{ node => node(), diff --git 
a/apps/emqx_management/src/emqx_mgmt_api_nodes.erl b/apps/emqx_management/src/emqx_mgmt_api_nodes.erl index 64ef3c1ef..cb8d37609 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_nodes.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_nodes.erl @@ -159,18 +159,18 @@ fields(node_info) -> )}, {load1, mk( - string(), - #{desc => <<"CPU average load in 1 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 1 minute">>, example => 2.66} )}, {load5, mk( - string(), - #{desc => <<"CPU average load in 5 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 5 minute">>, example => 2.66} )}, {load15, mk( - string(), - #{desc => <<"CPU average load in 15 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 15 minute">>, example => 2.66} )}, {max_fds, mk( diff --git a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl index d393caabe..cc4f987b5 100644 --- a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl +++ b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl @@ -17,7 +17,7 @@ -behaviour(gen_server). -define(SYS_MEMORY_CACHE_KEY, ?MODULE). --define(TIMEOUT, 3000). +-define(TIMEOUT, 2200). -export([start_link/0, get_sys_memory/0, get_sys_memory/1]). -export([ diff --git a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl index 2bbdf938d..a0dbb9314 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl @@ -24,11 +24,11 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - emqx_mgmt_api_test_util:init_suite([emqx_conf]), + emqx_mgmt_api_test_util:init_suite([emqx_conf, emqx_management]), Config. end_per_suite(_) -> - emqx_mgmt_api_test_util:end_suite([emqx_conf]). + emqx_mgmt_api_test_util:end_suite([emqx_management, emqx_conf]). 
init_per_testcase(t_log_path, Config) -> emqx_config_logger:add_handler(), From c2bdb9faa7d5b10e0d00de48685ecf0e2b57920b Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 12:48:28 +0800 Subject: [PATCH 068/131] test: multiple_nodes case failed --- apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl index a0dbb9314..03b0ea2d9 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl @@ -152,7 +152,7 @@ cluster(Specs) -> Env = [{emqx, boot_modules, []}], emqx_common_test_helpers:emqx_cluster(Specs, [ {env, Env}, - {apps, [emqx_conf]}, + {apps, [emqx_conf, emqx_management]}, {load_schema, false}, {join_to, true}, {env_handler, fun From 7a9f47726718175249bc86f77a140e96b95a9075 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 00:51:34 +0800 Subject: [PATCH 069/131] feat: use emqx_mgmt_cache to cache sys_memory --- apps/emqx_management/src/emqx_mgmt_cache.erl | 104 ++++++++++++++++++ apps/emqx_management/src/emqx_mgmt_sup.erl | 2 +- .../src/emqx_mgmt_sys_memory.erl | 79 ------------- 3 files changed, 105 insertions(+), 80 deletions(-) create mode 100644 apps/emqx_management/src/emqx_mgmt_cache.erl delete mode 100644 apps/emqx_management/src/emqx_mgmt_sys_memory.erl diff --git a/apps/emqx_management/src/emqx_mgmt_cache.erl b/apps/emqx_management/src/emqx_mgmt_cache.erl new file mode 100644 index 000000000..37f8e1367 --- /dev/null +++ b/apps/emqx_management/src/emqx_mgmt_cache.erl @@ -0,0 +1,104 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_mgmt_cache). + +-behaviour(gen_server). + +-define(SYS_MEMORY_KEY, sys_memory). +-define(EXPIRED_MS, 3000). +%% -100ms to early update cache +-define(REFRESH_MS, ?EXPIRED_MS - 100). +-define(DEFAULT_BAD_MEMORY, {0, 0}). + +-export([start_link/0, get_sys_memory/0]). +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). + +get_sys_memory() -> + Now = now_millisecond(), + {CacheMem, ExpiredAt} = get_memory_from_cache(), + case Now > ExpiredAt of + true -> + erlang:send(?MODULE, fresh_sys_memory), + CacheMem; + %% stale cache value, try to recalculate + false -> + get_sys_memory_sync() + end. + +get_sys_memory_sync() -> + try + gen_server:call(?MODULE, get_sys_memory, ?EXPIRED_MS) + catch + exit:{timeout, _} -> + ?DEFAULT_BAD_MEMORY + end. + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + ets:new(?MODULE, [set, named_table, public, {keypos, 1}]), + {ok, #{fresh_at => 0}}. + +handle_call(get_sys_memory, _From, State) -> + {Mem, NewState} = fresh_sys_memory(State), + {reply, Mem, NewState}; +handle_call(_Request, _From, State) -> + {reply, ok, State}. + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(fresh_sys_memory, State) -> + {_, NewState} = fresh_sys_memory(State), + {noreply, NewState}; +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. 
+ +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +fresh_sys_memory(State = #{fresh_at := LastFreshAt}) -> + Now = now_millisecond(), + {Mem, ExpiredAt} = get_memory_from_cache(), + case Now >= ExpiredAt orelse Now - LastFreshAt >= ?REFRESH_MS of + true -> + %% NOTE: Now /= UpdateAt, because + %% load_ctl:get_sys_memory/0 maybe a heavy operation, + %% so record update_at timestamp after get_sys_memory/0. + NewMem = load_ctl:get_sys_memory(), + NewExpiredAt = now_millisecond() + ?EXPIRED_MS, + ets:insert(?MODULE, {?SYS_MEMORY_KEY, {NewMem, NewExpiredAt}}), + {NewMem, State#{fresh_at => Now}}; + false -> + {Mem, State} + end. + +get_memory_from_cache() -> + case ets:lookup(?MODULE, ?SYS_MEMORY_KEY) of + [] -> {?DEFAULT_BAD_MEMORY, 0}; + [{_, CacheVal}] -> CacheVal + end. + +now_millisecond() -> + erlang:system_time(millisecond). diff --git a/apps/emqx_management/src/emqx_mgmt_sup.erl b/apps/emqx_management/src/emqx_mgmt_sup.erl index fa49c02a6..2d9a9ba8a 100644 --- a/apps/emqx_management/src/emqx_mgmt_sup.erl +++ b/apps/emqx_management/src/emqx_mgmt_sup.erl @@ -26,7 +26,7 @@ start_link() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). init([]) -> - LC = child_spec(emqx_mgmt_sys_memory, 5000, worker), + LC = child_spec(emqx_mgmt_cache, 5000, worker), {ok, {{one_for_one, 1, 5}, [LC]}}. child_spec(Mod, Shutdown, Type) -> diff --git a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl deleted file mode 100644 index cc4f987b5..000000000 --- a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl +++ /dev/null @@ -1,79 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
-%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- --module(emqx_mgmt_sys_memory). - --behaviour(gen_server). --define(SYS_MEMORY_CACHE_KEY, ?MODULE). --define(TIMEOUT, 2200). - --export([start_link/0, get_sys_memory/0, get_sys_memory/1]). --export([ - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3 -]). - -get_sys_memory() -> - get_sys_memory(?TIMEOUT). - -get_sys_memory(Timeout) -> - try - gen_server:call(?MODULE, get_sys_memory, Timeout) - catch - exit:{timeout, _} -> - get_memory_from_cache() - end. - -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - -init([]) -> - {ok, #{last_time => 0}}. - -handle_call(get_sys_memory, _From, State = #{last_time := LastTime}) -> - Now = erlang:system_time(millisecond), - case Now - LastTime >= ?TIMEOUT of - true -> - Memory = load_ctl:get_sys_memory(), - persistent_term:put(?SYS_MEMORY_CACHE_KEY, Memory), - {reply, Memory, State#{last_time => Now}}; - false -> - {reply, get_memory_from_cache(), State} - end; -handle_call(_Request, _From, State = #{}) -> - {reply, ok, State}. - -handle_cast(_Request, State = #{}) -> - {noreply, State}. - -handle_info(_Info, State = #{}) -> - {noreply, State}. - -terminate(_Reason, _State = #{}) -> - ok. - -code_change(_OldVsn, State = #{}, _Extra) -> - {ok, State}. 
- -%%%=================================================================== -%%% Internal functions -%%%=================================================================== - -get_memory_from_cache() -> - persistent_term:get(?SYS_MEMORY_CACHE_KEY, {0, 0}). From 71f00f2962a3cbd6c7bd50f2503b3c8cbf41932c Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 10:07:09 +0800 Subject: [PATCH 070/131] test: ctl suite failed --- apps/emqx_management/src/emqx_mgmt_cli.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 0e7506a0b..442d5c7de 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -315,7 +315,7 @@ vm([]) -> vm(["all"]) -> [vm([Name]) || Name <- ["load", "memory", "process", "io", "ports"]]; vm(["load"]) -> - [emqx_ctl:print("cpu/~-20s: ~ts~n", [L, V]) || {L, V} <- emqx_vm:loads()]; + [emqx_ctl:print("cpu/~-20s: ~w~n", [L, V]) || {L, V} <- emqx_vm:loads()]; vm(["memory"]) -> [emqx_ctl:print("memory/~-17s: ~w~n", [Cat, Val]) || {Cat, Val} <- erlang:memory()]; vm(["process"]) -> From 9cbad5297c8ed03fb3eb4ec306d1718788d94095 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 11:16:30 +0800 Subject: [PATCH 071/131] fix: primary logger level not update --- apps/emqx/src/config/emqx_config_logger.erl | 10 +++++++--- apps/emqx/src/emqx_app.erl | 1 - apps/emqx_conf/src/emqx_conf_app.erl | 1 + deploy/packages/emqx.service | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/config/emqx_config_logger.erl b/apps/emqx/src/config/emqx_config_logger.erl index 4d787ea0c..625a498e2 100644 --- a/apps/emqx/src/config/emqx_config_logger.erl +++ b/apps/emqx/src/config/emqx_config_logger.erl @@ -67,12 +67,16 @@ post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) -> ok. 
maybe_update_log_level(NewLevel) -> - OldLevel = application:get_env(kernel, logger_level, warning), + OldLevel = emqx_logger:get_primary_log_level(), case OldLevel =:= NewLevel of true -> %% no change ok; false -> + ok = emqx_logger:set_primary_log_level(NewLevel), + %% also update kernel's logger_level for troubleshooting + %% what is actually in effect is the logger's primary log level + ok = application:set_env(kernel, logger_level, NewLevel), log_to_console("Config override: log level is set to '~p'~n", [NewLevel]) end. @@ -97,7 +101,7 @@ update_log_handlers(NewHandlers) -> end, AddsAndUpdates = lists:filtermap(MapFn, NewHandlers), lists:foreach(fun update_log_handler/1, Removes ++ AddsAndUpdates), - _ = application:set_env(kernel, logger, NewHandlers), + ok = application:set_env(kernel, logger, NewHandlers), ok. update_log_handler({removed, Id}) -> @@ -115,7 +119,7 @@ id_for_log(Other) -> "log.file_handlers." ++ atom_to_list(Other). atom(Id) when is_binary(Id) -> binary_to_atom(Id, utf8); atom(Id) when is_atom(Id) -> Id. -%% @doc Translate raw config to app-env conpatible log handler configs list. +%% @doc Translate raw config to app-env compatible log handler configs list. 
tr_handlers(Conf) -> %% mute the default handler tr_console_handler(Conf) ++ diff --git a/apps/emqx/src/emqx_app.erl b/apps/emqx/src/emqx_app.erl index c56cdb520..6188d8030 100644 --- a/apps/emqx/src/emqx_app.erl +++ b/apps/emqx/src/emqx_app.erl @@ -42,7 +42,6 @@ start(_Type, _Args) -> ok = maybe_load_config(), ok = emqx_persistent_session:init_db_backend(), - ok = emqx_config_logger:refresh_config(), ok = maybe_start_quicer(), ok = emqx_bpapi:start(), wait_boot_shards(), diff --git a/apps/emqx_conf/src/emqx_conf_app.erl b/apps/emqx_conf/src/emqx_conf_app.erl index f2e4f6f56..34224c3f2 100644 --- a/apps/emqx_conf/src/emqx_conf_app.erl +++ b/apps/emqx_conf/src/emqx_conf_app.erl @@ -29,6 +29,7 @@ start(_StartType, _StartArgs) -> init_conf(), + ok = emqx_config_logger:refresh_config(), emqx_conf_sup:start_link(). stop(_State) -> diff --git a/deploy/packages/emqx.service b/deploy/packages/emqx.service index d505cc519..d826e358b 100644 --- a/deploy/packages/emqx.service +++ b/deploy/packages/emqx.service @@ -13,7 +13,7 @@ Environment=HOME=/var/lib/emqx # Enable logging to file Environment=EMQX_LOG__TO=default -# Start 'foregroun' but not 'start' (daemon) mode. +# Start 'foreground' but not 'start' (daemon) mode. 
# Because systemd monitor/restarts 'simple' services ExecStart=/bin/bash /usr/bin/emqx foreground From 95ef1e806c293d2b376dbf69e709e9a64f48c9f9 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 11:24:27 +0800 Subject: [PATCH 072/131] chore: don't crash when add_handler failed --- apps/emqx/src/config/emqx_config_logger.erl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/config/emqx_config_logger.erl b/apps/emqx/src/config/emqx_config_logger.erl index 625a498e2..15e4d3959 100644 --- a/apps/emqx/src/config/emqx_config_logger.erl +++ b/apps/emqx/src/config/emqx_config_logger.erl @@ -111,7 +111,17 @@ update_log_handler({Action, {handler, Id, Mod, Conf}}) -> log_to_console("Config override: ~s is ~p~n", [id_for_log(Id), Action]), % may return {error, {not_found, Id}} _ = logger:remove_handler(Id), - ok = logger:add_handler(Id, Mod, Conf). + case logger:add_handler(Id, Mod, Conf) of + ok -> + ok; + %% Don't crash here, otherwise the cluster rpc will retry the wrong handler forever. + {error, Reason} -> + log_to_console( + "Config override: ~s is ~p, but failed to add handler: ~p~n", + [id_for_log(Id), Action, Reason] + ) + end, + ok. id_for_log(console) -> "log.console_handler"; id_for_log(Other) -> "log.file_handlers." ++ atom_to_list(Other). 
From 56b9238645bf39072fec1f36e5cf69d4799b3538 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 12:21:34 +0800 Subject: [PATCH 073/131] fix: only cache sys_memory in linux --- apps/emqx_management/src/emqx_mgmt.erl | 2 +- apps/emqx_management/src/emqx_mgmt_sup.erl | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index f794ef01d..814b39cdc 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -150,7 +150,7 @@ node_info() -> get_sys_memory() -> case os:type() of {unix, linux} -> - emqx_mgmt_sys_memory:get_sys_memory(); + emqx_mgmt_cache:get_sys_memory(); _ -> {0, 0} end. diff --git a/apps/emqx_management/src/emqx_mgmt_sup.erl b/apps/emqx_management/src/emqx_mgmt_sup.erl index 2d9a9ba8a..713ff87dc 100644 --- a/apps/emqx_management/src/emqx_mgmt_sup.erl +++ b/apps/emqx_management/src/emqx_mgmt_sup.erl @@ -26,8 +26,14 @@ start_link() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). init([]) -> - LC = child_spec(emqx_mgmt_cache, 5000, worker), - {ok, {{one_for_one, 1, 5}, [LC]}}. + Workers = + case os:type() of + {unix, linux} -> + [child_spec(emqx_mgmt_cache, 5000, worker)]; + _ -> + [] + end, + {ok, {{one_for_one, 1, 5}, Workers}}. child_spec(Mod, Shutdown, Type) -> #{ From ced55719ef2fd8b93fe021c6434fcfe219a0e58d Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 14:47:41 +0800 Subject: [PATCH 074/131] chore: only run t_sys_mem_check ct in linux --- apps/emqx/test/emqx_os_mon_SUITE.erl | 39 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/apps/emqx/test/emqx_os_mon_SUITE.erl b/apps/emqx/test/emqx_os_mon_SUITE.erl index 0c5a1f261..0538d949a 100644 --- a/apps/emqx/test/emqx_os_mon_SUITE.erl +++ b/apps/emqx/test/emqx_os_mon_SUITE.erl @@ -32,8 +32,6 @@ end_per_suite(_Config) -> emqx_common_test_helpers:stop_apps([]). 
init_per_testcase(t_cpu_check_alarm, Config) -> - emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps([]), SysMon = emqx_config:get([sysmon, os], #{}), emqx_config:put([sysmon, os], SysMon#{ cpu_high_watermark => 0.9, @@ -45,19 +43,20 @@ init_per_testcase(t_cpu_check_alarm, Config) -> {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), Config; init_per_testcase(t_sys_mem_check_alarm, Config) -> - emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps([]), - SysMon = emqx_config:get([sysmon, os], #{}), - emqx_config:put([sysmon, os], SysMon#{ - sysmem_high_watermark => 0.51, - %% 200ms - mem_check_interval => 200 - }), - ok = meck:new(os, [non_strict, no_link, no_history, passthrough, unstick]), - ok = meck:expect(os, type, fun() -> {unix, linux} end), - ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), - {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), - Config; + case os:type() of + {unix, linux} -> + SysMon = emqx_config:get([sysmon, os], #{}), + emqx_config:put([sysmon, os], SysMon#{ + sysmem_high_watermark => 0.51, + %% 200ms + mem_check_interval => 200 + }), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + Config; + _ -> + Config + end; init_per_testcase(_, Config) -> emqx_common_test_helpers:boot_modules(all), emqx_common_test_helpers:start_apps([]), @@ -87,7 +86,15 @@ t_api(_) -> gen_server:stop(emqx_os_mon), ok. -t_sys_mem_check_alarm(_) -> +t_sys_mem_check_alarm(Config) -> + case os:type() of + {unix, linux} -> + do_sys_mem_check_alarm(Config); + _ -> + skip + end. 
+ +do_sys_mem_check_alarm(_Config) -> emqx_config:put([sysmon, os, mem_check_interval], 200), emqx_os_mon:update(emqx_config:get([sysmon, os])), Mem = 0.52345, From bc7feeed0b78b57e62108ffe09cc27cf0efb131a Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 1 Feb 2023 08:45:04 +0100 Subject: [PATCH 075/131] fix(ehttpc): upgrade to version 0.4.6 --- mix.exs | 2 +- rebar.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mix.exs b/mix.exs index 623321f29..fdc7a3663 100644 --- a/mix.exs +++ b/mix.exs @@ -47,7 +47,7 @@ defmodule EMQXUmbrella.MixProject do {:lc, github: "emqx/lc", tag: "0.3.2", override: true}, {:redbug, "2.0.8"}, {:typerefl, github: "ieQu1/typerefl", tag: "0.9.1", override: true}, - {:ehttpc, github: "emqx/ehttpc", tag: "0.4.5", override: true}, + {:ehttpc, github: "emqx/ehttpc", tag: "0.4.6", override: true}, {:gproc, github: "uwiger/gproc", tag: "0.8.0", override: true}, {:jiffy, github: "emqx/jiffy", tag: "1.0.5", override: true}, {:cowboy, github: "emqx/cowboy", tag: "2.9.0", override: true}, diff --git a/rebar.config b/rebar.config index 71a54a03d..4ee7840ff 100644 --- a/rebar.config +++ b/rebar.config @@ -49,7 +49,7 @@ , {gpb, "4.19.5"} %% gpb only used to build, but not for release, pin it here to avoid fetching a wrong version due to rebar plugins scattered in all the deps , {typerefl, {git, "https://github.com/ieQu1/typerefl", {tag, "0.9.1"}}} , {gun, {git, "https://github.com/emqx/gun", {tag, "1.3.9"}}} - , {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.5"}}} + , {ehttpc, {git, "https://github.com/emqx/ehttpc", {tag, "0.4.6"}}} , {gproc, {git, "https://github.com/uwiger/gproc", {tag, "0.8.0"}}} , {jiffy, {git, "https://github.com/emqx/jiffy", {tag, "1.0.5"}}} , {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.0"}}} From 3c4d09a752cb4debbf72929511f47e56a6720bb3 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 16:00:55 +0800 Subject: [PATCH 076/131] fix: 
get_memory_cache return {ok, Mem} | stale --- apps/emqx_management/src/emqx_mgmt_cache.erl | 40 +++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_cache.erl b/apps/emqx_management/src/emqx_mgmt_cache.erl index 37f8e1367..05736a33c 100644 --- a/apps/emqx_management/src/emqx_mgmt_cache.erl +++ b/apps/emqx_management/src/emqx_mgmt_cache.erl @@ -27,14 +27,11 @@ -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). get_sys_memory() -> - Now = now_millisecond(), - {CacheMem, ExpiredAt} = get_memory_from_cache(), - case Now > ExpiredAt of - true -> + case get_memory_from_cache() of + {ok, CacheMem} -> erlang:send(?MODULE, fresh_sys_memory), CacheMem; - %% stale cache value, try to recalculate - false -> + stale -> get_sys_memory_sync() end. @@ -80,24 +77,31 @@ code_change(_OldVsn, State, _Extra) -> fresh_sys_memory(State = #{fresh_at := LastFreshAt}) -> Now = now_millisecond(), - {Mem, ExpiredAt} = get_memory_from_cache(), - case Now >= ExpiredAt orelse Now - LastFreshAt >= ?REFRESH_MS of + case Now - LastFreshAt >= ?REFRESH_MS of true -> - %% NOTE: Now /= UpdateAt, because - %% load_ctl:get_sys_memory/0 maybe a heavy operation, - %% so record update_at timestamp after get_sys_memory/0. - NewMem = load_ctl:get_sys_memory(), - NewExpiredAt = now_millisecond() + ?EXPIRED_MS, - ets:insert(?MODULE, {?SYS_MEMORY_KEY, {NewMem, NewExpiredAt}}), - {NewMem, State#{fresh_at => Now}}; + do_fresh_sys_memory(Now, State); false -> - {Mem, State} + case get_memory_from_cache() of + stale -> do_fresh_sys_memory(Now, State); + {ok, Mem} -> {Mem, State} + end end. +do_fresh_sys_memory(FreshAt, State) -> + NewMem = load_ctl:get_sys_memory(), + NewExpiredAt = now_millisecond() + ?EXPIRED_MS, + ets:insert(?MODULE, {?SYS_MEMORY_KEY, {NewMem, NewExpiredAt}}), + {NewMem, State#{fresh_at => FreshAt}}. 
+ get_memory_from_cache() -> case ets:lookup(?MODULE, ?SYS_MEMORY_KEY) of - [] -> {?DEFAULT_BAD_MEMORY, 0}; - [{_, CacheVal}] -> CacheVal + [] -> + stale; + [{_, {Mem, ExpiredAt}}] -> + case now_millisecond() < ExpiredAt of + true -> {ok, Mem}; + false -> stale + end end. now_millisecond() -> From 6ca816f7a4d7dd80ea3b8e01ef0f88e8722f6fe3 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 1 Feb 2023 09:30:10 +0100 Subject: [PATCH 077/131] docs: remove impl details from user doc --- apps/emqx_connector/i18n/emqx_connector_http.conf | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/apps/emqx_connector/i18n/emqx_connector_http.conf b/apps/emqx_connector/i18n/emqx_connector_http.conf index 7583a38ed..da886191b 100644 --- a/apps/emqx_connector/i18n/emqx_connector_http.conf +++ b/apps/emqx_connector/i18n/emqx_connector_http.conf @@ -4,12 +4,12 @@ emqx_connector_http { en: """ The base URL is the URL includes only the scheme, host and port.
When send an HTTP request, the real URL to be used is the concatenation of the base URL and the -path parameter (passed by the emqx_resource:query/2,3 or provided by the request parameter).
+path parameter
For example: `http://localhost:9901/` """ zh: """ base URL 只包含host和port。
-发送HTTP请求时,真实的URL是由base URL 和 path parameter连接而成(通过emqx_resource:query/2,3传递,或者通过请求参数提供)。
+发送HTTP请求时,真实的URL是由base URL 和 path parameter连接而成。
示例:`http://localhost:9901/` """ } @@ -76,14 +76,8 @@ base URL 只包含host和port。
request { desc { - en: """ -If the request is provided, the caller can send HTTP requests via -emqx_resource:query(ResourceId, {send_message, BridgeId, Message}) -""" - zh: """ -如果提供了请求,调用者可以通过以下方式发送 HTTP 请求 -emqx_resource:query(ResourceId, {send_message, BridgeId, Message}) -""" + en: """Configure HTTP request parameters.""" + zh: """设置 HTTP 请求的参数。""" } label: { en: "Request" From f4381d90ca2095e211ca6972a2d4d7ec8ed37f8d Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 1 Feb 2023 09:30:30 +0100 Subject: [PATCH 078/131] refactor: http cliet should not need to retry HTTP requests for bridge --- apps/emqx_connector/src/emqx_connector_http.erl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/apps/emqx_connector/src/emqx_connector_http.erl b/apps/emqx_connector/src/emqx_connector_http.erl index 7f84c665a..33407f17c 100644 --- a/apps/emqx_connector/src/emqx_connector_http.erl +++ b/apps/emqx_connector/src/emqx_connector_http.erl @@ -264,9 +264,10 @@ on_query(InstId, {send_message, Msg}, State) -> path := Path, body := Body, headers := Headers, - request_timeout := Timeout, - max_retries := Retry + request_timeout := Timeout } = process_request(Request, Msg), + %% bridge buffer worker has retry, do not let ehttpc retry + Retry = 0, on_query( InstId, {undefined, Method, {Path, Headers, Body}, Timeout, Retry}, @@ -274,9 +275,11 @@ on_query(InstId, {send_message, Msg}, State) -> ) end; on_query(InstId, {Method, Request}, State) -> - on_query(InstId, {undefined, Method, Request, 5000, 2}, State); + %% TODO: Get retry from State + on_query(InstId, {undefined, Method, Request, 5000, _Retry = 2}, State); on_query(InstId, {Method, Request, Timeout}, State) -> - on_query(InstId, {undefined, Method, Request, Timeout, 2}, State); + %% TODO: Get retry from State + on_query(InstId, {undefined, Method, Request, Timeout, _Retry = 2}, State); on_query( InstId, {KeyOrNum, Method, Request, Timeout, Retry}, From 
9f6b6cedc670f64ced528421e97eeba8e227e56d Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 1 Feb 2023 09:53:25 +0100 Subject: [PATCH 079/131] fix(webhook): pick worker according to pool type --- .../src/emqx_connector_http.erl | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/apps/emqx_connector/src/emqx_connector_http.erl b/apps/emqx_connector/src/emqx_connector_http.erl index 33407f17c..bd129f1a8 100644 --- a/apps/emqx_connector/src/emqx_connector_http.erl +++ b/apps/emqx_connector/src/emqx_connector_http.erl @@ -234,6 +234,7 @@ on_start( PoolName = emqx_plugin_libs_pool:pool_name(InstId), State = #{ pool_name => PoolName, + pool_type => PoolType, host => Host, port => Port, connect_timeout => ConnectTimeout, @@ -283,7 +284,7 @@ on_query(InstId, {Method, Request, Timeout}, State) -> on_query( InstId, {KeyOrNum, Method, Request, Timeout, Retry}, - #{pool_name := PoolName, base_path := BasePath} = State + #{base_path := BasePath} = State ) -> ?TRACE( "QUERY", @@ -291,12 +292,10 @@ on_query( #{request => Request, connector => InstId, state => State} ), NRequest = formalize_request(Method, BasePath, Request), + Worker = resolve_pool_worker(State, KeyOrNum), case ehttpc:request( - case KeyOrNum of - undefined -> PoolName; - _ -> {PoolName, KeyOrNum} - end, + Worker, Method, NRequest, Timeout, @@ -364,19 +363,15 @@ on_query_async( InstId, {KeyOrNum, Method, Request, Timeout}, ReplyFunAndArgs, - #{pool_name := PoolName, base_path := BasePath} = State + #{base_path := BasePath} = State ) -> + Worker = resolve_pool_worker(State, KeyOrNum), ?TRACE( "QUERY_ASYNC", "http_connector_received", #{request => Request, connector => InstId, state => State} ), NRequest = formalize_request(Method, BasePath, Request), - Worker = - case KeyOrNum of - undefined -> ehttpc_pool:pick_worker(PoolName); - _ -> ehttpc_pool:pick_worker(PoolName, KeyOrNum) - end, ok = ehttpc:request_async( Worker, Method, @@ -386,6 +381,16 @@ on_query_async( 
), {ok, Worker}. +resolve_pool_worker(State, undefined) -> + resolve_pool_worker(State, self()); +resolve_pool_worker(#{pool_name := PoolName} = State, Key) -> + case maps:get(pool_type, State, random) of + random -> + ehttpc_pool:pick_worker(PoolName); + hash -> + ehttpc_pool:pick_worker(PoolName, Key) + end. + on_get_status(_InstId, #{pool_name := PoolName, connect_timeout := Timeout} = State) -> case do_get_status(PoolName, Timeout) of ok -> From 638291503675fc1fa33a06f55790e23bdd2fa826 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 1 Feb 2023 17:36:30 +0800 Subject: [PATCH 080/131] chore: repalce fresh by refresh --- apps/emqx_management/src/emqx_mgmt_cache.erl | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/apps/emqx_management/src/emqx_mgmt_cache.erl b/apps/emqx_management/src/emqx_mgmt_cache.erl index 05736a33c..9b3cd4f56 100644 --- a/apps/emqx_management/src/emqx_mgmt_cache.erl +++ b/apps/emqx_management/src/emqx_mgmt_cache.erl @@ -29,7 +29,7 @@ get_sys_memory() -> case get_memory_from_cache() of {ok, CacheMem} -> - erlang:send(?MODULE, fresh_sys_memory), + erlang:send(?MODULE, refresh_sys_memory), CacheMem; stale -> get_sys_memory_sync() @@ -48,10 +48,10 @@ start_link() -> init([]) -> ets:new(?MODULE, [set, named_table, public, {keypos, 1}]), - {ok, #{fresh_at => 0}}. + {ok, #{latest_refresh => 0}}. handle_call(get_sys_memory, _From, State) -> - {Mem, NewState} = fresh_sys_memory(State), + {Mem, NewState} = refresh_sys_memory(State), {reply, Mem, NewState}; handle_call(_Request, _From, State) -> {reply, ok, State}. @@ -59,8 +59,8 @@ handle_call(_Request, _From, State) -> handle_cast(_Request, State) -> {noreply, State}. -handle_info(fresh_sys_memory, State) -> - {_, NewState} = fresh_sys_memory(State), +handle_info(refresh_sys_memory, State) -> + {_, NewState} = refresh_sys_memory(State), {noreply, NewState}; handle_info(_Info, State) -> {noreply, State}. 
@@ -75,23 +75,23 @@ code_change(_OldVsn, State, _Extra) -> %%% Internal functions %%%=================================================================== -fresh_sys_memory(State = #{fresh_at := LastFreshAt}) -> +refresh_sys_memory(State = #{latest_refresh := LatestRefresh}) -> Now = now_millisecond(), - case Now - LastFreshAt >= ?REFRESH_MS of + case Now - LatestRefresh >= ?REFRESH_MS of true -> - do_fresh_sys_memory(Now, State); + do_refresh_sys_memory(Now, State); false -> case get_memory_from_cache() of - stale -> do_fresh_sys_memory(Now, State); + stale -> do_refresh_sys_memory(Now, State); {ok, Mem} -> {Mem, State} end end. -do_fresh_sys_memory(FreshAt, State) -> +do_refresh_sys_memory(RefreshAt, State) -> NewMem = load_ctl:get_sys_memory(), NewExpiredAt = now_millisecond() + ?EXPIRED_MS, ets:insert(?MODULE, {?SYS_MEMORY_KEY, {NewMem, NewExpiredAt}}), - {NewMem, State#{fresh_at => FreshAt}}. + {NewMem, State#{latest_refresh => RefreshAt}}. get_memory_from_cache() -> case ets:lookup(?MODULE, ?SYS_MEMORY_KEY) of From 96a18e7105598e5b2665dc47d9e1931b49e154f4 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 1 Feb 2023 09:58:21 +0100 Subject: [PATCH 081/131] chore: upgrade to hocon 0.35.3 --- apps/emqx/rebar.config | 2 +- apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl | 3 +-- apps/emqx_authz/test/emqx_authz_redis_SUITE.erl | 2 +- mix.exs | 2 +- rebar.config | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 0ea42c0fb..cad599436 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -29,7 +29,7 @@ {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.4"}}}, {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.13.9"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}}, - {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.0"}}}, + {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.3"}}}, {pbkdf2, {git, 
"https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}}, {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.1"}}}, {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.0"}}} diff --git a/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl b/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl index ff0bfaea7..fa0658f6a 100644 --- a/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl +++ b/apps/emqx_authn/test/emqx_authn_pgsql_SUITE.erl @@ -112,8 +112,7 @@ t_update_with_invalid_config(_Config) -> #{ kind := validation_error, path := "authentication.server", - reason := required_field, - value := undefined + reason := required_field } ]} }}}, diff --git a/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl b/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl index c07d920ad..b480e0262 100644 --- a/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl +++ b/apps/emqx_authz/test/emqx_authz_redis_SUITE.erl @@ -188,7 +188,7 @@ t_create_invalid_config(_Config) -> ?assertMatch( {error, #{ kind := validation_error, - path := "authorization.sources.1" + path := "authorization.sources.1.server" }}, emqx_authz:update(?CMD_REPLACE, [C]) ). 
diff --git a/mix.exs b/mix.exs index fdc7a3663..cb7d9aa3b 100644 --- a/mix.exs +++ b/mix.exs @@ -68,7 +68,7 @@ defmodule EMQXUmbrella.MixProject do # in conflict by emqtt and hocon {:getopt, "1.0.2", override: true}, {:snabbkaffe, github: "kafka4beam/snabbkaffe", tag: "1.0.0", override: true}, - {:hocon, github: "emqx/hocon", tag: "0.35.0", override: true}, + {:hocon, github: "emqx/hocon", tag: "0.35.3", override: true}, {:emqx_http_lib, github: "emqx/emqx_http_lib", tag: "0.5.2", override: true}, {:esasl, github: "emqx/esasl", tag: "0.2.0"}, {:jose, github: "potatosalad/erlang-jose", tag: "1.11.2"}, diff --git a/rebar.config b/rebar.config index 4ee7840ff..4a35641c4 100644 --- a/rebar.config +++ b/rebar.config @@ -68,7 +68,7 @@ , {system_monitor, {git, "https://github.com/ieQu1/system_monitor", {tag, "3.0.3"}}} , {getopt, "1.0.2"} , {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.0"}}} - , {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.0"}}} + , {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.35.3"}}} , {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.2"}}} , {esasl, {git, "https://github.com/emqx/esasl", {tag, "0.2.0"}}} , {jose, {git, "https://github.com/potatosalad/erlang-jose", {tag, "1.11.2"}}} From ff473e0f1bd899e9dbec726dfdaa5a8b29085ec0 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 1 Feb 2023 12:55:13 +0300 Subject: [PATCH 082/131] test(bufworker): fix testcase flapping due to data races --- .../test/emqx_resource_SUITE.erl | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 1516fc870..620516a88 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -618,6 +618,8 @@ t_query_counter_async_inflight_batch(_) -> ), tap_metrics(?LINE), + Sent1 = NumMsgs + BatchSize, 
+ ?check_trace( begin %% this will block the resource_worker as the inflight window is full now @@ -633,6 +635,12 @@ t_query_counter_async_inflight_batch(_) -> [] ), + %% NOTE + %% The query above won't affect the size of the results table for some reason, + %% it's not clear if this is expected behaviour. Only the `async_reply_fun` + %% defined below will be called for the whole batch consisting of 2 increments. + Sent2 = Sent1 + 0, + tap_metrics(?LINE), %% send query now will fail because the resource is blocked. Insert = fun(Tab, Ref, Result) -> @@ -658,7 +666,7 @@ t_query_counter_async_inflight_batch(_) -> %% +2 because the tmp_query above will be retried and succeed %% this time. WindowSize + 2, - 10_000 + 5_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), tap_metrics(?LINE), @@ -666,8 +674,8 @@ t_query_counter_async_inflight_batch(_) -> %% since the previous tmp_query was enqueued to be retried, we %% take it again from the table; this time, it should have %% succeeded. - ?assertMatch([{tmp_query, ok}], ets:take(Tab0, tmp_query)), - ?assertEqual(NumMsgs + BatchSize, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), + ?assertEqual([{tmp_query, ok}], ets:take(Tab0, tmp_query)), + ?assertEqual(Sent2, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), %% send async query, this time everything should be ok. 
@@ -679,7 +687,7 @@ t_query_counter_async_inflight_batch(_) -> {ok, SRef} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), NumBatches1, - 10_000 + 5_000 ), inc_counter_in_parallel(NumMsgs1, ReqOpts), {ok, _} = snabbkaffe:receive_events(SRef), @@ -693,11 +701,10 @@ t_query_counter_async_inflight_batch(_) -> ) end ), - ?assertEqual( - NumMsgs + BatchSize + NumMsgs1, - ets:info(Tab0, size), - #{tab => ets:tab2list(Tab0)} - ), + + Sent3 = Sent2 + NumMsgs1, + + ?assertEqual(Sent3, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), tap_metrics(?LINE), %% block the resource @@ -720,22 +727,23 @@ t_query_counter_async_inflight_batch(_) -> end ), + Sent4 = Sent3 + NumMsgs + BatchSize, + %% this will block the resource_worker ok = emqx_resource:query(?ID, {inc_counter, 1}), - Sent = NumMsgs + BatchSize + NumMsgs1 + NumMsgs, {ok, SRef1} = snabbkaffe:subscribe( ?match_event(#{?snk_kind := connector_demo_inc_counter_async}), - WindowSize, - 10_000 + WindowSize + 1, + 5_000 ), ?assertMatch(ok, emqx_resource:simple_sync_query(?ID, resume)), {ok, _} = snabbkaffe:receive_events(SRef1), - ?assertEqual(Sent, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), + ?assertEqual(Sent4, ets:info(Tab0, size), #{tab => ets:tab2list(Tab0)}), {ok, Counter} = emqx_resource:simple_sync_query(?ID, get_counter), - ct:pal("get_counter: ~p, sent: ~p", [Counter, Sent]), - ?assert(Sent =< Counter), + ct:pal("get_counter: ~p, sent: ~p", [Counter, Sent4]), + ?assert(Sent4 =< Counter), %% give the metrics some time to stabilize. ct:sleep(1000), From f0395be3830a0baeeba43c7674e17733f875b47b Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 31 Jan 2023 23:18:46 +0300 Subject: [PATCH 083/131] refactor(mqtt-worker): avoid unnecessary abstraction So the code is easier to follow. 
--- .../src/emqx_connector_mqtt.erl | 5 +- .../src/mqtt/emqx_connector_mqtt_worker.erl | 78 ++++++++++--------- 2 files changed, 45 insertions(+), 38 deletions(-) diff --git a/apps/emqx_connector/src/emqx_connector_mqtt.erl b/apps/emqx_connector/src/emqx_connector_mqtt.erl index 462bac0b8..c1a051836 100644 --- a/apps/emqx_connector/src/emqx_connector_mqtt.erl +++ b/apps/emqx_connector/src/emqx_connector_mqtt.erl @@ -198,8 +198,9 @@ on_query_async(_InstId, {send_message, Msg}, Callback, #{name := InstanceId}) -> ?TRACE("QUERY", "async_send_msg_to_remote_node", #{message => Msg, connector => InstanceId}), case emqx_connector_mqtt_worker:send_to_remote_async(InstanceId, Msg, Callback) of ok -> - % TODO this is racy - {ok, emqx_connector_mqtt_worker:pid(InstanceId)}; + ok; + {ok, Pid} -> + {ok, Pid}; {error, Reason} -> classify_error(Reason) end. diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index 85261a063..9fac20153 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -67,8 +67,7 @@ %% APIs -export([ start_link/2, - stop/1, - pid/1 + stop/1 ]). %% management APIs @@ -175,7 +174,7 @@ mk_client_event_handler(undefined, _Opts) -> connect(Name) -> #{subscriptions := Subscriptions} = get_config(Name), - case emqtt:connect(pid(Name)) of + case emqtt:connect(get_pid(Name)) of {ok, Properties} -> case subscribe_remote_topics(Name, Subscriptions) of ok -> @@ -206,37 +205,28 @@ subscribe_remote_topics(_Ref, undefined) -> stop(Ref) -> emqtt:stop(ref(Ref)). -pid(Name) -> - gproc:lookup_pid(?NAME(Name)). - status(Ref) -> - trycall( - fun() -> - Info = emqtt:info(ref(Ref)), - case proplists:get_value(socket, Info) of - Socket when Socket /= undefined -> - connected; - undefined -> - connecting - end - end, - #{noproc => disconnected} - ). 
+ try + Info = emqtt:info(ref(Ref)), + case proplists:get_value(socket, Info) of + Socket when Socket /= undefined -> + connected; + undefined -> + connecting + end + catch + exit:{noproc, _} -> + disconnected + end. ping(Ref) -> emqtt:ping(ref(Ref)). send_to_remote(Name, MsgIn) -> - trycall( - fun() -> do_send(Name, export_msg(Name, MsgIn)) end, - #{ - badarg => {error, disconnected}, - noproc => {error, disconnected} - } - ). + trycall(fun() -> do_send(Name, export_msg(Name, MsgIn)) end). do_send(Name, {true, Msg}) -> - case emqtt:publish(pid(Name), Msg) of + case emqtt:publish(get_pid(Name), Msg) of ok -> ok; {ok, #{reason_code := RC}} when @@ -263,13 +253,16 @@ do_send(_Name, false) -> ok. send_to_remote_async(Name, MsgIn, Callback) -> - trycall( - fun() -> do_send_async(Name, export_msg(Name, MsgIn), Callback) end, - #{badarg => {error, disconnected}} - ). + trycall(fun() -> do_send_async(Name, export_msg(Name, MsgIn), Callback) end). do_send_async(Name, {true, Msg}, Callback) -> - emqtt:publish_async(pid(Name), Msg, _Timeout = infinity, Callback); + Pid = get_pid(Name), + case emqtt:publish_async(Pid, Msg, _Timeout = infinity, Callback) of + ok -> + {ok, Pid}; + {error, _} = Error -> + Error + end; do_send_async(_Name, false, _Callback) -> ok. @@ -278,14 +271,14 @@ ref(Pid) when is_pid(Pid) -> ref(Term) -> ?REF(Term). -trycall(Fun, Else) -> +trycall(Fun) -> try Fun() catch - error:badarg -> - maps:get(badarg, Else); + throw:noproc -> + {error, disconnected}; exit:{noproc, _} -> - maps:get(noproc, Else) + {error, disconnected} end. format_mountpoint(undefined) -> @@ -325,8 +318,21 @@ pre_process_conf(Key, Conf) -> Conf#{Key => Val} end. +get_pid(Name) -> + case gproc:where(?NAME(Name)) of + Pid when is_pid(Pid) -> + Pid; + undefined -> + throw(noproc) + end. + get_config(Name) -> - gproc:lookup_value(?NAME(Name)). + try + gproc:lookup_value(?NAME(Name)) + catch + error:badarg -> + throw(noproc) + end. 
export_msg(Name, Msg) -> case get_config(Name) of From ad88938d34f322eaa298b217587a0ed243cfe73b Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 1 Feb 2023 16:22:19 +0300 Subject: [PATCH 084/131] refactor: reuse some parts of test code for brevity --- .../test/emqx_bridge_mqtt_SUITE.erl | 456 +++++------------- 1 file changed, 131 insertions(+), 325 deletions(-) diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index 6e3bf77ee..52084196a 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -32,7 +32,6 @@ -define(BRIDGE_CONF_DEFAULT, <<"bridges: {}">>). -define(TYPE_MQTT, <<"mqtt">>). --define(NAME_MQTT, <<"my_mqtt_bridge">>). -define(BRIDGE_NAME_INGRESS, <<"ingress_mqtt_bridge">>). -define(BRIDGE_NAME_EGRESS, <<"egress_mqtt_bridge">>). @@ -98,6 +97,24 @@ } }). +-define(assertMetrics(Pat, BridgeID), + ?assertMetrics(Pat, true, BridgeID) +). +-define(assertMetrics(Pat, Guard, BridgeID), + ?assertMatch( + #{ + <<"metrics">> := Pat, + <<"node_metrics">> := [ + #{ + <<"node">> := _, + <<"metrics">> := Pat + } + ] + } when Guard, + request_bridge_metrics(BridgeID) + ) +). + inspect(Selected, _Envs, _Args) -> persistent_term:put(?MODULE, #{inspect => Selected}). @@ -176,7 +193,7 @@ t_mqtt_conn_bridge_ingress(_) -> {ok, 201, Bridge} = request( post, uri(["bridges"]), - ?SERVER_CONF(User1)#{ + ServerConf = ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF @@ -186,6 +203,7 @@ t_mqtt_conn_bridge_ingress(_) -> <<"type">> := ?TYPE_MQTT, <<"name">> := ?BRIDGE_NAME_INGRESS } = jsx:decode(Bridge), + BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), %% we now test if the bridge works as expected @@ -198,34 +216,12 @@ t_mqtt_conn_bridge_ingress(_) -> %% the remote broker is also the local one.
emqx:publish(emqx_message:make(RemoteTopic, Payload)), %% we should receive a message on the local broker, with specified topic - ?assert( - receive - {deliver, LocalTopic, #message{payload = Payload}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, LocalTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(LocalTopic, Payload), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDIngress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 0, <<"received">> := 1}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 0, <<"received">> := 1} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 0, <<"received">> := 1}, + BridgeIDIngress ), %% delete the bridge @@ -236,21 +232,13 @@ t_mqtt_conn_bridge_ingress(_) -> t_mqtt_conn_bridge_ingress_no_payload_template(_) -> User1 = <<"user1">>, - %% create an MQTT bridge, using POST - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDIngress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF_NO_PAYLOAD_TEMPLATE } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_INGRESS - } = jsx:decode(Bridge), - BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), %% we now test if the bridge works as expected RemoteTopic = <>, @@ -262,40 +250,13 @@ t_mqtt_conn_bridge_ingress_no_payload_template(_) -> %% the remote broker is also the local one. 
emqx:publish(emqx_message:make(RemoteTopic, Payload)), %% we should receive a message on the local broker, with specified topic - ?assert( - receive - {deliver, LocalTopic, #message{payload = MapMsg}} -> - ct:pal("local broker got message: ~p on topic ~p", [MapMsg, LocalTopic]), - %% the MapMsg is all fields outputed by Rule-Engine. it's a binary coded json here. - case jsx:decode(MapMsg) of - #{<<"payload">> := Payload} -> - true; - _ -> - false - end; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + Msg = assert_mqtt_msg_received(LocalTopic), + ?assertMatch(#{<<"payload">> := Payload}, jsx:decode(Msg#message.payload)), %% verify the metrics of the bridge - {ok, 200, BridgeStr} = request(get, uri(["bridges", BridgeIDIngress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 0, <<"received">> := 1}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 0, <<"received">> := 1} - } - ] - }, - jsx:decode(BridgeStr) + ?assertMetrics( + #{<<"matched">> := 0, <<"received">> := 1}, + BridgeIDIngress ), %% delete the bridge @@ -307,22 +268,15 @@ t_mqtt_conn_bridge_ingress_no_payload_template(_) -> t_mqtt_conn_bridge_egress(_) -> %% then we add a mqtt connector, using POST User1 = <<"user1">>, - - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), ResourceID = emqx_bridge_resource:resource_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), + %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, @@ -334,36 +288,14 @@ t_mqtt_conn_bridge_egress(_) -> emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on 
the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = Payload, from = From}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, RemoteTopic]), - Size = byte_size(ResourceID), - ?assertMatch(<>, From), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + Msg = assert_mqtt_msg_received(RemoteTopic, Payload), + Size = byte_size(ResourceID), + ?assertMatch(<>, Msg#message.from), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, + BridgeIDEgress ), %% delete the bridge @@ -375,21 +307,15 @@ t_mqtt_conn_bridge_egress_no_payload_template(_) -> %% then we add a mqtt connector, using POST User1 = <<"user1">>, - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF_NO_PAYLOAD_TEMPLATE } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), ResourceID = emqx_bridge_resource:resource_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), + %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, @@ -401,42 +327,15 @@ t_mqtt_conn_bridge_egress_no_payload_template(_) -> emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = 
MapMsg, from = From}} -> - ct:pal("local broker got message: ~p on topic ~p", [MapMsg, RemoteTopic]), - %% the MapMsg is all fields outputed by Rule-Engine. it's a binary coded json here. - Size = byte_size(ResourceID), - ?assertMatch(<>, From), - case jsx:decode(MapMsg) of - #{<<"payload">> := Payload} -> - true; - _ -> - false - end; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + Msg = assert_mqtt_msg_received(RemoteTopic), + %% the MapMsg is all fields outputed by Rule-Engine. it's a binary coded json here. + ?assertMatch(<>, Msg#message.from), + ?assertMatch(#{<<"payload">> := Payload}, jsx:decode(Msg#message.payload)), %% verify the metrics of the bridge - {ok, 200, BridgeStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0} - } - ] - }, - jsx:decode(BridgeStr) + ?assertMetrics( + #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, + BridgeIDEgress ), %% delete the bridge @@ -447,9 +346,7 @@ t_mqtt_conn_bridge_egress_no_payload_template(_) -> t_egress_custom_clientid_prefix(_Config) -> User1 = <<"user1">>, - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"clientid_prefix">> => <<"my-custom-prefix">>, <<"type">> => ?TYPE_MQTT, @@ -457,11 +354,6 @@ t_egress_custom_clientid_prefix(_Config) -> <<"egress">> => ?EGRESS_CONF } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), ResourceID = emqx_bridge_resource:resource_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), LocalTopic = <>, RemoteTopic = <>, @@ -470,58 +362,36 @@ t_egress_custom_clientid_prefix(_Config) -> timer:sleep(100), 
emqx:publish(emqx_message:make(LocalTopic, Payload)), - receive - {deliver, RemoteTopic, #message{from = From}} -> - Size = byte_size(ResourceID), - ?assertMatch(<<"my-custom-prefix:", _ResouceID:Size/binary, _/binary>>, From), - ok - after 1000 -> - ct:fail("should have published message") - end, + Msg = assert_mqtt_msg_received(RemoteTopic, Payload), + Size = byte_size(ResourceID), + ?assertMatch(<<"my-custom-prefix:", _ResouceID:Size/binary, _/binary>>, Msg#message.from), {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []), ok. t_mqtt_conn_bridge_ingress_and_egress(_) -> User1 = <<"user1">>, - %% create an MQTT bridge, using POST - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDIngress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF } ), - - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_INGRESS - } = jsx:decode(Bridge), - BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), - {ok, 201, Bridge2} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = jsx:decode(Bridge2), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, Payload = <<"hello">>, emqx:subscribe(RemoteTopic), - {ok, 200, BridgeMetricsStr1} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), #{ <<"metrics">> := #{ <<"matched">> := CntMatched1, <<"success">> := CntSuccess1, <<"failed">> := 0 @@ -538,29 +408,17 @@ t_mqtt_conn_bridge_ingress_and_egress(_) -> } } ] - } = jsx:decode(BridgeMetricsStr1), + } = request_bridge_metrics(BridgeIDEgress), timer:sleep(100), %% PUBLISH a message to the 'local' broker, 
as we have only one broker, %% the remote broker is also the local one. emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = Payload}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, RemoteTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(RemoteTopic, Payload), %% verify the metrics of the bridge timer:sleep(1000), - {ok, 200, BridgeMetricsStr2} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), #{ <<"metrics">> := #{ <<"matched">> := CntMatched2, <<"success">> := CntSuccess2, <<"failed">> := 0 @@ -577,7 +435,7 @@ t_mqtt_conn_bridge_ingress_and_egress(_) -> } } ] - } = jsx:decode(BridgeMetricsStr2), + } = request_bridge_metrics(BridgeIDEgress), ?assertEqual(CntMatched2, CntMatched1 + 1), ?assertEqual(CntSuccess2, CntSuccess1 + 1), ?assertEqual(NodeCntMatched2, NodeCntMatched1 + 1), @@ -590,16 +448,13 @@ t_mqtt_conn_bridge_ingress_and_egress(_) -> ok. t_ingress_mqtt_bridge_with_rules(_) -> - {ok, 201, _} = request( - post, - uri(["bridges"]), + BridgeIDIngress = create_bridge( ?SERVER_CONF(<<"user1">>)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_INGRESS, <<"ingress">> => ?INGRESS_CONF } ), - BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), {ok, 201, Rule} = request( post, @@ -624,18 +479,7 @@ t_ingress_mqtt_bridge_with_rules(_) -> %% the remote broker is also the local one. 
emqx:publish(emqx_message:make(RemoteTopic, Payload)), %% we should receive a message on the local broker, with specified topic - ?assert( - receive - {deliver, LocalTopic, #message{payload = Payload}} -> - ct:pal("local broker got message: ~p on topic ~p", [Payload, LocalTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(LocalTopic, Payload), %% and also the rule should be matched, with matched + 1: {ok, 200, Rule1} = request(get, uri(["rules", RuleId]), []), {ok, 200, Metrics} = request(get, uri(["rules", RuleId, "metrics"]), []), @@ -680,37 +524,22 @@ t_ingress_mqtt_bridge_with_rules(_) -> ), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDIngress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 0, <<"received">> := 1}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 0, <<"received">> := 1} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 0, <<"received">> := 1}, + BridgeIDIngress ), {ok, 204, <<>>} = request(delete, uri(["rules", RuleId]), []), {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDIngress]), []). t_egress_mqtt_bridge_with_rules(_) -> - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(<<"user1">>)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, <<"egress">> => ?EGRESS_CONF } ), - #{<<"type">> := ?TYPE_MQTT, <<"name">> := ?BRIDGE_NAME_EGRESS} = jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), {ok, 201, Rule} = request( post, @@ -734,18 +563,7 @@ t_egress_mqtt_bridge_with_rules(_) -> %% the remote broker is also the local one. 
emqx:publish(emqx_message:make(LocalTopic, Payload)), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic, #message{payload = Payload}} -> - ct:pal("remote broker got message: ~p on topic ~p", [Payload, RemoteTopic]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(RemoteTopic, Payload), emqx:unsubscribe(RemoteTopic), %% PUBLISH a message to the rule. @@ -780,35 +598,12 @@ t_egress_mqtt_bridge_with_rules(_) -> ), %% we should receive a message on the "remote" broker, with specified topic - ?assert( - receive - {deliver, RemoteTopic2, #message{payload = Payload2}} -> - ct:pal("remote broker got message: ~p on topic ~p", [Payload2, RemoteTopic2]), - true; - Msg -> - ct:pal("Msg: ~p", [Msg]), - false - after 100 -> - false - end - ), + assert_mqtt_msg_received(RemoteTopic2, Payload2), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 2, <<"success">> := 2, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := #{ - <<"matched">> := 2, <<"success">> := 2, <<"failed">> := 0 - } - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 2, <<"success">> := 2, <<"failed">> := 0}, + BridgeIDEgress ), {ok, 204, <<>>} = request(delete, uri(["rules", RuleId]), []), @@ -817,10 +612,7 @@ t_egress_mqtt_bridge_with_rules(_) -> t_mqtt_conn_bridge_egress_reconnect(_) -> %% then we add a mqtt connector, using POST User1 = <<"user1">>, - - {ok, 201, Bridge} = request( - post, - uri(["bridges"]), + BridgeIDEgress = create_bridge( ?SERVER_CONF(User1)#{ <<"type">> => ?TYPE_MQTT, <<"name">> => ?BRIDGE_NAME_EGRESS, @@ -837,17 +629,14 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> } } ), - #{ - <<"type">> := ?TYPE_MQTT, - <<"name">> := ?BRIDGE_NAME_EGRESS - } = 
jsx:decode(Bridge), - BridgeIDEgress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_EGRESS), + on_exit(fun() -> %% delete the bridge {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []), {ok, 200, <<"[]">>} = request(get, uri(["bridges"]), []), ok end), + %% we now test if the bridge works as expected LocalTopic = <>, RemoteTopic = <>, @@ -862,20 +651,9 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> assert_mqtt_msg_received(RemoteTopic, Payload0), %% verify the metrics of the bridge - {ok, 200, BridgeMetricsStr} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - ?assertMatch( - #{ - <<"metrics">> := #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, - <<"node_metrics">> := - [ - #{ - <<"node">> := _, - <<"metrics">> := - #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0} - } - ] - }, - jsx:decode(BridgeMetricsStr) + ?assertMetrics( + #{<<"matched">> := 1, <<"success">> := 1, <<"failed">> := 0}, + BridgeIDEgress ), %% stop the listener 1883 to make the bridge disconnected @@ -906,63 +684,91 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> {ok, _} = snabbkaffe:receive_events(SRef), %% verify the metrics of the bridge, the message should be queued - {ok, 200, BridgeStr1} = request(get, uri(["bridges", BridgeIDEgress]), []), - {ok, 200, BridgeMetricsStr1} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - Decoded1 = jsx:decode(BridgeStr1), - DecodedMetrics1 = jsx:decode(BridgeMetricsStr1), ?assertMatch( - Status when (Status == <<"connecting">> orelse Status == <<"disconnected">>), - maps:get(<<"status">>, Decoded1) + #{<<"status">> := Status} when Status == <<"connecting">>; Status == <<"disconnected">>, + request_bridge(BridgeIDEgress) ), %% matched >= 3 because of possible retries. 
- ?assertMatch( + ?assertMetrics( #{ <<"matched">> := Matched, <<"success">> := 1, <<"failed">> := 0, <<"queuing">> := Queuing, <<"inflight">> := Inflight - } when Matched >= 3 andalso Inflight + Queuing == 2, - maps:get(<<"metrics">>, DecodedMetrics1) + }, + Matched >= 3 andalso Inflight + Queuing == 2, + BridgeIDEgress ), %% start the listener 1883 to make the bridge reconnected ok = emqx_listeners:start_listener('tcp:default'), timer:sleep(1500), %% verify the metrics of the bridge, the 2 queued messages should have been sent - {ok, 200, BridgeStr2} = request(get, uri(["bridges", BridgeIDEgress]), []), - {ok, 200, BridgeMetricsStr2} = request(get, uri(["bridges", BridgeIDEgress, "metrics"]), []), - Decoded2 = jsx:decode(BridgeStr2), - ?assertEqual(<<"connected">>, maps:get(<<"status">>, Decoded2)), + ?assertMatch(#{<<"status">> := <<"connected">>}, request_bridge(BridgeIDEgress)), %% matched >= 3 because of possible retries. - ?assertMatch( + ?assertMetrics( #{ - <<"metrics">> := #{ - <<"matched">> := Matched, - <<"success">> := 3, - <<"failed">> := 0, - <<"queuing">> := 0, - <<"retried">> := _ - } - } when Matched >= 3, - jsx:decode(BridgeMetricsStr2) + <<"matched">> := Matched, + <<"success">> := 3, + <<"failed">> := 0, + <<"queuing">> := 0, + <<"retried">> := _ + }, + Matched >= 3, + BridgeIDEgress ), %% also verify the 2 messages have been sent to the remote broker assert_mqtt_msg_received(RemoteTopic, Payload1), assert_mqtt_msg_received(RemoteTopic, Payload2), ok. +assert_mqtt_msg_received(Topic) -> + assert_mqtt_msg_received(Topic, '_', 200). + assert_mqtt_msg_received(Topic, Payload) -> - ct:pal("checking if ~p has been received on ~p", [Payload, Topic]), + assert_mqtt_msg_received(Topic, Payload, 200). 
+ +assert_mqtt_msg_received(Topic, Payload, Timeout) -> receive - {deliver, Topic, #message{payload = Payload}} -> - ct:pal("Got mqtt message: ~p on topic ~p", [Payload, Topic]), - ok - after 300 -> + {deliver, Topic, Msg = #message{}} when Payload == '_' -> + ct:pal("received mqtt ~p on topic ~p", [Msg, Topic]), + Msg; + {deliver, Topic, Msg = #message{payload = Payload}} -> + ct:pal("received mqtt ~p on topic ~p", [Msg, Topic]), + Msg + after Timeout -> {messages, Messages} = process_info(self(), messages), - Msg = io_lib:format("timeout waiting for ~p on topic ~p", [Payload, Topic]), - error({Msg, #{messages => Messages}}) + ct:fail("timeout waiting ~p ms for ~p on topic '~s', messages = ~0p", [ + Timeout, + Payload, + Topic, + Messages + ]) end. +create_bridge(Config = #{<<"type">> := Type, <<"name">> := Name}) -> + {ok, 201, Bridge} = request( + post, + uri(["bridges"]), + Config + ), + ?assertMatch( + #{ + <<"type">> := Type, + <<"name">> := Name + }, + jsx:decode(Bridge) + ), + emqx_bridge_resource:bridge_id(Type, Name). + +request_bridge(BridgeID) -> + {ok, 200, Bridge} = request(get, uri(["bridges", BridgeID]), []), + jsx:decode(Bridge). + +request_bridge_metrics(BridgeID) -> + {ok, 200, BridgeMetrics} = request(get, uri(["bridges", BridgeID, "metrics"]), []), + jsx:decode(BridgeMetrics). + request(Method, Url, Body) -> request(<<"connector_admin">>, Method, Url, Body). 
From 5ebceb20d2b32ba92522ecf24c5734c4dbf3908e Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 1 Feb 2023 16:23:58 +0300 Subject: [PATCH 085/131] test(mqtt-bridge): also test reconfiguration --- apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index 52084196a..7e1d08497 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -206,6 +206,18 @@ t_mqtt_conn_bridge_ingress(_) -> BridgeIDIngress = emqx_bridge_resource:bridge_id(?TYPE_MQTT, ?BRIDGE_NAME_INGRESS), + %% try to create the bridge again + ?assertMatch( + {ok, 400, _}, + request(post, uri(["bridges"]), ServerConf) + ), + + %% try to reconfigure the bridge + ?assertMatch( + {ok, 200, _}, + request(put, uri(["bridges", BridgeIDIngress]), ServerConf) + ), + %% we now test if the bridge works as expected RemoteTopic = <>, LocalTopic = <>, From 8a46cb974e9a45ca1f3e023c3f6e11edd8868b25 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 1 Feb 2023 16:24:40 +0300 Subject: [PATCH 086/131] test(mqtt-bridge): test async bridge reconnects seamlessly --- .../test/emqx_bridge_mqtt_SUITE.erl | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index 7e1d08497..841ed885f 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -735,6 +735,89 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> assert_mqtt_msg_received(RemoteTopic, Payload2), ok. 
+t_mqtt_conn_bridge_egress_async_reconnect(_) -> + User1 = <<"user1">>, + BridgeIDEgress = create_bridge( + ?SERVER_CONF(User1)#{ + <<"type">> => ?TYPE_MQTT, + <<"name">> => ?BRIDGE_NAME_EGRESS, + <<"egress">> => ?EGRESS_CONF, + <<"resource_opts">> => #{ + <<"worker_pool_size">> => 2, + <<"query_mode">> => <<"async">>, + %% using a long time so we can test recovery + <<"request_timeout">> => <<"15s">>, + %% to make it check the healthy quickly + <<"health_check_interval">> => <<"0.5s">>, + %% to make it reconnect quickly + <<"auto_restart_interval">> => <<"1s">> + } + } + ), + + on_exit(fun() -> + %% delete the bridge + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []), + {ok, 200, <<"[]">>} = request(get, uri(["bridges"]), []), + ok + end), + + Self = self(), + LocalTopic = <>, + RemoteTopic = <>, + emqx:subscribe(RemoteTopic), + + Publisher = start_publisher(LocalTopic, 200, Self), + ct:sleep(1000), + + %% stop the listener 1883 to make the bridge disconnected + ok = emqx_listeners:stop_listener('tcp:default'), + ct:sleep(1500), + ?assertMatch( + #{<<"status">> := Status} when Status == <<"connecting">>; Status == <<"disconnected">>, + request_bridge(BridgeIDEgress) + ), + + %% start the listener 1883 to make the bridge reconnected + ok = emqx_listeners:start_listener('tcp:default'), + timer:sleep(1500), + ?assertMatch( + #{<<"status">> := <<"connected">>}, + request_bridge(BridgeIDEgress) + ), + + N = stop_publisher(Publisher), + + %% all those messages should eventually be delivered + [ + assert_mqtt_msg_received(RemoteTopic, Payload) + || I <- lists:seq(1, N), + Payload <- [integer_to_binary(I)] + ], + + ok. + +start_publisher(Topic, Interval, CtrlPid) -> + spawn_link(fun() -> publisher(Topic, 1, Interval, CtrlPid) end). + +stop_publisher(Pid) -> + _ = Pid ! {self(), stop}, + receive + {Pid, N} -> N + after 1_000 -> ct:fail("publisher ~p did not stop", [Pid]) + end. 
+ +publisher(Topic, N, Delay, CtrlPid) -> + _ = emqx:publish(emqx_message:make(Topic, integer_to_binary(N))), + receive + {CtrlPid, stop} -> + CtrlPid ! {self(), N} + after Delay -> + publisher(Topic, N + 1, Delay, CtrlPid) + end. + +%% + assert_mqtt_msg_received(Topic) -> assert_mqtt_msg_received(Topic, '_', 200). From 4059d8ed8b965642a8de963b8cb21d126591dc47 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 1 Feb 2023 14:51:40 +0100 Subject: [PATCH 087/131] chore: upgrade to enterprise dashboard e1.0.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4be33b567..6da9231ed 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ export EMQX_DEFAULT_RUNNER = debian:11-slim export OTP_VSN ?= $(shell $(CURDIR)/scripts/get-otp-vsn.sh) export ELIXIR_VSN ?= $(shell $(CURDIR)/scripts/get-elixir-vsn.sh) export EMQX_DASHBOARD_VERSION ?= v1.1.6 -export EMQX_EE_DASHBOARD_VERSION ?= e1.0.1 +export EMQX_EE_DASHBOARD_VERSION ?= e1.0.2 export EMQX_REL_FORM ?= tgz export QUICER_DOWNLOAD_FROM_RELEASE = 1 ifeq ($(OS),Windows_NT) From 38ef37d109517b5ca6b6c3ac00fd29955eae3bcc Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 1 Feb 2023 14:52:08 +0100 Subject: [PATCH 088/131] chore: bump release version to e5.0.0-rc.4 --- apps/emqx/include/emqx_release.hrl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index e3429aef1..72244acaa 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -35,7 +35,7 @@ -define(EMQX_RELEASE_CE, "5.0.15"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.0.0-rc.3"). +-define(EMQX_RELEASE_EE, "5.0.0-rc.4"). %% the HTTP API version -define(EMQX_API_VERSION, "5.0"). 
From 5fd7f65a1f09637f4f3c7080f1e9da34d71aebd7 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 1 Feb 2023 16:52:47 +0300 Subject: [PATCH 089/131] test(bufworker): make testcase simpler to follow The confusion was due to the fact that subsequent query was missing `async_reply_fun` and thus, was not accumulating in the results. --- apps/emqx_resource/test/emqx_resource_SUITE.erl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 620516a88..3ae69a47d 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -625,7 +625,7 @@ t_query_counter_async_inflight_batch(_) -> %% this will block the resource_worker as the inflight window is full now {ok, {ok, _}} = ?wait_async_action( - emqx_resource:query(?ID, {inc_counter, 2}), + emqx_resource:query(?ID, {inc_counter, 2}, ReqOpts()), #{?snk_kind := buffer_worker_flush_but_inflight_full}, 5_000 ), @@ -635,11 +635,7 @@ t_query_counter_async_inflight_batch(_) -> [] ), - %% NOTE - %% The query above won't affect the size of the results table for some reason, - %% it's not clear if this is expected behaviour. Only the `async_reply_fun` - %% defined below will be called for the whole batch consisting of 2 increments. - Sent2 = Sent1 + 0, + Sent2 = Sent1 + 1, tap_metrics(?LINE), %% send query now will fail because the resource is blocked. 
From 14f528cc86f3030ddff469c63ad3c6a5d7cfc251 Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Wed, 1 Feb 2023 14:22:49 +0200 Subject: [PATCH 090/131] fix(resources): fix resource lifecycle * do not resume all buffer workers on successful healthcheck * do not pass undefined state to resource healthcheck callback --- .../src/emqx_resource_buffer_worker.erl | 2 +- .../src/emqx_resource_buffer_worker_sup.erl | 10 +++- .../src/emqx_resource_manager.erl | 14 ++--- .../test/emqx_resource_SUITE.erl | 58 +++++++++++++++++++ changes/v5.0.16/fix-9884.en.md | 5 ++ changes/v5.0.16/fix-9884.zh.md | 5 ++ 6 files changed, 85 insertions(+), 9 deletions(-) create mode 100644 changes/v5.0.16/fix-9884.en.md create mode 100644 changes/v5.0.16/fix-9884.zh.md diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index f91a994c7..4ef384da6 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -195,7 +195,7 @@ init({Id, Index, Opts}) -> {ok, running, Data}. running(enter, _, Data) -> - ?tp(buffer_worker_enter_running, #{}), + ?tp(buffer_worker_enter_running, #{id => maps:get(id, Data)}), %% According to `gen_statem' laws, we mustn't call `maybe_flush' %% directly because it may decide to return `{next_state, blocked, _}', %% and that's an invalid response for a state enter call. diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl index 4987946c9..a00dcdcd2 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker_sup.erl @@ -23,7 +23,7 @@ %% External API -export([start_link/0]). --export([start_workers/2, stop_workers/2]). +-export([start_workers/2, stop_workers/2, worker_pids/1]). %% Callbacks -export([init/1]). 
@@ -75,6 +75,14 @@ stop_workers(ResId, Opts) -> ensure_worker_pool_removed(ResId), ok. +worker_pids(ResId) -> + lists:map( + fun({_Name, Pid}) -> + Pid + end, + gproc_pool:active_workers(ResId) + ). + %%%============================================================================= %%% Internal %%%============================================================================= diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 232b17ce7..5de55fc4f 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -555,12 +555,14 @@ handle_connected_health_check(Data) -> end ). +with_health_check(#data{state = undefined} = Data, Func) -> + Func(disconnected, Data); with_health_check(Data, Func) -> ResId = Data#data.id, HCRes = emqx_resource:call_health_check(Data#data.manager_id, Data#data.mod, Data#data.state), {Status, NewState, Err} = parse_health_check_result(HCRes, Data), _ = maybe_alarm(Status, ResId), - ok = maybe_resume_resource_workers(Status), + ok = maybe_resume_resource_workers(ResId, Status), UpdatedData = Data#data{ state = NewState, status = Status, error = Err }, @@ -581,14 +583,12 @@ maybe_alarm(_Status, ResId) -> <<"resource down: ", ResId/binary>> ). -maybe_resume_resource_workers(connected) -> +maybe_resume_resource_workers(ResId, connected) -> lists:foreach( - fun({_, Pid, _, _}) -> - emqx_resource_buffer_worker:resume(Pid) - end, - supervisor:which_children(emqx_resource_buffer_worker_sup) + fun emqx_resource_buffer_worker:resume/1, + emqx_resource_buffer_worker_sup:worker_pids(ResId) ); -maybe_resume_resource_workers(_) -> +maybe_resume_resource_workers(_, _) -> ok. 
maybe_clear_alarm(<>) -> diff --git a/apps/emqx_resource/test/emqx_resource_SUITE.erl b/apps/emqx_resource/test/emqx_resource_SUITE.erl index 620516a88..0d3822ec2 100644 --- a/apps/emqx_resource/test/emqx_resource_SUITE.erl +++ b/apps/emqx_resource/test/emqx_resource_SUITE.erl @@ -24,6 +24,7 @@ -define(TEST_RESOURCE, emqx_connector_demo). -define(ID, <<"id">>). +-define(ID1, <<"id1">>). -define(DEFAULT_RESOURCE_GROUP, <<"default">>). -define(RESOURCE_ERROR(REASON), {error, {resource_error, #{reason := REASON}}}). -define(TRACE_OPTS, #{timetrap => 10000, timeout => 1000}). @@ -1033,6 +1034,63 @@ t_auto_retry(_) -> ), ?assertEqual(ok, Res). +t_health_check_disconnected(_) -> + _ = emqx_resource:create_local( + ?ID, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource, create_error => true}, + #{auto_retry_interval => 100} + ), + ?assertEqual( + {ok, disconnected}, + emqx_resource:health_check(?ID) + ). + +t_unblock_only_required_buffer_workers(_) -> + {ok, _} = emqx_resource:create( + ?ID, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource}, + #{ + query_mode => async, + batch_size => 5 + } + ), + lists:foreach( + fun emqx_resource_buffer_worker:block/1, + emqx_resource_buffer_worker_sup:worker_pids(?ID) + ), + emqx_resource:create( + ?ID1, + ?DEFAULT_RESOURCE_GROUP, + ?TEST_RESOURCE, + #{name => test_resource}, + #{ + query_mode => async, + batch_size => 5 + } + ), + %% creation of `?ID1` should not have unblocked `?ID`'s buffer workers + %% so we should see resumes now (`buffer_worker_enter_running`). + ?check_trace( + ?wait_async_action( + lists:foreach( + fun emqx_resource_buffer_worker:resume/1, + emqx_resource_buffer_worker_sup:worker_pids(?ID) + ), + #{?snk_kind := buffer_worker_enter_running}, + 5000 + ), + fun(Trace) -> + ?assertMatch( + [#{id := ?ID} | _], + ?of_kind(buffer_worker_enter_running, Trace) + ) + end + ). 
+ t_retry_batch(_Config) -> {ok, _} = emqx_resource:create( ?ID, diff --git a/changes/v5.0.16/fix-9884.en.md b/changes/v5.0.16/fix-9884.en.md new file mode 100644 index 000000000..7676b1213 --- /dev/null +++ b/changes/v5.0.16/fix-9884.en.md @@ -0,0 +1,5 @@ +Fix resource health check process + +* Do not resume all buffer workers on successful health check. Previously after a successful healthcheck all buffer workers (for all resources) were resumed + +* Do not pass undefined state to resource health check callback. If `on_start` callback never succeeded, the state of the resource is undefined. There is no sense to pass it to `on_get_status` callback. diff --git a/changes/v5.0.16/fix-9884.zh.md b/changes/v5.0.16/fix-9884.zh.md new file mode 100644 index 000000000..6d6894242 --- /dev/null +++ b/changes/v5.0.16/fix-9884.zh.md @@ -0,0 +1,5 @@ +修复资源健康检查流程 + +* 不要在健康检查成功时恢复所有缓冲区工作者。 之前,在成功进行健康检查后,所有缓冲区工作人员(针对所有资源)都已恢复 + +* 不要将未定义的状态传递给资源健康检查回调。 如果 `on_start` 回调从未成功,资源的状态是未定义的。 将它传递给 `on_get_status` 回调是没有意义的。 From 6fd30a7e81a87359e3223bfd2bd4e43841727987 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 31 Jan 2023 12:35:20 +0100 Subject: [PATCH 091/131] docs: improve docs for system monitoring config --- apps/emqx/i18n/emqx_schema_i18n.conf | 42 +++++++++++++++++++++------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/apps/emqx/i18n/emqx_schema_i18n.conf b/apps/emqx/i18n/emqx_schema_i18n.conf index 0ab781280..67a6bf4c7 100644 --- a/apps/emqx/i18n/emqx_schema_i18n.conf +++ b/apps/emqx/i18n/emqx_schema_i18n.conf @@ -188,8 +188,12 @@ emqx_schema { sysmon_vm_long_gc { desc { - en: "Enable Long GC monitoring." - zh: "启用长垃圾回收监控。" + en: """When an Erlang process spends long time to perform garbage collection, a warning level long_gc log is emitted, +and an MQTT message is published to the system topic $SYS/sysmon/long_gc. 
+""" + zh: """当系统检测到某个 Erlang 进程垃圾回收占用过长时间,会触发一条带有 long_gc 关键字的日志。 +同时还会发布一条主题为 $SYS/sysmon/long_gc 的 MQTT 系统消息。 +""" } label { en: "Enable Long GC monitoring." @@ -199,8 +203,12 @@ emqx_schema { sysmon_vm_long_schedule { desc { - en: "Enable Long Schedule monitoring." - zh: "启用长调度监控。" + en: """When the Erlang VM detect a task scheduled for too long, a warning level 'long_schedule' log is emitted, +and an MQTT message is published to the system topic $SYS/sysmon/long_schedule. +""" + zh: """启用后,如果 Erlang VM 调度器出现某个任务占用时间过长时,会触发一条带有 'long_schedule' 关键字的日志。 +同时还会发布一条主题为 $SYS/sysmon/long_schedule 的 MQTT 系统消息。 +""" } label { en: "Enable Long Schedule monitoring." @@ -210,8 +218,13 @@ emqx_schema { sysmon_vm_large_heap { desc { - en: "Enable Large Heap monitoring." - zh: "启用大 heap 监控。" + en: """When an Erlang process consumed a large amount of memory for its heap space, +the system will write a warning level large_heap log, and an MQTT message is published to +the system topic $SYS/sysmon/large_heap. +""" + zh: """启用后,当一个 Erlang 进程申请了大量内存,系统会触发一条带有 large_heap 关键字的 +warning 级别日志。同时还会发布一条主题为 $SYS/sysmon/busy_dist_port 的 MQTT 系统消息。 +""" } label { en: "Enable Large Heap monitoring." @@ -221,8 +234,13 @@ emqx_schema { sysmon_vm_busy_dist_port { desc { - en: "Enable Busy Distribution Port monitoring." - zh: "启用分布式端口过忙监控。" + en: """When the RPC connection used to communicate with other nodes in the cluster is overloaded, +there will be a busy_dist_port warning log, +and an MQTT message is published to system topic $SYS/sysmon/busy_dist_port. +""" + zh: """启用后,当用于集群接点之间 RPC 的连接过忙时,会触发一条带有 busy_dist_port 关键字的 warning 级别日志。 +同时还会发布一条主题为 $SYS/sysmon/busy_dist_port 的 MQTT 系统消息。 +""" } label { en: "Enable Busy Distribution Port monitoring." @@ -232,8 +250,12 @@ emqx_schema { sysmon_vm_busy_port { desc { - en: "Enable Busy Port monitoring." - zh: "启用端口过忙监控。" + en: """When a port (e.g. 
TCP socket) is overloaded, there will be a busy_port warning log, +and an MQTT message is published to the system topic $SYS/sysmon/busy_port. +""" + zh: """当一个系统接口(例如 TCP socket)过忙,会触发一条带有 busy_port 关键字的 warning 级别的日志。 +同时还会发布一条主题为 $SYS/sysmon/busy_port 的 MQTT 系统消息。 +""" } label { en: "Enable Busy Port monitoring." From 1a90c1654cc565b4fb38d5a953240eb99e5dcd90 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 11:43:04 +0800 Subject: [PATCH 092/131] chore: bad typo --- apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf | 6 +++--- lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf index 7e9fcd9e7..8e349f4aa 100644 --- a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf +++ b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf @@ -59,7 +59,7 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise start_timeout { desc { en: """If 'start_after_created' enabled, how long time do we wait for the resource get started, in milliseconds.""" - zh: """如果选择了创建后立即启动资源,此选项用来设置等待资源启动的超时时间,单位毫秒。""" + zh: """如果选择了创建后立即启动资源,此选项用来设置等待资源启动的超时时间,单位秒。""" } label { en: """Start Timeout""" @@ -80,8 +80,8 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise query_mode { desc { - en: """Query mode. Optional 'sync/async', default 'sync'.""" - zh: """请求模式。可选 '同步/异步',默认为'同步'模式。""" + en: """Query mode. 
Optional 'sync/async', default 'async'.""" + zh: """请求模式。可选 '同步/异步',默认为'异步'模式。""" } label { en: """Query mode""" diff --git a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf index be1e581bd..cb3d6618d 100644 --- a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf +++ b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf @@ -303,7 +303,7 @@ emqx_ee_bridge_kafka { "When a single message is over the limit, it is still sent (as a single element batch)." zh: "最大消息批量字节数。" "大多数 Kafka 环境的默认最低值是 1 MB,EMQX 的默认值比 1 MB 更小是因为需要" - "补偿 Kafka 消息编码索需要的额外字节(尤其是当每条消息都很小的情况下)。" + "补偿 Kafka 消息编码所需要的额外字节(尤其是当每条消息都很小的情况下)。" "当单个消息的大小超过该限制时,它仍然会被发送,(相当于该批量中只有单个消息)。" } label { @@ -371,7 +371,7 @@ emqx_ee_bridge_kafka { "Greater value typically means better throughput. However, there can be a risk of message reordering when this " "value is greater than 1." zh: "设置 Kafka 生产者(每个分区一个)在收到 Kafka 的确认前最多发送多少个请求(批量)。" - "调大这个值通常可以增加吞吐量,但是,当该值设置大于 1 是存在消息乱序的风险。" + "调大这个值通常可以增加吞吐量,但是,当该值设置大于 1 时存在消息乱序的风险。" } label { en: "Max Inflight" @@ -395,14 +395,14 @@ emqx_ee_bridge_kafka { desc { en: "Message buffer mode.\n\n" "memory: Buffer all messages in memory. The messages will be lost in case of EMQX node restart\n" - "disc: Buffer all messages on disk. The messages on disk are able to survive EMQX node restart.\n" + "disk: Buffer all messages on disk. The messages on disk are able to survive EMQX node restart.\n" "hybrid: Buffer message in memory first, when up to certain limit " "(see segment_bytes config for more information), then start offloading " "messages to disk, Like memory mode, the messages will be lost in case of " "EMQX node restart." 
zh: "消息缓存模式。\n" "memory: 所有的消息都缓存在内存里。如果 EMQX 服务重启,缓存的消息会丢失。\n" - "disc: 缓存到磁盘上。EMQX 重启后会继续发送重启前未发送完成的消息。\n" + "disk: 缓存到磁盘上。EMQX 重启后会继续发送重启前未发送完成的消息。\n" "hybrid: 先将消息缓存在内存中,当内存中的消息堆积超过一定限制" "(配置项 segment_bytes 描述了该限制)后,后续的消息会缓存到磁盘上。" "与 memory 模式一样,如果 EMQX 服务重启,缓存的消息会丢失。" From f81a488010b29a9957282b35f27ac0e4fa8e1102 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 15:17:51 +0800 Subject: [PATCH 093/131] chore: logwarn if dynlibs is missing --- bin/emqx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/emqx b/bin/emqx index 2c2609b36..132d8cba7 100755 --- a/bin/emqx +++ b/bin/emqx @@ -361,7 +361,7 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then logerr "$COMPATIBILITY_INFO" exit 2 fi - logerr "Using libs from '${DYNLIBS_DIR}' due to missing from the OS." + logwarn "Using libs from '${DYNLIBS_DIR}' due to missing from the OS." fi [ "$DEBUG" -eq 1 ] && set -x fi From 44b7624c10f05093060712ec97ab9b853c1b5a89 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 09:13:18 +0100 Subject: [PATCH 094/131] docs: update changelog --- changes/v5.0.16/fix-9884.en.md | 7 ++----- changes/v5.0.16/fix-9884.zh.md | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/changes/v5.0.16/fix-9884.en.md b/changes/v5.0.16/fix-9884.en.md index 7676b1213..28eacfc86 100644 --- a/changes/v5.0.16/fix-9884.en.md +++ b/changes/v5.0.16/fix-9884.en.md @@ -1,5 +1,2 @@ -Fix resource health check process - -* Do not resume all buffer workers on successful health check. Previously after a successful healthcheck all buffer workers (for all resources) were resumed - -* Do not pass undefined state to resource health check callback. If `on_start` callback never succeeded, the state of the resource is undefined. There is no sense to pass it to `on_get_status` callback. +Do not resume all buffer workers on successful health check of any individual resource. 
+Previously after any successful healthcheck, all buffer workers (for all resources) were resumed diff --git a/changes/v5.0.16/fix-9884.zh.md b/changes/v5.0.16/fix-9884.zh.md index 6d6894242..08f6e7188 100644 --- a/changes/v5.0.16/fix-9884.zh.md +++ b/changes/v5.0.16/fix-9884.zh.md @@ -1,5 +1,2 @@ -修复资源健康检查流程 - -* 不要在健康检查成功时恢复所有缓冲区工作者。 之前,在成功进行健康检查后,所有缓冲区工作人员(针对所有资源)都已恢复 - -* 不要将未定义的状态传递给资源健康检查回调。 如果 `on_start` 回调从未成功,资源的状态是未定义的。 将它传递给 `on_get_status` 回调是没有意义的。 +不在任意一个资源健康检查成功时恢复所有资源发送缓存。 +在此修复之前,在任意一个资源成功进行健康检查后,所有资源的缓存都会尝试恢复。 From dcc6bd9c21974ad5c39aa621b9b478c2ad955f93 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 14:37:03 +0800 Subject: [PATCH 095/131] fix: mongodb bridge'd default connect_opt should be async, not sync --- lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl index b62871299..64fe82163 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl @@ -79,7 +79,7 @@ fields("creation_opts") -> enum([sync, async]), #{ desc => ?DESC(emqx_resource_schema, "query_mode"), - default => sync + default => async } )}; (Field) -> From 22c3f5002051ee387c4034c2c4135327a35dc4db Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 16:52:52 +0800 Subject: [PATCH 096/131] fix: add query_mode_sync_only for mysql pgsql redis mongodb bridge --- .../i18n/emqx_resource_schema_i18n.conf | 11 ++++++ .../src/schema/emqx_resource_schema.erl | 34 ++++++++++++++++--- .../src/emqx_ee_bridge_mongodb.erl | 29 ++-------------- .../src/emqx_ee_bridge_mysql.erl | 7 +--- .../src/emqx_ee_bridge_pgsql.erl | 8 +---- .../src/emqx_ee_bridge_redis.erl | 4 +-- 6 files changed, 46 insertions(+), 47 deletions(-) diff --git a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf 
b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf index 8e349f4aa..b5f245df4 100644 --- a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf +++ b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf @@ -89,6 +89,17 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise } } + query_mode_sync_only { + desc { + en: """Query mode. Currently only support 'sync'.""" + zh: """请求模式。目前只支持同步模式。""" + } + label { + en: """Query mode""" + zh: """请求模式""" + } + } + request_timeout { desc { en: """Timeout for requests. If query_mode is sync, calls to the resource will be blocked for this amount of time before timing out.""" diff --git a/apps/emqx_resource/src/schema/emqx_resource_schema.erl b/apps/emqx_resource/src/schema/emqx_resource_schema.erl index 39513e28c..36b4d7d2e 100644 --- a/apps/emqx_resource/src/schema/emqx_resource_schema.erl +++ b/apps/emqx_resource/src/schema/emqx_resource_schema.erl @@ -30,16 +30,25 @@ namespace() -> "resource_schema". roots() -> []. +fields("resource_opts_sync_only") -> + [ + {resource_opts, + mk( + ref(?MODULE, "creation_opts_sync_only"), + resource_opts_meta() + )} + ]; +fields("creation_opts_sync_only") -> + Fields0 = fields("creation_opts"), + Fields1 = lists:keydelete(async_inflight_window, 1, Fields0), + QueryMod = {query_mode, fun query_mode_sync_only/1}, + lists:keyreplace(query_mode, 1, Fields1, QueryMod); fields("resource_opts") -> [ {resource_opts, mk( ref(?MODULE, "creation_opts"), - #{ - required => false, - default => #{}, - desc => ?DESC(<<"resource_opts">>) - } + resource_opts_meta() )} ]; fields("creation_opts") -> @@ -59,6 +68,13 @@ fields("creation_opts") -> {max_queue_bytes, fun max_queue_bytes/1} ]. +resource_opts_meta() -> + #{ + required => false, + default => #{}, + desc => ?DESC(<<"resource_opts">>) + }. 
+ worker_pool_size(type) -> non_neg_integer(); worker_pool_size(desc) -> ?DESC("worker_pool_size"); worker_pool_size(default) -> ?WORKER_POOL_SIZE; @@ -95,6 +111,12 @@ query_mode(default) -> async; query_mode(required) -> false; query_mode(_) -> undefined. +query_mode_sync_only(type) -> enum([sync]); +query_mode_sync_only(desc) -> ?DESC("query_mode_sync_only"); +query_mode_sync_only(default) -> async; +query_mode_sync_only(required) -> false; +query_mode_sync_only(_) -> undefined. + request_timeout(type) -> hoconsc:union([infinity, emqx_schema:duration_ms()]); request_timeout(desc) -> ?DESC("request_timeout"); request_timeout(default) -> <<"15s">>; @@ -139,4 +161,6 @@ max_queue_bytes(required) -> false; max_queue_bytes(_) -> undefined. desc("creation_opts") -> + ?DESC("creation_opts"); +desc("creation_opts_sync_only") -> ?DESC("creation_opts"). diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl index 64fe82163..8312c081c 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mongodb.erl @@ -39,7 +39,7 @@ fields("config") -> {enable, mk(boolean(), #{desc => ?DESC("enable"), default => true})}, {collection, mk(binary(), #{desc => ?DESC("collection"), default => <<"mqtt">>})}, {payload_template, mk(binary(), #{required => false, desc => ?DESC("payload_template")})} - ] ++ fields("resource_opts"); + ] ++ emqx_resource_schema:fields("resource_opts_sync_only"); fields(mongodb_rs) -> emqx_connector_mongo:fields(rs) ++ fields("config"); fields(mongodb_sharded) -> @@ -69,32 +69,7 @@ fields("get_sharded") -> fields("get_single") -> emqx_bridge_schema:status_fields() ++ fields(mongodb_single) ++ - type_and_name_fields(mongodb_single); -fields("creation_opts") -> - lists:map( - fun - ({query_mode, _FieldSchema}) -> - {query_mode, - mk( - enum([sync, async]), - #{ - desc => ?DESC(emqx_resource_schema, "query_mode"), - default => async - } - )}; - 
(Field) -> - Field - end, - emqx_resource_schema:fields("creation_opts") - ); -fields("resource_opts") -> - [ - {resource_opts, - mk( - ref(?MODULE, "creation_opts"), - #{default => #{}, desc => ?DESC(emqx_resource_schema, "resource_opts")} - )} - ]. + type_and_name_fields(mongodb_single). conn_bridge_examples(Method) -> [ diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl index fadf05848..fd4d9bdd9 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_mysql.erl @@ -98,8 +98,7 @@ fields("config") -> (emqx_connector_mysql:fields(config) -- emqx_connector_schema_lib:prepare_statement_fields()); fields("creation_opts") -> - Opts = emqx_resource_schema:fields("creation_opts"), - [O || {Field, _} = O <- Opts, not is_hidden_opts(Field)]; + emqx_resource_schema:fields("creation_opts_sync_only"); fields("post") -> [type_field(), name_field() | fields("config")]; fields("put") -> @@ -118,10 +117,6 @@ desc(_) -> %% ------------------------------------------------------------------------------------------------- %% internal -is_hidden_opts(Field) -> - lists:member(Field, [ - async_inflight_window - ]). type_field() -> {type, mk(enum([mysql]), #{required => true, desc => ?DESC("desc_type")})}. 
diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl index 8bf7b1969..b592197f9 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_pgsql.erl @@ -100,8 +100,7 @@ fields("config") -> (emqx_connector_pgsql:fields(config) -- emqx_connector_schema_lib:prepare_statement_fields()); fields("creation_opts") -> - Opts = emqx_resource_schema:fields("creation_opts"), - [O || {Field, _} = O <- Opts, not is_hidden_opts(Field)]; + emqx_resource_schema:fields("creation_opts_sync_only"); fields("post") -> fields("post", pgsql); fields("put") -> @@ -122,11 +121,6 @@ desc(_) -> undefined. %% ------------------------------------------------------------------------------------------------- -%% internal -is_hidden_opts(Field) -> - lists:member(Field, [ - async_inflight_window - ]). type_field(Type) -> {type, mk(enum([Type]), #{required => true, desc => ?DESC("desc_type")})}. diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl index 6b7239a76..861ef243e 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl @@ -181,10 +181,10 @@ resource_fields(Type) -> resource_creation_fields("redis_cluster") -> % TODO % Cluster bridge is currently incompatible with batching. - Fields = emqx_resource_schema:fields("creation_opts"), + Fields = emqx_resource_schema:fields("creation_opts_sync_only"), lists:foldl(fun proplists:delete/2, Fields, [batch_size, batch_time]); resource_creation_fields(_) -> - emqx_resource_schema:fields("creation_opts"). + emqx_resource_schema:fields("creation_opts_sync_only"). 
desc("config") -> ?DESC("desc_config"); From e77675186036dfa1432b2088de64482db4b9ba72 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 16:55:11 +0800 Subject: [PATCH 097/131] fix: delete enable_batch from redis bridge --- lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl index 861ef243e..18822ba11 100644 --- a/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl +++ b/lib-ee/emqx_ee_bridge/src/emqx_ee_bridge_redis.erl @@ -182,7 +182,7 @@ resource_creation_fields("redis_cluster") -> % TODO % Cluster bridge is currently incompatible with batching. Fields = emqx_resource_schema:fields("creation_opts_sync_only"), - lists:foldl(fun proplists:delete/2, Fields, [batch_size, batch_time]); + lists:foldl(fun proplists:delete/2, Fields, [batch_size, batch_time, enable_batch]); resource_creation_fields(_) -> emqx_resource_schema:fields("creation_opts_sync_only"). From f8936013b79a4904ee705fee8ea0bb946cae8e01 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 16:58:40 +0800 Subject: [PATCH 098/131] chore: replace async with sync --- apps/emqx_resource/src/schema/emqx_resource_schema.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_resource/src/schema/emqx_resource_schema.erl b/apps/emqx_resource/src/schema/emqx_resource_schema.erl index 36b4d7d2e..fdd65bc3c 100644 --- a/apps/emqx_resource/src/schema/emqx_resource_schema.erl +++ b/apps/emqx_resource/src/schema/emqx_resource_schema.erl @@ -113,7 +113,7 @@ query_mode(_) -> undefined. query_mode_sync_only(type) -> enum([sync]); query_mode_sync_only(desc) -> ?DESC("query_mode_sync_only"); -query_mode_sync_only(default) -> async; +query_mode_sync_only(default) -> sync; query_mode_sync_only(required) -> false; query_mode_sync_only(_) -> undefined. 
From 22cc1cc745bde3982db5d75123b2db01dc691ff7 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 17:33:31 +0800 Subject: [PATCH 099/131] fix: make spell_check happy --- apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf index b5f245df4..3a18827f2 100644 --- a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf +++ b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf @@ -91,7 +91,7 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise query_mode_sync_only { desc { - en: """Query mode. Currently only support 'sync'.""" + en: """Query mode. only support 'sync'.""" zh: """请求模式。目前只支持同步模式。""" } label { From 1c9035d24cff320fdf99ff1c6407d0b1d98de3cf Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Thu, 2 Feb 2023 17:34:09 +0800 Subject: [PATCH 100/131] test: remove async from redis ct --- apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf | 2 +- lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf index 3a18827f2..d274d4ba2 100644 --- a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf +++ b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf @@ -91,7 +91,7 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise query_mode_sync_only { desc { - en: """Query mode. only support 'sync'.""" + en: """Query mode. 
Only support 'sync'.""" zh: """请求模式。目前只支持同步模式。""" } label { diff --git a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl index 1d4fa5da4..67a9b4a05 100644 --- a/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl +++ b/lib-ee/emqx_ee_bridge/test/emqx_ee_bridge_redis_SUITE.erl @@ -509,7 +509,7 @@ redis_connect_configs() -> toxiproxy_redis_bridge_config() -> Conf0 = ?REDIS_TOXYPROXY_CONNECT_CONFIG#{ <<"resource_opts">> => #{ - <<"query_mode">> => <<"async">>, + <<"query_mode">> => <<"sync">>, <<"worker_pool_size">> => <<"1">>, <<"batch_size">> => integer_to_binary(?BATCH_SIZE), <<"health_check_interval">> => <<"1s">>, @@ -537,7 +537,7 @@ resource_configs() -> <<"start_timeout">> => <<"15s">> }, batch_on => #{ - <<"query_mode">> => <<"async">>, + <<"query_mode">> => <<"sync">>, <<"worker_pool_size">> => <<"1">>, <<"batch_size">> => integer_to_binary(?BATCH_SIZE), <<"start_timeout">> => <<"15s">> From 0912f13c1f0026dec4a5185171fd7862d50a06c4 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 2 Feb 2023 12:55:10 +0300 Subject: [PATCH 101/131] fix(mqtt-bridge): stop respecting `clean_start` config parameter We are ignoring the user configuration because there's currently no reliable way to ensure proper session recovery according to the MQTT spec. --- .../test/emqx_bridge_mqtt_SUITE.erl | 23 +++++++++++++++++++ .../src/emqx_connector_mqtt.erl | 6 +++-- .../src/mqtt/emqx_connector_mqtt_schema.erl | 4 +++- .../src/mqtt/emqx_connector_mqtt_worker.erl | 15 ------------ 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index 841ed885f..8e8f45dbc 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -242,6 +242,29 @@ t_mqtt_conn_bridge_ingress(_) -> ok. 
+t_mqtt_conn_bridge_ignores_clean_start(_) -> + BridgeName = atom_to_binary(?FUNCTION_NAME), + BridgeID = create_bridge( + ?SERVER_CONF(<<"user1">>)#{ + <<"type">> => ?TYPE_MQTT, + <<"name">> => BridgeName, + <<"ingress">> => ?INGRESS_CONF, + <<"clean_start">> => false + } + ), + + {ok, 200, BridgeJSON} = request(get, uri(["bridges", BridgeID]), []), + Bridge = jsx:decode(BridgeJSON), + + %% verify that there's no `clean_start` in response + ?assertEqual(#{}, maps:with([<<"clean_start">>], Bridge)), + + %% delete the bridge + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeID]), []), + {ok, 200, <<"[]">>} = request(get, uri(["bridges"]), []), + + ok. + t_mqtt_conn_bridge_ingress_no_payload_template(_) -> User1 = <<"user1">>, BridgeIDIngress = create_bridge( diff --git a/apps/emqx_connector/src/emqx_connector_mqtt.erl b/apps/emqx_connector/src/emqx_connector_mqtt.erl index c1a051836..cffd138b5 100644 --- a/apps/emqx_connector/src/emqx_connector_mqtt.erl +++ b/apps/emqx_connector/src/emqx_connector_mqtt.erl @@ -251,7 +251,6 @@ basic_config( server := Server, proto_ver := ProtoVer, bridge_mode := BridgeMode, - clean_start := CleanStart, keepalive := KeepAlive, retry_interval := RetryIntv, max_inflight := MaxInflight, @@ -271,7 +270,10 @@ basic_config( %% non-standard mqtt connection packets will be filtered out by LB. %% So let's disable bridge_mode. bridge_mode => BridgeMode, - clean_start => CleanStart, + %% NOTE + %% We are ignoring the user configuration here because there's currently no reliable way + %% to ensure proper session recovery according to the MQTT spec. 
+ clean_start => true, keepalive => ms_to_s(KeepAlive), retry_interval => RetryIntv, max_inflight => MaxInflight, diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl index 6ea609cc6..6fabc95e8 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_schema.erl @@ -110,7 +110,9 @@ fields("server_configs") -> boolean(), #{ default => true, - desc => ?DESC("clean_start") + desc => ?DESC("clean_start"), + hidden => true, + deprecated => {since, "v5.0.16"} } )}, {keepalive, mk_duration("MQTT Keepalive.", #{default => "300s"})}, diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index 9fac20153..631ac9350 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -425,18 +425,3 @@ printable_maps(Headers) -> #{}, Headers ). - -%% TODO -% maybe_destroy_session(#{connect_opts := ConnectOpts = #{clean_start := false}} = State) -> -% try -% %% Destroy session if clean_start is not set. -% %% Ignore any crashes, just refresh the clean_start = true. -% _ = do_connect(State#{connect_opts => ConnectOpts#{clean_start => true}}), -% _ = disconnect(State), -% ok -% catch -% _:_ -> -% ok -% end; -% maybe_destroy_session(_State) -> -% ok. 
From 0eb554a62e9f987779dcb5600ad05daf558dff4b Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 11:20:21 +0100 Subject: [PATCH 102/131] fix(kafka): check Kafka partition leader connectivity --- .../kafka/emqx_bridge_impl_kafka_producer.erl | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl index 1ac619626..ac98209ed 100644 --- a/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl +++ b/lib-ee/emqx_ee_bridge/src/kafka/emqx_bridge_impl_kafka_producer.erl @@ -91,6 +91,7 @@ on_start(InstId, Config) -> {ok, #{ message_template => compile_message_template(MessageTemplate), client_id => ClientId, + kafka_topic => KafkaTopic, producers => Producers, resource_id => ResourceID }}; @@ -234,8 +235,35 @@ on_kafka_ack(_Partition, buffer_overflow_discarded, _Callback) -> %% do not apply the callback (which is basically to bump success or fail counter) ok. -on_get_status(_InstId, _State) -> - connected. +on_get_status(_InstId, #{client_id := ClientId, kafka_topic := KafkaTopic}) -> + case wolff_client_sup:find_client(ClientId) of + {ok, Pid} -> + do_get_status(Pid, KafkaTopic); + {error, _Reason} -> + disconnected + end. + +do_get_status(Client, KafkaTopic) -> + %% TODO: add a wolff_producers:check_connectivity + case wolff_client:get_leader_connections(Client, KafkaTopic) of + {ok, Leaders} -> + %% Kafka is considered healthy as long as any of the partition leader is reachable + case + lists:any( + fun({_Partition, Pid}) -> + is_pid(Pid) andalso erlang:is_process_alive(Pid) + end, + Leaders + ) + of + true -> + connected; + false -> + disconnected + end; + {error, _} -> + disconnected + end. 
%% Parse comma separated host:port list into a [{Host,Port}] list hosts(Hosts) when is_binary(Hosts) -> From 9864587389ac7d8c457e8f84d1a29a2c06260a80 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 12:03:02 +0100 Subject: [PATCH 103/131] fix: send to buffer-supported connector even when disconnected --- apps/emqx_resource/src/emqx_resource.erl | 3 +- .../src/emqx_resource_buffer_worker.erl | 40 ++++++++++++------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource.erl b/apps/emqx_resource/src/emqx_resource.erl index ad7f30b47..1c5eecfbb 100644 --- a/apps/emqx_resource/src/emqx_resource.erl +++ b/apps/emqx_resource/src/emqx_resource.erl @@ -264,7 +264,8 @@ query(ResId, Request, Opts) -> case {IsBufferSupported, QM} of {true, _} -> %% only Kafka so far - emqx_resource_buffer_worker:simple_async_query(ResId, Request); + Opts1 = Opts#{is_buffer_supported => true}, + emqx_resource_buffer_worker:simple_async_query(ResId, Request, Opts1); {false, sync} -> emqx_resource_buffer_worker:sync_query(ResId, Request, Opts); {false, async} -> diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index 4ef384da6..c7b143381 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -38,7 +38,7 @@ -export([ simple_sync_query/2, - simple_async_query/2 + simple_async_query/3 ]). -export([ @@ -130,10 +130,10 @@ simple_sync_query(Id, Request) -> Result. %% simple async-query the resource without batching and queuing. --spec simple_async_query(id(), request()) -> term(). -simple_async_query(Id, Request) -> +-spec simple_async_query(id(), request(), query_opts()) -> term(). 
+simple_async_query(Id, Request, QueryOpts0) -> Index = undefined, - QueryOpts = simple_query_opts(), + QueryOpts = maps:merge(simple_query_opts(), QueryOpts0), emqx_resource_metrics:matched_inc(Id), Ref = make_request_ref(), Result = call_query(async, Id, Index, Ref, ?SIMPLE_QUERY(Request), QueryOpts), @@ -851,23 +851,33 @@ handle_async_worker_down(Data0, Pid) -> call_query(QM0, Id, Index, Ref, Query, QueryOpts) -> ?tp(call_query_enter, #{id => Id, query => Query}), case emqx_resource_manager:ets_lookup(Id) of - {ok, _Group, #{mod := Mod, state := ResSt, status := connected} = Data} -> - QM = - case QM0 =:= configured of - true -> maps:get(query_mode, Data); - false -> QM0 - end, - CBM = maps:get(callback_mode, Data), - CallMode = call_mode(QM, CBM), - apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, QueryOpts); {ok, _Group, #{status := stopped}} -> ?RESOURCE_ERROR(stopped, "resource stopped or disabled"); - {ok, _Group, #{status := S}} when S == connecting; S == disconnected -> - ?RESOURCE_ERROR(not_connected, "resource not connected"); + {ok, _Group, Resource} -> + QM = + case QM0 =:= configured of + true -> maps:get(query_mode, Resource); + false -> QM0 + end, + do_call_query(QM, Id, Index, Ref, Query, QueryOpts, Resource); {error, not_found} -> ?RESOURCE_ERROR(not_found, "resource not found") end. 
+do_call_query(QM, Id, Index, Ref, Query, #{is_buffer_supported := true} = QueryOpts, Resource) -> + %% The connector supprots buffer, send even in disconnected state + #{mod := Mod, state := ResSt, callback_mode := CBM} = Resource, + CallMode = call_mode(QM, CBM), + apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, QueryOpts); +do_call_query(QM, Id, Index, Ref, Query, QueryOpts, #{status := connected} = Resource) -> + %% when calling from the buffer worker or other simple queries, + %% only apply the query fun when it's at connected status + #{mod := Mod, state := ResSt, callback_mode := CBM} = Resource, + CallMode = call_mode(QM, CBM), + apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, QueryOpts); +do_call_query(_QM, _Id, _Index, _Ref, _Query, _QueryOpts, _Data) -> + ?RESOURCE_ERROR(not_connected, "resource not connected"). + -define(APPLY_RESOURCE(NAME, EXPR, REQ), try %% if the callback module (connector) wants to return an error that From 7ad80d012ca381968c0611c67c4bde7866edd6cb Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 12:23:15 +0100 Subject: [PATCH 104/131] chore(kafka): upgrade to wolff 1.7.5 comparing to 1.7.4, a counter bump bug was fixed but it does not affect EMQX as the bridge does not make use of the counter which had issue. --- lib-ee/emqx_ee_bridge/rebar.config | 2 +- mix.exs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib-ee/emqx_ee_bridge/rebar.config b/lib-ee/emqx_ee_bridge/rebar.config index 6ca554c72..fa6dd560e 100644 --- a/lib-ee/emqx_ee_bridge/rebar.config +++ b/lib-ee/emqx_ee_bridge/rebar.config @@ -1,5 +1,5 @@ {erl_opts, [debug_info]}. 
-{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.4"}}} +{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.5"}}} , {kafka_protocol, {git, "https://github.com/kafka4beam/kafka_protocol.git", {tag, "4.1.2"}}} , {brod_gssapi, {git, "https://github.com/kafka4beam/brod_gssapi.git", {tag, "v0.1.0-rc1"}}} , {brod, {git, "https://github.com/kafka4beam/brod.git", {tag, "3.16.7"}}} diff --git a/mix.exs b/mix.exs index 8e271390f..c6e497d37 100644 --- a/mix.exs +++ b/mix.exs @@ -130,7 +130,7 @@ defmodule EMQXUmbrella.MixProject do [ {:hstreamdb_erl, github: "hstreamdb/hstreamdb_erl", tag: "0.2.5"}, {:influxdb, github: "emqx/influxdb-client-erl", tag: "1.1.7", override: true}, - {:wolff, github: "kafka4beam/wolff", tag: "1.7.4"}, + {:wolff, github: "kafka4beam/wolff", tag: "1.7.5"}, {:kafka_protocol, github: "kafka4beam/kafka_protocol", tag: "4.1.2", override: true}, {:brod_gssapi, github: "kafka4beam/brod_gssapi", tag: "v0.1.0-rc1"}, {:brod, github: "kafka4beam/brod", tag: "3.16.7"}, From d5c482b0b0724a168556d016559c050038782c7e Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 12:45:27 +0100 Subject: [PATCH 105/131] docs: remove timer unit from description the user input has time unit. e.g. "5s" for 5 seconds etc. 
--- apps/emqx/i18n/emqx_schema_i18n.conf | 4 ++-- apps/emqx_conf/i18n/emqx_conf_schema.conf | 2 +- apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf | 2 +- apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf | 4 ++-- apps/emqx_resource/src/emqx_resource_buffer_worker.erl | 2 +- lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/apps/emqx/i18n/emqx_schema_i18n.conf b/apps/emqx/i18n/emqx_schema_i18n.conf index 67a6bf4c7..95790fae7 100644 --- a/apps/emqx/i18n/emqx_schema_i18n.conf +++ b/apps/emqx/i18n/emqx_schema_i18n.conf @@ -46,8 +46,8 @@ emqx_schema { overload_protection_backoff_delay { desc { - en: "When at high load, some unimportant tasks could be delayed for execution, here set the duration in milliseconds precision." - zh: "高负载时,一些不重要的任务可能会延迟执行,在这里设置允许延迟的时间。单位为毫秒。" + en: "When at high load, some unimportant tasks could be delayed for execution, here set the duration." + zh: "高负载时,一些不重要的任务可能会延迟执行,在这里设置允许延迟的时间。" } label { en: "Delay Time" diff --git a/apps/emqx_conf/i18n/emqx_conf_schema.conf b/apps/emqx_conf/i18n/emqx_conf_schema.conf index 131118f5b..522e8cf6e 100644 --- a/apps/emqx_conf/i18n/emqx_conf_schema.conf +++ b/apps/emqx_conf/i18n/emqx_conf_schema.conf @@ -1255,7 +1255,7 @@ Supervisor 报告的类型。默认为 error 类型。 log_overload_kill_restart_after { desc { - en: """If the handler is terminated, it restarts automatically after a delay specified in milliseconds. The value `infinity` prevents restarts.""" + en: """If the handler is terminated, it restarts automatically after a delay. 
The value `infinity` prevents restarts.""" zh: """如果处理进程终止,它会在以指定的时间后后自动重新启动。 `infinity` 不自动重启。""" } label { diff --git a/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf b/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf index 0ab09520e..ca8bf0769 100644 --- a/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf +++ b/apps/emqx_management/i18n/emqx_mgmt_api_alarms_i18n.conf @@ -62,7 +62,7 @@ The default is false.""" duration { desc { - en: """Indicates how long the alarm has lasted, in milliseconds.""" + en: """Indicates how long the alarm has been active in milliseconds.""" zh: """表明告警已经持续了多久,单位:毫秒。""" } } diff --git a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf index 8e349f4aa..c46de294b 100644 --- a/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf +++ b/apps/emqx_resource/i18n/emqx_resource_schema_i18n.conf @@ -58,8 +58,8 @@ For bridges only have ingress direction data flow, it can be set to 0 otherwise start_timeout { desc { - en: """If 'start_after_created' enabled, how long time do we wait for the resource get started, in milliseconds.""" - zh: """如果选择了创建后立即启动资源,此选项用来设置等待资源启动的超时时间,单位秒。""" + en: """Time interval to wait for an auto-started resource to become healthy before responding resource creation requests.""" + zh: """在回复资源创建请求前等待资源进入健康状态的时间。""" } label { en: """Start Timeout""" diff --git a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl index c7b143381..29fe79d09 100644 --- a/apps/emqx_resource/src/emqx_resource_buffer_worker.erl +++ b/apps/emqx_resource/src/emqx_resource_buffer_worker.erl @@ -865,7 +865,7 @@ call_query(QM0, Id, Index, Ref, Query, QueryOpts) -> end. 
do_call_query(QM, Id, Index, Ref, Query, #{is_buffer_supported := true} = QueryOpts, Resource) -> - %% The connector supprots buffer, send even in disconnected state + %% The connector supports buffer, send even in disconnected state #{mod := Mod, state := ResSt, callback_mode := CBM} = Resource, CallMode = call_mode(QM, CBM), apply_query_fun(CallMode, Mod, Id, Index, Ref, Query, ResSt, QueryOpts); diff --git a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf index cb3d6618d..fff798e19 100644 --- a/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf +++ b/lib-ee/emqx_ee_bridge/i18n/emqx_ee_bridge_kafka.conf @@ -220,7 +220,7 @@ emqx_ee_bridge_kafka { desc { en: "When set to 'true', TCP buffer sent as soon as possible. " "Otherwise, the OS kernel may buffer small TCP packets for a while (40 ms by default)." - zh: "设置 ‘true' 让系统内核立即发送。否则当需要发送当内容很少时,可能会有一定延迟(默认 40 毫秒)。" + zh: "设置‘true’让系统内核立即发送。否则当需要发送的内容很少时,可能会有一定延迟(默认 40 毫秒)。" } label { en: "No Delay" From cf8432227abbbdfba44f3c2e96f2b153dcb69ee3 Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Thu, 2 Feb 2023 13:40:29 +0100 Subject: [PATCH 106/131] ci: always run static_checks --- .github/workflows/build_slim_packages.yaml | 4 ---- .github/workflows/run_test_cases.yaml | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/build_slim_packages.yaml b/.github/workflows/build_slim_packages.yaml index bfdeb1fb8..88e454860 100644 --- a/.github/workflows/build_slim_packages.yaml +++ b/.github/workflows/build_slim_packages.yaml @@ -55,10 +55,6 @@ jobs: run: | make ${EMQX_NAME}-tgz ./scripts/pkg-tests.sh ${EMQX_NAME}-tgz - - name: run static checks - if: contains(matrix.os, 'ubuntu') - run: | - make static_checks - name: build and test deb/rpm packages run: | make ${EMQX_NAME}-pkg diff --git a/.github/workflows/run_test_cases.yaml b/.github/workflows/run_test_cases.yaml index 2d9d2f01e..ac0edef13 100644 --- 
a/.github/workflows/run_test_cases.yaml +++ b/.github/workflows/run_test_cases.yaml @@ -77,6 +77,7 @@ jobs: make ensure-rebar3 # fetch all deps and compile make ${{ matrix.profile }} + make static_checks make test-compile cd .. zip -ryq source.zip source/* source/.[^.]* From 5b1e9764126f371ea3d2a94b2801947e0306d6e3 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 13:56:09 +0100 Subject: [PATCH 107/131] chore: upgrade replayq from 0.3.6 to 0.3.7 only included a dialyzer fix --- mix.exs | 2 +- rebar.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mix.exs b/mix.exs index c6e497d37..6fbddb28b 100644 --- a/mix.exs +++ b/mix.exs @@ -58,7 +58,7 @@ defmodule EMQXUmbrella.MixProject do {:grpc, github: "emqx/grpc-erl", tag: "0.6.7", override: true}, {:minirest, github: "emqx/minirest", tag: "1.3.7", override: true}, {:ecpool, github: "emqx/ecpool", tag: "0.5.3", override: true}, - {:replayq, github: "emqx/replayq", tag: "0.3.6", override: true}, + {:replayq, github: "emqx/replayq", tag: "0.3.7", override: true}, {:pbkdf2, github: "emqx/erlang-pbkdf2", tag: "2.0.4", override: true}, {:emqtt, github: "emqx/emqtt", tag: "1.7.0-rc.2", override: true}, {:rulesql, github: "emqx/rulesql", tag: "0.1.4"}, diff --git a/rebar.config b/rebar.config index 4a35641c4..e61aac544 100644 --- a/rebar.config +++ b/rebar.config @@ -60,7 +60,7 @@ , {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.7"}}} , {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.7"}}} , {ecpool, {git, "https://github.com/emqx/ecpool", {tag, "0.5.3"}}} - , {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.6"}}} + , {replayq, {git, "https://github.com/emqx/replayq.git", {tag, "0.3.7"}}} , {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}} , {emqtt, {git, "https://github.com/emqx/emqtt", {tag, "1.7.0-rc.2"}}} , {rulesql, {git, "https://github.com/emqx/rulesql", {tag, "0.1.4"}}} From 
c0d478bd414f125d4eb10a76f82b1f083a2965de Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 13:59:28 +0100 Subject: [PATCH 108/131] fix(buffer_worker): type spec --- apps/emqx_resource/include/emqx_resource.hrl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/emqx_resource/include/emqx_resource.hrl b/apps/emqx_resource/include/emqx_resource.hrl index 7464eb4f8..ce4e02c2a 100644 --- a/apps/emqx_resource/include/emqx_resource.hrl +++ b/apps/emqx_resource/include/emqx_resource.hrl @@ -31,7 +31,9 @@ pick_key => term(), timeout => timeout(), expire_at => infinity | integer(), - async_reply_fun => reply_fun() + async_reply_fun => reply_fun(), + simple_query => boolean(), + is_buffer_supported => boolean() }. -type resource_data() :: #{ id := resource_id(), From 1c748070c115cc668b59f06c959d26a83cd28c1b Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 14:01:27 +0100 Subject: [PATCH 109/131] fix(emqx_connector_mqtt_worker): publish_async can only return ok # Please enter the commit message for your changes. Lines starting --- .../src/mqtt/emqx_connector_mqtt_worker.erl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl index 631ac9350..6da63f99a 100644 --- a/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl +++ b/apps/emqx_connector/src/mqtt/emqx_connector_mqtt_worker.erl @@ -257,12 +257,8 @@ send_to_remote_async(Name, MsgIn, Callback) -> do_send_async(Name, {true, Msg}, Callback) -> Pid = get_pid(Name), - case emqtt:publish_async(Pid, Msg, _Timeout = infinity, Callback) of - ok -> - {ok, Pid}; - {error, _} = Error -> - Error - end; + ok = emqtt:publish_async(Pid, Msg, _Timeout = infinity, Callback), + {ok, Pid}; do_send_async(_Name, false, _Callback) -> ok. 
From 9a9943d35db63e0f9802dc3c9e81270819940a6d Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 14:02:34 +0100 Subject: [PATCH 110/131] chore(emqx_mgmt_cache): ignore unused return value for dialyzer --- apps/emqx_management/src/emqx_mgmt_cache.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_management/src/emqx_mgmt_cache.erl b/apps/emqx_management/src/emqx_mgmt_cache.erl index 9b3cd4f56..e7f9ac0b1 100644 --- a/apps/emqx_management/src/emqx_mgmt_cache.erl +++ b/apps/emqx_management/src/emqx_mgmt_cache.erl @@ -47,7 +47,7 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). init([]) -> - ets:new(?MODULE, [set, named_table, public, {keypos, 1}]), + _ = ets:new(?MODULE, [set, named_table, public, {keypos, 1}]), {ok, #{latest_refresh => 0}}. handle_call(get_sys_memory, _From, State) -> From 58627b7958b03ca2f4ee087878fd22d9eb1db45d Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 14:08:02 +0100 Subject: [PATCH 111/131] chore(emqx_resource_manager): ignore unused return value for dialyzer --- apps/emqx_resource/src/emqx_resource_manager.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 5de55fc4f..db4441d88 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -194,7 +194,7 @@ remove(ResId, ClearMetrics) when is_binary(ResId) -> restart(ResId, Opts) when is_binary(ResId) -> case safe_call(ResId, restart, ?T_OPERATION) of ok -> - wait_for_ready(ResId, maps:get(start_timeout, Opts, 5000)), + _ = wait_for_ready(ResId, maps:get(start_timeout, Opts, 5000)), ok; {error, _Reason} = Error -> Error @@ -205,7 +205,7 @@ restart(ResId, Opts) when is_binary(ResId) -> start(ResId, Opts) -> case safe_call(ResId, start, ?T_OPERATION) of ok -> - wait_for_ready(ResId, maps:get(start_timeout, 
Opts, 5000)), + _ = wait_for_ready(ResId, maps:get(start_timeout, Opts, 5000)), ok; {error, _Reason} = Error -> Error From 0e299642cbf3aa2c803e3b860f2ee1e72d86dbe2 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 14:16:23 +0100 Subject: [PATCH 112/131] chore: function renamed but forgot to update xref exemption --- apps/emqx/test/emqx_bpapi_static_checks.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/test/emqx_bpapi_static_checks.erl b/apps/emqx/test/emqx_bpapi_static_checks.erl index f218739fc..142750cac 100644 --- a/apps/emqx/test/emqx_bpapi_static_checks.erl +++ b/apps/emqx/test/emqx_bpapi_static_checks.erl @@ -65,7 +65,7 @@ % Reason: legacy code. A fun and a QC query are % passed in the args, it's futile to try to statically % check it - "emqx_mgmt_api:do_query/2, emqx_mgmt_api:collect_total_from_tail_nodes/3" + "emqx_mgmt_api:do_query/2, emqx_mgmt_api:collect_total_from_tail_nodes/2" ). -define(XREF, myxref). From 980ba2fd35affcb995adf39a5f0ec61ceee4dbce Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 14:19:37 +0100 Subject: [PATCH 113/131] docs: Update apps/emqx/i18n/emqx_schema_i18n.conf Co-authored-by: Ivan Dyachkov --- apps/emqx/i18n/emqx_schema_i18n.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/i18n/emqx_schema_i18n.conf b/apps/emqx/i18n/emqx_schema_i18n.conf index 95790fae7..88ff373be 100644 --- a/apps/emqx/i18n/emqx_schema_i18n.conf +++ b/apps/emqx/i18n/emqx_schema_i18n.conf @@ -46,7 +46,7 @@ emqx_schema { overload_protection_backoff_delay { desc { - en: "When at high load, some unimportant tasks could be delayed for execution, here set the duration." + en: "The maximum duration of delay for background task execution during high load conditions." 
zh: "高负载时,一些不重要的任务可能会延迟执行,在这里设置允许延迟的时间。" } label { From 0d7cd2f0c55b77577c1aa40225eb260f282203f6 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 14:20:07 +0100 Subject: [PATCH 114/131] docs: Update apps/emqx_conf/i18n/emqx_conf_schema.conf Co-authored-by: Ivan Dyachkov --- apps/emqx_conf/i18n/emqx_conf_schema.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_conf/i18n/emqx_conf_schema.conf b/apps/emqx_conf/i18n/emqx_conf_schema.conf index 522e8cf6e..56da34621 100644 --- a/apps/emqx_conf/i18n/emqx_conf_schema.conf +++ b/apps/emqx_conf/i18n/emqx_conf_schema.conf @@ -1255,7 +1255,7 @@ Supervisor 报告的类型。默认为 error 类型。 log_overload_kill_restart_after { desc { - en: """If the handler is terminated, it restarts automatically after a delay. The value `infinity` prevents restarts.""" + en: """The handler restarts automatically after a delay in the event of termination, unless the value `infinity` is set, which blocks any subsequent restarts.""" zh: """如果处理进程终止,它会在以指定的时间后后自动重新启动。 `infinity` 不自动重启。""" } label { From 85fb53dcf0099a1c03e47d1d827ad8486e46f249 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 14:58:50 +0100 Subject: [PATCH 115/131] refactor: use compile switch to diff functions between ce and ee --- apps/emqx_bridge/src/emqx_bridge_api.erl | 10 +++--- .../src/schema/emqx_bridge_schema.erl | 36 ++++++++++--------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/apps/emqx_bridge/src/emqx_bridge_api.erl b/apps/emqx_bridge/src/emqx_bridge_api.erl index 2c43ce5d7..2e94f0719 100644 --- a/apps/emqx_bridge/src/emqx_bridge_api.erl +++ b/apps/emqx_bridge/src/emqx_bridge_api.erl @@ -171,12 +171,12 @@ bridge_info_examples(Method, WithMetrics) -> ee_bridge_examples(Method) ). +-if(?EMQX_RELEASE_EDITION == ee). ee_bridge_examples(Method) -> - try - emqx_ee_bridge:examples(Method) - catch - _:_ -> #{} - end. + emqx_ee_bridge:examples(Method). +-else. 
+ee_bridge_examples(_Method) -> #{}. +-endif. info_example(Type, Method, WithMetrics) -> maps:merge( diff --git a/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl b/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl index 4aecfac5d..c490294eb 100644 --- a/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl +++ b/apps/emqx_bridge/src/schema/emqx_bridge_schema.erl @@ -56,8 +56,8 @@ api_schema(Method) -> EE = ee_api_schemas(Method), hoconsc:union(Broker ++ EE). +-if(?EMQX_RELEASE_EDITION == ee). ee_api_schemas(Method) -> - %% must ensure the app is loaded before checking if fn is defined. ensure_loaded(emqx_ee_bridge, emqx_ee_bridge), case erlang:function_exported(emqx_ee_bridge, api_schemas, 1) of true -> emqx_ee_bridge:api_schemas(Method); @@ -65,13 +65,31 @@ ee_api_schemas(Method) -> end. ee_fields_bridges() -> - %% must ensure the app is loaded before checking if fn is defined. ensure_loaded(emqx_ee_bridge, emqx_ee_bridge), case erlang:function_exported(emqx_ee_bridge, fields, 1) of true -> emqx_ee_bridge:fields(bridges); false -> [] end. +%% must ensure the app is loaded before checking if fn is defined. +ensure_loaded(App, Mod) -> + try + _ = application:load(App), + _ = Mod:module_info(), + ok + catch + _:_ -> + ok + end. + +-else. + +ee_api_schemas(_) -> []. + +ee_fields_bridges() -> []. + +-endif. + common_bridge_fields() -> [ {enable, @@ -194,17 +212,3 @@ status() -> node_name() -> {"node", mk(binary(), #{desc => ?DESC("desc_node_name"), example => "emqx@127.0.0.1"})}. - -%%================================================================================================= -%% Internal fns -%%================================================================================================= - -ensure_loaded(App, Mod) -> - try - _ = application:load(App), - _ = Mod:module_info(), - ok - catch - _:_ -> - ok - end. 
From 7df50032c192b7401f1e66ce80b2f79a9a1a95b7 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 15:00:36 +0100 Subject: [PATCH 116/131] ci: only run static check for emqx-enterprise profile --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6da9231ed..15bc0c79c 100644 --- a/Makefile +++ b/Makefile @@ -79,7 +79,8 @@ ct: $(REBAR) merge-config .PHONY: static_checks static_checks: - @$(REBAR) as check do dialyzer, xref, ct --suite apps/emqx/test/emqx_static_checks --readable $(CT_READABLE) + @$(REBAR) as check do dialyzer, xref + @if [ "$${PROFILE}" = 'emqx-enterprise' ]; then $(REBAR) ct --suite apps/emqx/test/emqx_static_checks --readable $(CT_READABLE); fi APPS=$(shell $(SCRIPTS)/find-apps.sh) From 36ff3d75960087f51ddd060a53e564bd4800f491 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 16:11:17 +0100 Subject: [PATCH 117/131] chore: add a comment to explain why bpapi is only checked for enterprise --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 15bc0c79c..07160b7b0 100644 --- a/Makefile +++ b/Makefile @@ -77,6 +77,7 @@ test-compile: $(REBAR) merge-config ct: $(REBAR) merge-config @ENABLE_COVER_COMPILE=1 $(REBAR) ct --name $(CT_NODE_NAME) -c -v --cover_export_name $(CT_COVER_EXPORT_PREFIX)-ct +## only check bpapi for enterprise profile because it's a super-set. 
.PHONY: static_checks static_checks: @$(REBAR) as check do dialyzer, xref From a71d983ff886b22a94d23057bea4571643ad9622 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 16:10:41 +0100 Subject: [PATCH 118/131] test: fix assertMatch pattern to make rebar3 fmt work --- apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl index 8e8f45dbc..c4afa4db2 100644 --- a/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_mqtt_SUITE.erl @@ -720,7 +720,8 @@ t_mqtt_conn_bridge_egress_reconnect(_) -> %% verify the metrics of the bridge, the message should be queued ?assertMatch( - #{<<"status">> := Status} when Status == <<"connecting">>; Status == <<"disconnected">>, + #{<<"status">> := Status} when + Status == <<"connecting">> orelse Status == <<"disconnected">>, request_bridge(BridgeIDEgress) ), %% matched >= 3 because of possible retries. 
@@ -797,7 +798,8 @@ t_mqtt_conn_bridge_egress_async_reconnect(_) -> ok = emqx_listeners:stop_listener('tcp:default'), ct:sleep(1500), ?assertMatch( - #{<<"status">> := Status} when Status == <<"connecting">>; Status == <<"disconnected">>, + #{<<"status">> := Status} when + Status == <<"connecting">> orelse Status == <<"disconnected">>, request_bridge(BridgeIDEgress) ), From 3d6756cbe800e43d45bb0c8bf0b1e2d0152b522f Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 16:37:56 +0100 Subject: [PATCH 119/131] test: fix addrinuse errors in listeners API test SUITE --- .../test/emqx_mgmt_api_listeners_SUITE.erl | 67 ++++++++++++++----- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl index 9a8824e7d..0e212d52f 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_listeners_SUITE.erl @@ -20,6 +20,8 @@ -include_lib("eunit/include/eunit.hrl"). +-define(PORT, (20000 + ?LINE)). + all() -> emqx_common_test_helpers:all(?MODULE). @@ -32,13 +34,38 @@ end_per_suite(_) -> emqx_conf:remove([listeners, tcp, new1], #{override_to => local}), emqx_mgmt_api_test_util:end_suite([emqx_conf]). -t_max_connection_default(_Config) -> +init_per_testcase(Case, Config) -> + try + ?MODULE:Case({init, Config}) + catch + error:function_clause -> + Config + end. + +end_per_testcase(Case, Config) -> + try + ?MODULE:Case({'end', Config}) + catch + error:function_clause -> + ok + end. 
+ +t_max_connection_default({init, Config}) -> emqx_mgmt_api_test_util:end_suite([emqx_conf]), Etc = filename:join(["etc", "emqx.conf.all"]), + TmpConfName = atom_to_list(?FUNCTION_NAME) ++ ".conf", + Inc = filename:join(["etc", TmpConfName]), ConfFile = emqx_common_test_helpers:app_path(emqx_conf, Etc), - Bin = <<"listeners.tcp.max_connection_test {bind = \"0.0.0.0:3883\"}">>, - ok = file:write_file(ConfFile, Bin, [append]), + IncFile = emqx_common_test_helpers:app_path(emqx_conf, Inc), + Port = integer_to_binary(?PORT), + Bin = <<"listeners.tcp.max_connection_test {bind = \"0.0.0.0:", Port/binary, "\"}">>, + ok = file:write_file(IncFile, Bin), + ok = file:write_file(ConfFile, ["include \"", TmpConfName, "\""], [append]), emqx_mgmt_api_test_util:init_suite([emqx_conf]), + [{tmp_config_file, IncFile} | Config]; +t_max_connection_default({'end', Config}) -> + ok = file:delete(proplists:get_value(tmp_config_file, Config)); +t_max_connection_default(Config) when is_list(Config) -> %% Check infinity is binary not atom. #{<<"listeners">> := Listeners} = emqx_mgmt_api_listeners:do_list_listeners(), Target = lists:filter( @@ -51,7 +78,7 @@ t_max_connection_default(_Config) -> emqx_conf:remove([listeners, tcp, max_connection_test], #{override_to => cluster}), ok. 
-t_list_listeners(_) -> +t_list_listeners(Config) when is_list(Config) -> Path = emqx_mgmt_api_test_util:api_path(["listeners"]), Res = request(get, Path, [], []), #{<<"listeners">> := Expect} = emqx_mgmt_api_listeners:do_list_listeners(), @@ -71,9 +98,10 @@ t_list_listeners(_) -> ?assertMatch({error, {"HTTP/1.1", 404, _}}, request(get, NewPath, [], [])), OriginListener2 = maps:remove(<<"id">>, OriginListener), + Port = integer_to_binary(?PORT), NewConf = OriginListener2#{ <<"name">> => <<"new">>, - <<"bind">> => <<"0.0.0.0:2883">>, + <<"bind">> => <<"0.0.0.0:", Port/binary>>, <<"max_connections">> := <<"infinity">> }, Create = request(post, Path, [], NewConf), @@ -89,7 +117,7 @@ t_list_listeners(_) -> ?assertMatch({error, {"HTTP/1.1", 404, _}}, request(get, NewPath, [], [])), ok. -t_tcp_crud_listeners_by_id(_) -> +t_tcp_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"tcp:default">>, NewListenerId = <<"tcp:new">>, MinListenerId = <<"tcp:min">>, @@ -97,7 +125,7 @@ t_tcp_crud_listeners_by_id(_) -> Type = <<"tcp">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). -t_ssl_crud_listeners_by_id(_) -> +t_ssl_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"ssl:default">>, NewListenerId = <<"ssl:new">>, MinListenerId = <<"ssl:min">>, @@ -105,7 +133,7 @@ t_ssl_crud_listeners_by_id(_) -> Type = <<"ssl">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). -t_ws_crud_listeners_by_id(_) -> +t_ws_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"ws:default">>, NewListenerId = <<"ws:new">>, MinListenerId = <<"ws:min">>, @@ -113,7 +141,7 @@ t_ws_crud_listeners_by_id(_) -> Type = <<"ws">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). 
-t_wss_crud_listeners_by_id(_) -> +t_wss_crud_listeners_by_id(Config) when is_list(Config) -> ListenerId = <<"wss:default">>, NewListenerId = <<"wss:new">>, MinListenerId = <<"wss:min">>, @@ -121,7 +149,7 @@ t_wss_crud_listeners_by_id(_) -> Type = <<"wss">>, crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type). -t_api_listeners_list_not_ready(_Config) -> +t_api_listeners_list_not_ready(Config) when is_list(Config) -> net_kernel:start(['listeners@127.0.0.1', longnames]), ct:timetrap({seconds, 120}), snabbkaffe:fix_ct_logging(), @@ -151,16 +179,17 @@ t_api_listeners_list_not_ready(_Config) -> emqx_common_test_helpers:stop_slave(Node2) end. -t_clear_certs(_) -> +t_clear_certs(Config) when is_list(Config) -> ListenerId = <<"ssl:default">>, NewListenerId = <<"ssl:clear">>, OriginPath = emqx_mgmt_api_test_util:api_path(["listeners", ListenerId]), NewPath = emqx_mgmt_api_test_util:api_path(["listeners", NewListenerId]), ConfTempT = request(get, OriginPath, [], []), + Port = integer_to_binary(?PORT), ConfTemp = ConfTempT#{ <<"id">> => NewListenerId, - <<"bind">> => <<"0.0.0.0:2883">> + <<"bind">> => <<"0.0.0.0:", Port/binary>> }, %% create, make sure the cert files are created @@ -245,9 +274,11 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> %% create with full options ?assertEqual({error, not_found}, is_running(NewListenerId)), ?assertMatch({error, {"HTTP/1.1", 404, _}}, request(get, NewPath, [], [])), + Port1 = integer_to_binary(?PORT), + Port2 = integer_to_binary(?PORT), NewConf = OriginListener#{ <<"id">> => NewListenerId, - <<"bind">> => <<"0.0.0.0:2883">> + <<"bind">> => <<"0.0.0.0:", Port1/binary>> }, Create = request(post, NewPath, [], NewConf), ?assertEqual(lists:sort(maps:keys(OriginListener)), lists:sort(maps:keys(Create))), @@ -271,7 +302,7 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> } -> #{ <<"id">> => MinListenerId, - <<"bind">> => <<"0.0.0.0:3883">>, + <<"bind">> 
=> <<"0.0.0.0:", Port2/binary>>, <<"type">> => Type, <<"ssl_options">> => #{ <<"cacertfile">> => CaCertFile, @@ -282,7 +313,7 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> _ -> #{ <<"id">> => MinListenerId, - <<"bind">> => <<"0.0.0.0:3883">>, + <<"bind">> => <<"0.0.0.0:", Port2/binary>>, <<"type">> => Type } end, @@ -296,7 +327,7 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> BadPath = emqx_mgmt_api_test_util:api_path(["listeners", BadId]), BadConf = OriginListener#{ <<"id">> => BadId, - <<"bind">> => <<"0.0.0.0:2883">> + <<"bind">> => <<"0.0.0.0:", Port1/binary>> }, ?assertMatch({error, {"HTTP/1.1", 400, _}}, request(post, BadPath, [], BadConf)), @@ -332,12 +363,12 @@ crud_listeners_by_id(ListenerId, NewListenerId, MinListenerId, BadId, Type) -> ?assertEqual([], delete(NewPath)), ok. -t_delete_nonexistent_listener(_) -> +t_delete_nonexistent_listener(Config) when is_list(Config) -> NonExist = emqx_mgmt_api_test_util:api_path(["listeners", "tcp:nonexistent"]), ?assertEqual([], delete(NonExist)), ok. 
-t_action_listeners(_) -> +t_action_listeners(Config) when is_list(Config) -> ID = "tcp:default", action_listener(ID, "stop", false), action_listener(ID, "start", true), From 611701406f68d50f31481c35ad2c6482b38e059c Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 17:24:14 +0100 Subject: [PATCH 120/131] chore: upgrade dashboard to v1.1.7 for ce and e1.0.3 for ee --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 6da9231ed..f9be4401a 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,8 @@ export EMQX_DEFAULT_BUILDER = ghcr.io/emqx/emqx-builder/5.0-26:1.13.4-24.3.4.2-1 export EMQX_DEFAULT_RUNNER = debian:11-slim export OTP_VSN ?= $(shell $(CURDIR)/scripts/get-otp-vsn.sh) export ELIXIR_VSN ?= $(shell $(CURDIR)/scripts/get-elixir-vsn.sh) -export EMQX_DASHBOARD_VERSION ?= v1.1.6 -export EMQX_EE_DASHBOARD_VERSION ?= e1.0.2 +export EMQX_DASHBOARD_VERSION ?= v1.1.7 +export EMQX_EE_DASHBOARD_VERSION ?= e1.0.3 export EMQX_REL_FORM ?= tgz export QUICER_DOWNLOAD_FROM_RELEASE = 1 ifeq ($(OS),Windows_NT) From 42462db8b8185b65f7ab95b1bbcc03112d32114a Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 17:26:00 +0100 Subject: [PATCH 121/131] chore: bump version to v5.0.16 and e5.0.0 --- apps/emqx/include/emqx_release.hrl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index 72244acaa..4e4066eef 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,10 +32,10 @@ %% `apps/emqx/src/bpapi/README.md' %% Community edition --define(EMQX_RELEASE_CE, "5.0.15"). +-define(EMQX_RELEASE_CE, "5.0.16"). %% Enterprise edition --define(EMQX_RELEASE_EE, "5.0.0-rc.4"). +-define(EMQX_RELEASE_EE, "5.0.0"). %% the HTTP API version -define(EMQX_API_VERSION, "5.0"). 
From c5085ab235182c21bb5dc0f61fa7ee6d99318049 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 17:42:40 +0100 Subject: [PATCH 122/131] chore: bump chart version to v5.0.16 --- deploy/charts/emqx/Chart.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index fb689839c..2208f3019 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.0.15 +version: 5.0.16 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.0.15 +appVersion: 5.0.16 From 0c1df8109c1284b10f3a4d60c0e23fb222c0f1cf Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 17:43:01 +0100 Subject: [PATCH 123/131] chore: Generate changelog for v5.0.16 --- changes/v5.0.16-en.md | 18 ++++++++++++++++++ changes/v5.0.16-zh.md | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 changes/v5.0.16-en.md create mode 100644 changes/v5.0.16-zh.md diff --git a/changes/v5.0.16-en.md b/changes/v5.0.16-en.md new file mode 100644 index 000000000..8995d6d10 --- /dev/null +++ b/changes/v5.0.16-en.md @@ -0,0 +1,18 @@ +# v5.0.16 + +## Enhancements + + + +## Bug fixes + +- [#9824](https://github.com/emqx/emqx/pull/9824) The `topics/{topic}` API endpoint would return `500 - Internal Error` if a topic had multiple routes. This is fixed by returning a list of routes. + +- [#9832](https://github.com/emqx/emqx/pull/9832) Improve error log when bridge in 'sync' mode timed out to get response. 
+ +- [#9834](https://github.com/emqx/emqx/pull/9834) Allow `mqtt.idle_timeout` to be set to `infinity` + +- [#9839](https://github.com/emqx/emqx/pull/9839) Make sure that the content of an Authorization header that users have specified for a webhook bridge is not printed to log files. + +- [#9884](https://github.com/emqx/emqx/pull/9884) Do not resume all buffer workers on successful health check of any individual resource. + Previously after any successful healthcheck, all buffer workers (for all resources) were resumed. diff --git a/changes/v5.0.16-zh.md b/changes/v5.0.16-zh.md new file mode 100644 index 000000000..e8912efbb --- /dev/null +++ b/changes/v5.0.16-zh.md @@ -0,0 +1,18 @@ +# v5.0.16 + +## 增强 + + + +## 修复 + +- [#9824](https://github.com/emqx/emqx/pull/9824) 修复:当存在多个路由信息时,topics/{topic} 将会返回 500 - Internal Error 的问题,现在将会正确的返回路由信息列表。 + +- [#9832](https://github.com/emqx/emqx/pull/9832) 优化桥接同步资源调用超时情况下的一个错误日志。 + +- [#9834](https://github.com/emqx/emqx/pull/9834) 允许配置项 `mqtt.idle_timeout` 设置成 `infinity` + +- [#9839](https://github.com/emqx/emqx/pull/9839) 确保用户为webhook-bridge指定的Authorization-HTTP-header的内容不会被打印到日志文件。 + +- [#9884](https://github.com/emqx/emqx/pull/9884) 不在任意一个资源健康检查成功时恢复所有资源发送缓存。 + 在此修复之前,在任意一个资源成功进行健康检查后,所有资源的缓存都会尝试恢复。 From f3d433dee5209b2abda9d6a13ef1f65335feffaf Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 20:29:04 +0100 Subject: [PATCH 124/131] docs: move changes missed v5.0.16 release to v5.0.17 --- changes/{v5.0.16 => v5.0.17}/feat-9802.en.md | 0 changes/{v5.0.16 => v5.0.17}/feat-9802.zh.md | 0 changes/{v5.0.16 => v5.0.17}/feat-9871.en.md | 0 changes/{v5.0.16 => v5.0.17}/feat-9871.zh.md | 0 changes/{v5.0.16 => v5.0.17}/fix-9864.en.md | 0 changes/{v5.0.16 => v5.0.17}/fix-9864.zh.md | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename changes/{v5.0.16 => v5.0.17}/feat-9802.en.md (100%) rename changes/{v5.0.16 => v5.0.17}/feat-9802.zh.md (100%) rename changes/{v5.0.16 => v5.0.17}/feat-9871.en.md (100%) 
rename changes/{v5.0.16 => v5.0.17}/feat-9871.zh.md (100%) rename changes/{v5.0.16 => v5.0.17}/fix-9864.en.md (100%) rename changes/{v5.0.16 => v5.0.17}/fix-9864.zh.md (100%) diff --git a/changes/v5.0.16/feat-9802.en.md b/changes/v5.0.17/feat-9802.en.md similarity index 100% rename from changes/v5.0.16/feat-9802.en.md rename to changes/v5.0.17/feat-9802.en.md diff --git a/changes/v5.0.16/feat-9802.zh.md b/changes/v5.0.17/feat-9802.zh.md similarity index 100% rename from changes/v5.0.16/feat-9802.zh.md rename to changes/v5.0.17/feat-9802.zh.md diff --git a/changes/v5.0.16/feat-9871.en.md b/changes/v5.0.17/feat-9871.en.md similarity index 100% rename from changes/v5.0.16/feat-9871.en.md rename to changes/v5.0.17/feat-9871.en.md diff --git a/changes/v5.0.16/feat-9871.zh.md b/changes/v5.0.17/feat-9871.zh.md similarity index 100% rename from changes/v5.0.16/feat-9871.zh.md rename to changes/v5.0.17/feat-9871.zh.md diff --git a/changes/v5.0.16/fix-9864.en.md b/changes/v5.0.17/fix-9864.en.md similarity index 100% rename from changes/v5.0.16/fix-9864.en.md rename to changes/v5.0.17/fix-9864.en.md diff --git a/changes/v5.0.16/fix-9864.zh.md b/changes/v5.0.17/fix-9864.zh.md similarity index 100% rename from changes/v5.0.16/fix-9864.zh.md rename to changes/v5.0.17/fix-9864.zh.md From c44a836d24d2662b862ff7a0309969d622e632a2 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 20:30:16 +0100 Subject: [PATCH 125/131] docs: delete merged changelog files --- changes/v5.0.15/feat-9569.en.md | 1 - changes/v5.0.15/feat-9569.zh.md | 1 - changes/v5.0.15/feat-9585.en.md | 1 - changes/v5.0.15/feat-9585.zh.md | 1 - changes/v5.0.15/feat-9586.en.md | 1 - changes/v5.0.15/feat-9586.zh.md | 1 - changes/v5.0.15/feat-9628.en.md | 1 - changes/v5.0.15/feat-9628.zh.md | 1 - changes/v5.0.15/feat-9722.en.md | 3 --- changes/v5.0.15/feat-9722.zh.md | 3 --- changes/v5.0.15/feat-9725.en.md | 11 ----------- changes/v5.0.15/feat-9725.zh.md | 8 -------- changes/v5.0.15/feat-9736.en.md 
| 5 ----- changes/v5.0.15/feat-9736.zh.md | 5 ----- changes/v5.0.15/feat-9774.en.md | 3 --- changes/v5.0.15/feat-9774.zh.md | 2 -- changes/v5.0.15/fix-9626.en.md | 2 -- changes/v5.0.15/fix-9626.zh.md | 3 --- changes/v5.0.15/fix-9680.en.md | 1 - changes/v5.0.15/fix-9680.zh.md | 1 - changes/v5.0.15/fix-9726.en.md | 1 - changes/v5.0.15/fix-9726.zh.md | 1 - changes/v5.0.15/fix-9735.en.md | 1 - changes/v5.0.15/fix-9735.zh.md | 1 - changes/v5.0.15/fix-9748.en.md | 1 - changes/v5.0.15/fix-9748.zh.md | 1 - changes/v5.0.15/fix-9749.en.md | 1 - changes/v5.0.15/fix-9749.zh.md | 1 - changes/v5.0.15/fix-9750.en.md | 5 ----- changes/v5.0.15/fix-9750.zh.md | 4 ---- changes/v5.0.15/fix-9751.en.md | 1 - changes/v5.0.15/fix-9751.zh.md | 1 - changes/v5.0.15/fix-9763.en.md | 1 - changes/v5.0.15/fix-9763.zh.md | 1 - changes/v5.0.15/fix-9765.en.md | 6 ------ changes/v5.0.15/fix-9765.zh.md | 7 ------- changes/v5.0.15/fix-9769.en.md | 1 - changes/v5.0.15/fix-9769.zh.md | 1 - changes/v5.0.15/fix-9780.en.md | 1 - changes/v5.0.15/fix-9780.zh.md | 1 - changes/v5.0.15/fix-9781.en.md | 1 - changes/v5.0.15/fix-9781.zh.md | 1 - changes/v5.0.15/fix-9785.en.md | 1 - changes/v5.0.15/fix-9785.zh.md | 1 - changes/v5.0.15/fix-9787.en.md | 1 - changes/v5.0.15/fix-9787.zh.md | 1 - changes/v5.0.16/fix-9824.en.md | 1 - changes/v5.0.16/fix-9824.zh.md | 1 - changes/v5.0.16/fix-9832.en.md | 1 - changes/v5.0.16/fix-9832.zh.md | 1 - changes/v5.0.16/fix-9834.en.md | 1 - changes/v5.0.16/fix-9834.zh.md | 1 - changes/v5.0.16/fix-9839.en.md | 1 - changes/v5.0.16/fix-9839.zh.md | 1 - changes/v5.0.16/fix-9884.en.md | 2 -- changes/v5.0.16/fix-9884.zh.md | 2 -- 56 files changed, 111 deletions(-) delete mode 100644 changes/v5.0.15/feat-9569.en.md delete mode 100644 changes/v5.0.15/feat-9569.zh.md delete mode 100644 changes/v5.0.15/feat-9585.en.md delete mode 100644 changes/v5.0.15/feat-9585.zh.md delete mode 100644 changes/v5.0.15/feat-9586.en.md delete mode 100644 changes/v5.0.15/feat-9586.zh.md delete mode 100644 
changes/v5.0.15/feat-9628.en.md delete mode 100644 changes/v5.0.15/feat-9628.zh.md delete mode 100644 changes/v5.0.15/feat-9722.en.md delete mode 100644 changes/v5.0.15/feat-9722.zh.md delete mode 100644 changes/v5.0.15/feat-9725.en.md delete mode 100644 changes/v5.0.15/feat-9725.zh.md delete mode 100644 changes/v5.0.15/feat-9736.en.md delete mode 100644 changes/v5.0.15/feat-9736.zh.md delete mode 100644 changes/v5.0.15/feat-9774.en.md delete mode 100644 changes/v5.0.15/feat-9774.zh.md delete mode 100644 changes/v5.0.15/fix-9626.en.md delete mode 100644 changes/v5.0.15/fix-9626.zh.md delete mode 100644 changes/v5.0.15/fix-9680.en.md delete mode 100644 changes/v5.0.15/fix-9680.zh.md delete mode 100644 changes/v5.0.15/fix-9726.en.md delete mode 100644 changes/v5.0.15/fix-9726.zh.md delete mode 100644 changes/v5.0.15/fix-9735.en.md delete mode 100644 changes/v5.0.15/fix-9735.zh.md delete mode 100644 changes/v5.0.15/fix-9748.en.md delete mode 100644 changes/v5.0.15/fix-9748.zh.md delete mode 100644 changes/v5.0.15/fix-9749.en.md delete mode 100644 changes/v5.0.15/fix-9749.zh.md delete mode 100644 changes/v5.0.15/fix-9750.en.md delete mode 100644 changes/v5.0.15/fix-9750.zh.md delete mode 100644 changes/v5.0.15/fix-9751.en.md delete mode 100644 changes/v5.0.15/fix-9751.zh.md delete mode 100644 changes/v5.0.15/fix-9763.en.md delete mode 100644 changes/v5.0.15/fix-9763.zh.md delete mode 100644 changes/v5.0.15/fix-9765.en.md delete mode 100644 changes/v5.0.15/fix-9765.zh.md delete mode 100644 changes/v5.0.15/fix-9769.en.md delete mode 100644 changes/v5.0.15/fix-9769.zh.md delete mode 100644 changes/v5.0.15/fix-9780.en.md delete mode 100644 changes/v5.0.15/fix-9780.zh.md delete mode 100644 changes/v5.0.15/fix-9781.en.md delete mode 100644 changes/v5.0.15/fix-9781.zh.md delete mode 100644 changes/v5.0.15/fix-9785.en.md delete mode 100644 changes/v5.0.15/fix-9785.zh.md delete mode 100644 changes/v5.0.15/fix-9787.en.md delete mode 100644 changes/v5.0.15/fix-9787.zh.md delete 
mode 100644 changes/v5.0.16/fix-9824.en.md delete mode 100644 changes/v5.0.16/fix-9824.zh.md delete mode 100644 changes/v5.0.16/fix-9832.en.md delete mode 100644 changes/v5.0.16/fix-9832.zh.md delete mode 100644 changes/v5.0.16/fix-9834.en.md delete mode 100644 changes/v5.0.16/fix-9834.zh.md delete mode 100644 changes/v5.0.16/fix-9839.en.md delete mode 100644 changes/v5.0.16/fix-9839.zh.md delete mode 100644 changes/v5.0.16/fix-9884.en.md delete mode 100644 changes/v5.0.16/fix-9884.zh.md diff --git a/changes/v5.0.15/feat-9569.en.md b/changes/v5.0.15/feat-9569.en.md deleted file mode 100644 index f3b70ec41..000000000 --- a/changes/v5.0.15/feat-9569.en.md +++ /dev/null @@ -1 +0,0 @@ -Refactor `/authorization/sources/built_in_database/` by adding `rules/` to the path. diff --git a/changes/v5.0.15/feat-9569.zh.md b/changes/v5.0.15/feat-9569.zh.md deleted file mode 100644 index dd2e19c11..000000000 --- a/changes/v5.0.15/feat-9569.zh.md +++ /dev/null @@ -1 +0,0 @@ -重构 `/authorization/sources/built_in_database/` 接口,将 `rules/` 添加到了其路径中。 diff --git a/changes/v5.0.15/feat-9585.en.md b/changes/v5.0.15/feat-9585.en.md deleted file mode 100644 index 986cbb0c3..000000000 --- a/changes/v5.0.15/feat-9585.en.md +++ /dev/null @@ -1 +0,0 @@ -`/bridges_probe` API endpoint to test params for creating a new data bridge. diff --git a/changes/v5.0.15/feat-9585.zh.md b/changes/v5.0.15/feat-9585.zh.md deleted file mode 100644 index 82dd307ae..000000000 --- a/changes/v5.0.15/feat-9585.zh.md +++ /dev/null @@ -1 +0,0 @@ -添加新 API 接口 `/bridges_probe` 用于测试创建桥接的参数是否可用。 diff --git a/changes/v5.0.15/feat-9586.en.md b/changes/v5.0.15/feat-9586.en.md deleted file mode 100644 index 777fb81df..000000000 --- a/changes/v5.0.15/feat-9586.en.md +++ /dev/null @@ -1 +0,0 @@ -Basic auth is no longer allowed for API calls, must use API key instead. 
diff --git a/changes/v5.0.15/feat-9586.zh.md b/changes/v5.0.15/feat-9586.zh.md deleted file mode 100644 index 102266a46..000000000 --- a/changes/v5.0.15/feat-9586.zh.md +++ /dev/null @@ -1 +0,0 @@ -API 调用不再支持基于 `username:password` 的 `baisc` 认证, 现在 API 必须通过 API Key 才能进行调用。 diff --git a/changes/v5.0.15/feat-9628.en.md b/changes/v5.0.15/feat-9628.en.md deleted file mode 100644 index 6f814dd21..000000000 --- a/changes/v5.0.15/feat-9628.en.md +++ /dev/null @@ -1 +0,0 @@ -Expose additional resource configuration parameters: `start_after_created` and `start_timeout`. diff --git a/changes/v5.0.15/feat-9628.zh.md b/changes/v5.0.15/feat-9628.zh.md deleted file mode 100644 index fee14181b..000000000 --- a/changes/v5.0.15/feat-9628.zh.md +++ /dev/null @@ -1 +0,0 @@ -为桥接资源增加了配置参数:`start_after_created` 和 `start_timeout`。 diff --git a/changes/v5.0.15/feat-9722.en.md b/changes/v5.0.15/feat-9722.en.md deleted file mode 100644 index b86f37b83..000000000 --- a/changes/v5.0.15/feat-9722.en.md +++ /dev/null @@ -1,3 +0,0 @@ -Add the following configuration options for Pushing metrics to Prometheus Push Gateway: -- `headers`: Allows custom HTTP request headers. -- `job_name`: allows to customize the name of the Job pushed to Push Gateway. diff --git a/changes/v5.0.15/feat-9722.zh.md b/changes/v5.0.15/feat-9722.zh.md deleted file mode 100644 index a806cb1de..000000000 --- a/changes/v5.0.15/feat-9722.zh.md +++ /dev/null @@ -1,3 +0,0 @@ -为 Prometheus 推送到 Push Gateway 新增以下配置项: -- `headers`:允许自定义 HTTP 请求头。 -- `job_name`:允许自定义推送到 Push Gateway 的 Job 名称。 diff --git a/changes/v5.0.15/feat-9725.en.md b/changes/v5.0.15/feat-9725.en.md deleted file mode 100644 index 832aa6bf9..000000000 --- a/changes/v5.0.15/feat-9725.en.md +++ /dev/null @@ -1,11 +0,0 @@ -Remove the config `auto_reconnect` from the emqx_authz, emqx_authn and data-bridge componets. 
-This is because we have another config with similar functions: `resource_opts.auto_restart_interval`。 - -The functions of these two config are difficult to distinguish, which will lead to confusion. -After this change, `auto_reconnect` will not be configurable (always be true), and the underlying -drivers that support this config will automatically reconnect the abnormally disconnected -connection every `2s`. - -And the config `resource_opts.auto_restart_interval` is still available for user. -It is the time interval that emqx restarts the resource when the connection cannot be -established for some reason. diff --git a/changes/v5.0.15/feat-9725.zh.md b/changes/v5.0.15/feat-9725.zh.md deleted file mode 100644 index e7a2412d4..000000000 --- a/changes/v5.0.15/feat-9725.zh.md +++ /dev/null @@ -1,8 +0,0 @@ -从认证、鉴权和数据桥接功能中,删除 `auto_reconnect` 配置项,因为我们还有另一个功能类似的配置项: -`resource_opts.auto_restart_interval`。 - -这两个配置项的功能难以区分,会导致困惑。此修改之后,`auto_reconnect` 将不可配置(永远为 true), -支持此配置的底层驱动将以 `2s` 为周期自动重连异常断开的连接。 - -而 `resource_opts.auto_restart_interval` 配置项仍然开放给用户配置,它是资源因为某些原因 -无法建立连接的时候,emqx 重新启动该资源的时间间隔。 diff --git a/changes/v5.0.15/feat-9736.en.md b/changes/v5.0.15/feat-9736.en.md deleted file mode 100644 index 59d7bd558..000000000 --- a/changes/v5.0.15/feat-9736.en.md +++ /dev/null @@ -1,5 +0,0 @@ -Refactor of /bridges API to make it more consistent with other APIs: -- bridge enable/disable is now done via the endpoint `/bridges/{id}/enable/[true,false]` -- `/bridges/{id}/operation/{operation}` endpoints are now `/bridges/{id}/{operation}` -- metrics are moved out from the GET `/bridges/{id}` response and can now be fetched via `/bridges/{id}/metrics` -- the `bridges/{id}/reset_metrics` endpoint is now `/bridges/{id}/metrics/reset` diff --git a/changes/v5.0.15/feat-9736.zh.md b/changes/v5.0.15/feat-9736.zh.md deleted file mode 100644 index 0107c8ab6..000000000 --- a/changes/v5.0.15/feat-9736.zh.md +++ /dev/null @@ -1,5 +0,0 @@ -重构部分 /bridges 的API 使得其和其他 API 能够更加一致: -- 
桥接的启用和禁用现在是通过 `/bridges/{id}/enable/[true,false]` API 来实现的 -- 使用 `/bridges/{id}/{operation}` 替换了旧的 `/bridges/{id}/operation/{operation}` API -- 指标数据从 `/bridges/{id}` 的响应消息中移除,现在可以使用新的 API `/bridges/{id}/metrics` 进行访问 -- 使用 `/bridges/{id}/metrics/reset` 替换了旧的 `bridges/{id}/reset_metrics` API diff --git a/changes/v5.0.15/feat-9774.en.md b/changes/v5.0.15/feat-9774.en.md deleted file mode 100644 index 722c4db6b..000000000 --- a/changes/v5.0.15/feat-9774.en.md +++ /dev/null @@ -1,3 +0,0 @@ -Add a password complexity requirement when adding or modifying Dashboard users via the API. -Now password must contain at least 2 of alphabetic, numeric and special characters, -and must be 8 to 64 characters long. diff --git a/changes/v5.0.15/feat-9774.zh.md b/changes/v5.0.15/feat-9774.zh.md deleted file mode 100644 index 21bfddfaf..000000000 --- a/changes/v5.0.15/feat-9774.zh.md +++ /dev/null @@ -1,2 +0,0 @@ -通过 API 添加、修改 Dashboard 用户时,增加对密码复杂度的要求。 -现在密码必须包含字母、数字以及特殊字符中的至少 2 种,并且长度范围必须是 8~64 个字符。 diff --git a/changes/v5.0.15/fix-9626.en.md b/changes/v5.0.15/fix-9626.en.md deleted file mode 100644 index cc1c86d3e..000000000 --- a/changes/v5.0.15/fix-9626.en.md +++ /dev/null @@ -1,2 +0,0 @@ -Return authorization settings with default values. -The authorization cache is enabled by default, but due to the missing default value in `GET` response of `/authorization/settings`, it seemed to be disabled from the dashboard. 
diff --git a/changes/v5.0.15/fix-9626.zh.md b/changes/v5.0.15/fix-9626.zh.md deleted file mode 100644 index bc2391f48..000000000 --- a/changes/v5.0.15/fix-9626.zh.md +++ /dev/null @@ -1,3 +0,0 @@ -为授权设置 API 返回默认值。 -授权缓存默认为开启,但是在此修复前,因为默认值在 `/authorization/settings` 这个 API 的返回值中缺失, -使得在仪表盘配置页面中看起来是关闭了。 diff --git a/changes/v5.0.15/fix-9680.en.md b/changes/v5.0.15/fix-9680.en.md deleted file mode 100644 index 2ee3caaa5..000000000 --- a/changes/v5.0.15/fix-9680.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix the problem that username and password authentication is mandatory in Influxdb v1 write API. diff --git a/changes/v5.0.15/fix-9680.zh.md b/changes/v5.0.15/fix-9680.zh.md deleted file mode 100644 index bd1ace306..000000000 --- a/changes/v5.0.15/fix-9680.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复 InfluxDB v1 桥接写入 API 配置中强制需要用户名密码认证的问题。 diff --git a/changes/v5.0.15/fix-9726.en.md b/changes/v5.0.15/fix-9726.en.md deleted file mode 100644 index 9aa522690..000000000 --- a/changes/v5.0.15/fix-9726.en.md +++ /dev/null @@ -1 +0,0 @@ -Client fuzzy search API results were missing information which could tell if more results are available in the next pages, this is now fixed by providing `hasnext` flag in the response. diff --git a/changes/v5.0.15/fix-9726.zh.md b/changes/v5.0.15/fix-9726.zh.md deleted file mode 100644 index 3554d2db7..000000000 --- a/changes/v5.0.15/fix-9726.zh.md +++ /dev/null @@ -1 +0,0 @@ -在此修复前,客户端模糊搜索 API 缺少一些可以用于判断是否可以继续翻页的信息,现在通过在响应中提供 `hasnext` 标志来解决这个问题。 diff --git a/changes/v5.0.15/fix-9735.en.md b/changes/v5.0.15/fix-9735.en.md deleted file mode 100644 index 6085adecd..000000000 --- a/changes/v5.0.15/fix-9735.en.md +++ /dev/null @@ -1 +0,0 @@ -Password information has been removed from information log messages for http, ldap, mongo, mqtt, mysql, pgsql and redis. 
diff --git a/changes/v5.0.15/fix-9735.zh.md b/changes/v5.0.15/fix-9735.zh.md deleted file mode 100644 index d8aa81fd1..000000000 --- a/changes/v5.0.15/fix-9735.zh.md +++ /dev/null @@ -1 +0,0 @@ -密码信息已从http、ldap、mongo、mqtt、mysql、pgsql和redis的信息日志消息中删除。 diff --git a/changes/v5.0.15/fix-9748.en.md b/changes/v5.0.15/fix-9748.en.md deleted file mode 100644 index 85f5896b2..000000000 --- a/changes/v5.0.15/fix-9748.en.md +++ /dev/null @@ -1 +0,0 @@ -Listeners not configured with `max_connections` will cause the cluster `/listeners` API to return 500 error. diff --git a/changes/v5.0.15/fix-9748.zh.md b/changes/v5.0.15/fix-9748.zh.md deleted file mode 100644 index cab352e79..000000000 --- a/changes/v5.0.15/fix-9748.zh.md +++ /dev/null @@ -1 +0,0 @@ -监听器不配置 `max_connections` 时会导致集群 `/listeners` 接口返回 500 错误。 diff --git a/changes/v5.0.15/fix-9749.en.md b/changes/v5.0.15/fix-9749.en.md deleted file mode 100644 index f079385ce..000000000 --- a/changes/v5.0.15/fix-9749.en.md +++ /dev/null @@ -1 +0,0 @@ -In some cases search APIs could respond with an incorrect `count` value in the metadata, that is usually much bigger than expected, this is now fixed. diff --git a/changes/v5.0.15/fix-9749.zh.md b/changes/v5.0.15/fix-9749.zh.md deleted file mode 100644 index 356cf9475..000000000 --- a/changes/v5.0.15/fix-9749.zh.md +++ /dev/null @@ -1 +0,0 @@ -在某些情况下,搜索 API 可能会在元数据中响应不正确的 `count` 值,这通常比预期的要大得多,现在已经修复了。 diff --git a/changes/v5.0.15/fix-9750.en.md b/changes/v5.0.15/fix-9750.en.md deleted file mode 100644 index 98c07dfb6..000000000 --- a/changes/v5.0.15/fix-9750.en.md +++ /dev/null @@ -1,5 +0,0 @@ -Reload overriding configs after boot. 
-Prior to this change, two configs were allow to change from dashboard, but will not take effect after reboot: - * Logging (such as level) - * Prometheus configs - diff --git a/changes/v5.0.15/fix-9750.zh.md b/changes/v5.0.15/fix-9750.zh.md deleted file mode 100644 index 605d4c225..000000000 --- a/changes/v5.0.15/fix-9750.zh.md +++ /dev/null @@ -1,4 +0,0 @@ -启动后重新加载一些重载配置项。 -在此修复前,下面两个配置项允许从 Dashboard 控制台修改,但是在重启后无法生效: - * 日志 (例如日志级别) - * Prometheus 配置 diff --git a/changes/v5.0.15/fix-9751.en.md b/changes/v5.0.15/fix-9751.en.md deleted file mode 100644 index f45b99129..000000000 --- a/changes/v5.0.15/fix-9751.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix that obsoleted cert file will not be deleted after the listener is updated/deleted diff --git a/changes/v5.0.15/fix-9751.zh.md b/changes/v5.0.15/fix-9751.zh.md deleted file mode 100644 index 3908e5c20..000000000 --- a/changes/v5.0.15/fix-9751.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复在更新或者删除监听器后,过时的证书文件没有被删除的问题。 diff --git a/changes/v5.0.15/fix-9763.en.md b/changes/v5.0.15/fix-9763.en.md deleted file mode 100644 index 8c07a3d5d..000000000 --- a/changes/v5.0.15/fix-9763.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix an authentication exception when password is not provided diff --git a/changes/v5.0.15/fix-9763.zh.md b/changes/v5.0.15/fix-9763.zh.md deleted file mode 100644 index 8548a363e..000000000 --- a/changes/v5.0.15/fix-9763.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复客户端没有提供密码时的一个异常 diff --git a/changes/v5.0.15/fix-9765.en.md b/changes/v5.0.15/fix-9765.en.md deleted file mode 100644 index 7de7e55f3..000000000 --- a/changes/v5.0.15/fix-9765.en.md +++ /dev/null @@ -1,6 +0,0 @@ -Parse decimals as password from environment variable overrides correctly. -Prior to this change, config values for passwords are not allowed to be decimals. -e.g. `EMQX_FOOBAR__PASSWORD=12344` or `emqx.foobar.password=1234` -would result in a type check error, unless quoted as: -`EMQX_FOOBAR__PASSWORD='"12344"'` or `emqx.foobar.password="1234"`. 
-After this fix, the value does not have to be quoted. diff --git a/changes/v5.0.15/fix-9765.zh.md b/changes/v5.0.15/fix-9765.zh.md deleted file mode 100644 index dd0b6a79c..000000000 --- a/changes/v5.0.15/fix-9765.zh.md +++ /dev/null @@ -1,7 +0,0 @@ -允许使用纯数字作为密码配置。 -在此修复前,密码的配置必须是字符串,使用纯数字时,会报类型检查错误。 -例如,`EMQX_FOOBAR__PASSWORD=12344` 或 `emqx.foobar.password=1234` 会出错, -必须用引把值括起来才行: -`EMQX_FOOBAR__PASSWORD='"12344"'` 或 `emqx.foobar.password="1234"`。 -修复后可以不使用引号。在环境变量重载中使用更加方便。 - diff --git a/changes/v5.0.15/fix-9769.en.md b/changes/v5.0.15/fix-9769.en.md deleted file mode 100644 index e07397363..000000000 --- a/changes/v5.0.15/fix-9769.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix Erlang shell prompt version prefix. e5.0.15 -> v5.0.15 diff --git a/changes/v5.0.15/fix-9769.zh.md b/changes/v5.0.15/fix-9769.zh.md deleted file mode 100644 index c7e63b862..000000000 --- a/changes/v5.0.15/fix-9769.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复 Eralng 控制台版本号前缀的打印错误 e5.0.15 -> v5.0.15 diff --git a/changes/v5.0.15/fix-9780.en.md b/changes/v5.0.15/fix-9780.en.md deleted file mode 100644 index cf777e6dc..000000000 --- a/changes/v5.0.15/fix-9780.en.md +++ /dev/null @@ -1 +0,0 @@ -When creating disk queue directory for resource worker, substitute ':' with '-' in worker id. diff --git a/changes/v5.0.15/fix-9780.zh.md b/changes/v5.0.15/fix-9780.zh.md deleted file mode 100644 index bc5079e1d..000000000 --- a/changes/v5.0.15/fix-9780.zh.md +++ /dev/null @@ -1 +0,0 @@ -在为资源缓存进程创建磁盘队列目录时,在ID中用 '-' 代替 ':'。 diff --git a/changes/v5.0.15/fix-9781.en.md b/changes/v5.0.15/fix-9781.en.md deleted file mode 100644 index 2b34ddc24..000000000 --- a/changes/v5.0.15/fix-9781.en.md +++ /dev/null @@ -1 +0,0 @@ -Trace files were left on a node when creating a zip file for download. They are now removed when the file is sent. Also, concurrent downloads will no longer interfere with each other. 
diff --git a/changes/v5.0.15/fix-9781.zh.md b/changes/v5.0.15/fix-9781.zh.md deleted file mode 100644 index 5c4cee0f5..000000000 --- a/changes/v5.0.15/fix-9781.zh.md +++ /dev/null @@ -1 +0,0 @@ -当下载 日志追踪 的日志时,一些中间文件将存留在处理节点上,现在这个问题得到了修复。同时,并发下载日志将不再相互干扰。 diff --git a/changes/v5.0.15/fix-9785.en.md b/changes/v5.0.15/fix-9785.en.md deleted file mode 100644 index 8af14b1ed..000000000 --- a/changes/v5.0.15/fix-9785.en.md +++ /dev/null @@ -1 +0,0 @@ -Stop authentication hook chain if `emqx_authentication` provides a definitive result. diff --git a/changes/v5.0.15/fix-9785.zh.md b/changes/v5.0.15/fix-9785.zh.md deleted file mode 100644 index 6aa84f755..000000000 --- a/changes/v5.0.15/fix-9785.zh.md +++ /dev/null @@ -1 +0,0 @@ -如果 `emqx_authentication` 提供了确定的结果,则停止认证钩子链。 diff --git a/changes/v5.0.15/fix-9787.en.md b/changes/v5.0.15/fix-9787.en.md deleted file mode 100644 index b41180368..000000000 --- a/changes/v5.0.15/fix-9787.en.md +++ /dev/null @@ -1 +0,0 @@ -Fix a compatible problem for the `webhook` bridge configuration which was created before the v5.0.12. diff --git a/changes/v5.0.15/fix-9787.zh.md b/changes/v5.0.15/fix-9787.zh.md deleted file mode 100644 index a9d758de0..000000000 --- a/changes/v5.0.15/fix-9787.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复对在 v5.0.12 之前创建的 `webhook` 桥接配置的兼容问题。 diff --git a/changes/v5.0.16/fix-9824.en.md b/changes/v5.0.16/fix-9824.en.md deleted file mode 100644 index 29aa93264..000000000 --- a/changes/v5.0.16/fix-9824.en.md +++ /dev/null @@ -1 +0,0 @@ -The `topics/{topic}` API endpoint would return `500 - Internal Error` if a topic had multiple routes. This is fixed by returning a list of routes. 
diff --git a/changes/v5.0.16/fix-9824.zh.md b/changes/v5.0.16/fix-9824.zh.md deleted file mode 100644 index 143a39c16..000000000 --- a/changes/v5.0.16/fix-9824.zh.md +++ /dev/null @@ -1 +0,0 @@ -修复:当存在多个路由信息时,topics/{topic} 将会返回 500 - Internal Error 的问题,现在将会正确的返回路由信息列表。 diff --git a/changes/v5.0.16/fix-9832.en.md b/changes/v5.0.16/fix-9832.en.md deleted file mode 100644 index 84178b63c..000000000 --- a/changes/v5.0.16/fix-9832.en.md +++ /dev/null @@ -1 +0,0 @@ -Improve error log when bridge in 'sync' mode timed out to get response. diff --git a/changes/v5.0.16/fix-9832.zh.md b/changes/v5.0.16/fix-9832.zh.md deleted file mode 100644 index e7fd33b6b..000000000 --- a/changes/v5.0.16/fix-9832.zh.md +++ /dev/null @@ -1 +0,0 @@ -优化桥接同步资源调用超时情况下的一个错误日志。 diff --git a/changes/v5.0.16/fix-9834.en.md b/changes/v5.0.16/fix-9834.en.md deleted file mode 100644 index d5ad1f67a..000000000 --- a/changes/v5.0.16/fix-9834.en.md +++ /dev/null @@ -1 +0,0 @@ -Allow `mqtt.idle_timeout` to be set to `infinity` diff --git a/changes/v5.0.16/fix-9834.zh.md b/changes/v5.0.16/fix-9834.zh.md deleted file mode 100644 index 06eafc1a0..000000000 --- a/changes/v5.0.16/fix-9834.zh.md +++ /dev/null @@ -1 +0,0 @@ -允许配置项 `mqtt.idle_timeout` 设置成 `infinity` diff --git a/changes/v5.0.16/fix-9839.en.md b/changes/v5.0.16/fix-9839.en.md deleted file mode 100644 index 9962b6338..000000000 --- a/changes/v5.0.16/fix-9839.en.md +++ /dev/null @@ -1 +0,0 @@ -Make sure that the content of an Authorization header that users have specified for a webhook bridge is not printed to log files. 
diff --git a/changes/v5.0.16/fix-9839.zh.md b/changes/v5.0.16/fix-9839.zh.md deleted file mode 100644 index d9e1e0ad8..000000000 --- a/changes/v5.0.16/fix-9839.zh.md +++ /dev/null @@ -1 +0,0 @@ -确保用户为webhook-bridge指定的Authorization-HTTP-header的内容不会被打印到日志文件。 diff --git a/changes/v5.0.16/fix-9884.en.md b/changes/v5.0.16/fix-9884.en.md deleted file mode 100644 index 28eacfc86..000000000 --- a/changes/v5.0.16/fix-9884.en.md +++ /dev/null @@ -1,2 +0,0 @@ -Do not resume all buffer workers on successful health check of any individual resource. -Previously after any successful healthcheck, all buffer workers (for all resources) were resumed diff --git a/changes/v5.0.16/fix-9884.zh.md b/changes/v5.0.16/fix-9884.zh.md deleted file mode 100644 index 08f6e7188..000000000 --- a/changes/v5.0.16/fix-9884.zh.md +++ /dev/null @@ -1,2 +0,0 @@ -不在任意一个资源健康检查成功时恢复所有资源发送缓存。 -在此修复之前,在任意一个资源成功进行健康检查后,所有资源的缓存都会尝试恢复。 From 1e0fe270bca1b5727266889256075606481d50da Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 20:32:09 +0100 Subject: [PATCH 126/131] chore: bump emqx app vsn to 5.0.17 --- apps/emqx/src/emqx.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx.app.src b/apps/emqx/src/emqx.app.src index 270d36a5e..c812b2217 100644 --- a/apps/emqx/src/emqx.app.src +++ b/apps/emqx/src/emqx.app.src @@ -3,7 +3,7 @@ {id, "emqx"}, {description, "EMQX Core"}, % strict semver, bump manually! 
- {vsn, "5.0.16"}, + {vsn, "5.0.17"}, {modules, []}, {registered, []}, {applications, [ From 90f23ffc7880369c536c58ee3e18cf65fefacb87 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 2 Feb 2023 21:43:15 +0100 Subject: [PATCH 127/131] ci: no relup support for now --- scripts/relup-build/download-base-packages.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/relup-build/download-base-packages.sh b/scripts/relup-build/download-base-packages.sh index 1a03f7ef8..fc4511b58 100755 --- a/scripts/relup-build/download-base-packages.sh +++ b/scripts/relup-build/download-base-packages.sh @@ -14,8 +14,10 @@ export PROFILE case $PROFILE in "emqx-enterprise") - DIR='emqx-ee' - EDITION='enterprise' + #S3DIR='emqx-ee' + #EDITION='enterprise' + echo "No relup for now" + exit 0 ;; "emqx") echo "No relup for opensource edition" @@ -51,7 +53,7 @@ mkdir -p _upgrade_base pushd _upgrade_base >/dev/null for tag in ${BASE_VERSIONS}; do filename="$PROFILE-$(fullvsn "${tag#[e|v]}").tar.gz" - url="https://packages.emqx.io/$DIR/$tag/$filename" + url="https://packages.emqx.io/$S3DIR/$tag/$filename" echo "downloading ${filename} ..." 
## if the file does not exist (not downloaded yet) ## and there is such a package to downlaod From 636247650854c41645d238f0964f70eccaa64328 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 3 Feb 2023 09:00:34 +0100 Subject: [PATCH 128/131] feat: disable telemetry for ee by default --- lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf | 1 + scripts/merge-config.escript | 33 ++++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) create mode 100644 lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf diff --git a/lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf b/lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf new file mode 100644 index 000000000..8da63dad9 --- /dev/null +++ b/lib-ee/emqx_ee_conf/etc/emqx-enterprise.conf @@ -0,0 +1 @@ +telemetry.enable = false diff --git a/scripts/merge-config.escript b/scripts/merge-config.escript index 1b30dbd1d..d30a0ca68 100755 --- a/scripts/merge-config.escript +++ b/scripts/merge-config.escript @@ -30,7 +30,7 @@ main(_) -> case IsEnterprise of true -> EnterpriseCfgs = get_all_cfgs("lib-ee"), - EnterpriseConf = merge("", EnterpriseCfgs), + EnterpriseConf = merge(<<"">>, EnterpriseCfgs), ok = file:write_file("apps/emqx_conf/etc/emqx-enterprise.conf.all", EnterpriseConf); false -> ok @@ -41,22 +41,21 @@ is_enterprise() -> nomatch =/= string:find(Profile, "enterprise"). merge(BaseConf, Cfgs) -> - lists:foldl( - fun(CfgFile, Acc) -> - case filelib:is_regular(CfgFile) of - true -> - {ok, Bin1} = file:read_file(CfgFile), - case string:trim(Bin1, both) of - <<>> -> Acc; - Bin2 -> [Acc, io_lib:nl(), io_lib:nl(), Bin2] - end; - false -> - Acc - end - end, - BaseConf, - Cfgs - ). + Confs = [BaseConf | lists:map(fun read_conf/1, Cfgs)], + infix(lists:filter(fun(I) -> iolist_size(I) > 0 end, Confs), [io_lib:nl(), io_lib:nl()]). + +read_conf(CfgFile) -> + case filelib:is_regular(CfgFile) of + true -> + {ok, Bin1} = file:read_file(CfgFile), + string:trim(Bin1, both); + false -> + <<>> + end. 
+ +infix([], _With) -> []; +infix([One], _With) -> [One]; +infix([H | T], With) -> [H, With, infix(T, With)]. get_all_cfgs(Root) -> Apps0 = filelib:wildcard("*", Root) -- ["emqx_machine", "emqx_conf"], From c95d201331ca59a4ff361a3b53dfb194da821616 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 3 Feb 2023 09:40:28 +0100 Subject: [PATCH 129/131] ci: no relup for now --- scripts/relup-build/download-base-packages.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/relup-build/download-base-packages.sh b/scripts/relup-build/download-base-packages.sh index 1a03f7ef8..8d89e8ab5 100755 --- a/scripts/relup-build/download-base-packages.sh +++ b/scripts/relup-build/download-base-packages.sh @@ -14,8 +14,10 @@ export PROFILE case $PROFILE in "emqx-enterprise") - DIR='emqx-ee' - EDITION='enterprise' + #DIR='emqx-ee' + #EDITION='enterprise' + echo "No relup for now" + exit 0 ;; "emqx") echo "No relup for opensource edition" From d45a0cb2ee929f48113823bfc0b832f10e094408 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Fri, 3 Feb 2023 15:45:18 +0800 Subject: [PATCH 130/131] chore: remove <<>> when logging username --- apps/emqx_dashboard/src/emqx_dashboard_api.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_api.erl index a4322c696..cc2a1337d 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_api.erl @@ -325,7 +325,7 @@ is_self_auth_token(Username, Token) -> end. 
change_pwd(post, #{bindings := #{username := Username}, body := Params}) -> - LogMeta = #{msg => "Dashboard change password", username => Username}, + LogMeta = #{msg => "Dashboard change password", username => binary_to_list(Username)}, OldPwd = maps:get(<<"old_pwd">>, Params), NewPwd = maps:get(<<"new_pwd">>, Params), case ?EMPTY(OldPwd) orelse ?EMPTY(NewPwd) of From 7233f42889d3786575742e8a404b54edded04632 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 3 Feb 2023 09:58:02 +0100 Subject: [PATCH 131/131] ci: skip relup tests for now --- .github/workflows/run_relup_tests.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/run_relup_tests.yaml b/.github/workflows/run_relup_tests.yaml index a6854aa40..29b4fbc45 100644 --- a/.github/workflows/run_relup_tests.yaml +++ b/.github/workflows/run_relup_tests.yaml @@ -4,13 +4,13 @@ concurrency: group: relup-${{ github.event_name }}-${{ github.ref }} cancel-in-progress: true -on: - push: - branches: - - '**' - tags: - - e* - pull_request: +# on: +# push: +# branches: +# - '**' +# tags: +# - e* +# pull_request: jobs: relup_test_plan: