From 4068258c785104c94f119d32c3c70deb0693f9d3 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Fri, 19 Jan 2024 10:20:57 -0300 Subject: [PATCH 01/89] fix(bridge_api): explicitly state bpapi version ranges Fixes https://emqx.atlassian.net/browse/EMQX-11767 The currently supported version for a node may not be available at the time it's probed. --- apps/emqx_bridge/src/emqx_bridge_api.erl | 13 ++++++------- apps/emqx_bridge/src/emqx_bridge_v2_api.erl | 7 +++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/apps/emqx_bridge/src/emqx_bridge_api.erl b/apps/emqx_bridge/src/emqx_bridge_api.erl index f53503b86..e0b921a1c 100644 --- a/apps/emqx_bridge/src/emqx_bridge_api.erl +++ b/apps/emqx_bridge/src/emqx_bridge_api.erl @@ -1132,15 +1132,14 @@ maybe_unwrap({error, not_implemented}) -> maybe_unwrap(RpcMulticallResult) -> emqx_rpc:unwrap_erpc(RpcMulticallResult). -supported_versions(start_bridge_to_node) -> bpapi_version_range(2, latest); -supported_versions(start_bridges_to_all_nodes) -> bpapi_version_range(2, latest); -supported_versions(get_metrics_from_all_nodes) -> bpapi_version_range(4, latest); -supported_versions(_Call) -> bpapi_version_range(1, latest). +supported_versions(start_bridge_to_node) -> bpapi_version_range(2, 6); +supported_versions(start_bridges_to_all_nodes) -> bpapi_version_range(2, 6); +supported_versions(get_metrics_from_all_nodes) -> bpapi_version_range(4, 6); +supported_versions(_Call) -> bpapi_version_range(1, 6). %% [From, To] (inclusive on both ends) -bpapi_version_range(From, latest) -> - ThisNodeVsn = emqx_bpapi:supported_version(node(), ?BPAPI_NAME), - lists:seq(From, ThisNodeVsn). +bpapi_version_range(From, To) -> + lists:seq(From, To). redact(Term) -> emqx_utils:redact(Term). 
diff --git a/apps/emqx_bridge/src/emqx_bridge_v2_api.erl b/apps/emqx_bridge/src/emqx_bridge_v2_api.erl index e8a500e85..95471ae5b 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2_api.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2_api.erl @@ -1052,12 +1052,11 @@ do_bpapi_call_vsn(Version, Call, Args) -> is_supported_version(Version, Call) -> lists:member(Version, supported_versions(Call)). -supported_versions(_Call) -> bpapi_version_range(6, latest). +supported_versions(_Call) -> bpapi_version_range(6, 6). %% [From, To] (inclusive on both ends) -bpapi_version_range(From, latest) -> - ThisNodeVsn = emqx_bpapi:supported_version(node(), ?BPAPI_NAME), - lists:seq(From, ThisNodeVsn). +bpapi_version_range(From, To) -> + lists:seq(From, To). maybe_unwrap({error, not_implemented}) -> {error, not_implemented}; From e369c1b971b31fdf1ef16f8dc47eac811a11bfb8 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Fri, 19 Jan 2024 10:13:59 -0300 Subject: [PATCH 02/89] docs(mqtt_source): fix example qos --- apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl index 05b2d6d3a..fdb14315c 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl @@ -187,7 +187,7 @@ source_examples(Method) -> #{ parameters => #{ topic => <<"remote/topic">>, - qos => 2 + qos => 1 } } ) From d22092e3da3af8b6120436af421ec1e2385013db Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Fri, 19 Jan 2024 10:07:56 -0300 Subject: [PATCH 03/89] fix(mqtt_source): don't attempt local publish when legacy config is absent Fixes https://emqx.atlassian.net/browse/EMQX-11765 --- apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl | 6 ++++-- .../test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl | 3 +++ 2 files changed, 7 
insertions(+), 2 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl index 369238ecf..08127ebcd 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_ingress.erl @@ -18,6 +18,7 @@ -include_lib("emqx/include/logger.hrl"). -include_lib("emqx/include/emqx_mqtt.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). %% management APIs -export([ @@ -284,9 +285,10 @@ maybe_on_message_received(Msg, {Mod, Func, Args}) -> maybe_on_message_received(_Msg, undefined) -> ok. -maybe_publish_local(Msg, Local = #{}, Props) -> +maybe_publish_local(Msg, Local = #{topic := Topic}, Props) when Topic =/= undefined -> + ?tp(mqtt_ingress_publish_local, #{msg => Msg, local => Local}), emqx_broker:publish(to_broker_msg(Msg, Local, Props)); -maybe_publish_local(_Msg, undefined, _Props) -> +maybe_publish_local(_Msg, _Local, _Props) -> ok. %% diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl index 3e5471d55..62e0e4f51 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_v2_subscriber_SUITE.erl @@ -238,6 +238,9 @@ t_receive_via_rule(Config) -> end, fun(Trace) -> ?assertEqual([], ?of_kind("action_references_nonexistent_bridges", Trace)), + %% We don't have the hidden, legacy `local' config set, so we shouldn't + %% attempt to publish directly. 
+ ?assertEqual([], ?of_kind(mqtt_ingress_publish_local, Trace)), ok end ), From 54457b7093c9ae604fdb259a75f1c34b952763a6 Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Thu, 18 Jan 2024 19:03:54 +0200 Subject: [PATCH 04/89] feat(emqx_utils): allow `infinity` timeout in `pmap/3` --- apps/emqx_utils/src/emqx_utils.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/emqx_utils/src/emqx_utils.erl b/apps/emqx_utils/src/emqx_utils.erl index 8d7c622a4..046551e8d 100644 --- a/apps/emqx_utils/src/emqx_utils.erl +++ b/apps/emqx_utils/src/emqx_utils.erl @@ -431,7 +431,9 @@ pmap(Fun, List) when is_function(Fun, 1), is_list(List) -> -spec pmap(fun((A) -> B), list(A), timeout()) -> list(B). pmap(Fun, List, Timeout) when - is_function(Fun, 1), is_list(List), is_integer(Timeout), Timeout >= 0 + is_function(Fun, 1), + is_list(List), + (is_integer(Timeout) andalso Timeout >= 0 orelse Timeout =:= infinity) -> nolink_apply(fun() -> do_parallel_map(Fun, List) end, Timeout). From a6568dec758653c330ec1648c7055b43cee0618b Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Thu, 18 Jan 2024 19:04:50 +0200 Subject: [PATCH 05/89] perf(emqx_bridge/connector): apply post config bridge/connector changes in parallel This can greatly improve the performance when many bridges/connectors are being changed, e.g. when a backup file is being imported. 
Fixes: EMQX-11751 --- apps/emqx_bridge/src/emqx_bridge.erl | 70 +++++++++++-------- apps/emqx_bridge/src/emqx_bridge_v2.erl | 62 +++++++++------- .../emqx_bridge/test/emqx_bridge_v2_SUITE.erl | 44 ++++++++---- apps/emqx_connector/src/emqx_connector.erl | 70 +++++++++++-------- changes/ce/perf-12354.en.md | 3 + 5 files changed, 155 insertions(+), 94 deletions(-) create mode 100644 changes/ce/perf-12354.en.md diff --git a/apps/emqx_bridge/src/emqx_bridge.erl b/apps/emqx_bridge/src/emqx_bridge.erl index e27748610..3cf9a199a 100644 --- a/apps/emqx_bridge/src/emqx_bridge.erl +++ b/apps/emqx_bridge/src/emqx_bridge.erl @@ -284,15 +284,15 @@ pre_config_update([?ROOT_KEY], NewConf, _RawConf) -> post_config_update([?ROOT_KEY], _Req, NewConf, OldConf, _AppEnv) -> #{added := Added, removed := Removed, changed := Updated} = diff_confs(NewConf, OldConf), - %% The config update will be failed if any task in `perform_bridge_changes` failed. Result = perform_bridge_changes([ - #{action => fun emqx_bridge_resource:remove/4, data => Removed}, + #{action => fun emqx_bridge_resource:remove/4, action_name => remove, data => Removed}, #{ action => fun emqx_bridge_resource:create/4, + action_name => create, data => Added, on_exception_fn => fun emqx_bridge_resource:remove/4 }, - #{action => fun emqx_bridge_resource:update/4, data => Updated} + #{action => fun emqx_bridge_resource:update/4, action_name => update, data => Updated} ]), ok = unload_hook(), ok = load_hook(NewConf), @@ -534,28 +534,21 @@ convert_certs(BridgesConf) -> ). perform_bridge_changes(Tasks) -> - perform_bridge_changes(Tasks, ok). + perform_bridge_changes(Tasks, []). 
-perform_bridge_changes([], Result) -> - Result; -perform_bridge_changes([#{action := Action, data := MapConfs} = Task | Tasks], Result0) -> +perform_bridge_changes([], Errors) -> + case Errors of + [] -> ok; + _ -> {error, Errors} + end; +perform_bridge_changes([#{action := Action, data := MapConfs} = Task | Tasks], Errors0) -> OnException = maps:get(on_exception_fn, Task, fun(_Type, _Name, _Conf, _Opts) -> ok end), - Result = maps:fold( - fun - ({_Type, _Name}, _Conf, {error, Reason}) -> - {error, Reason}; - %% for emqx_bridge_resource:update/4 - ({Type, Name}, {OldConf, Conf}, _) -> - ResOpts = emqx_resource:fetch_creation_opts(Conf), - case Action(Type, Name, {OldConf, Conf}, ResOpts) of - {error, Reason} -> {error, Reason}; - Return -> Return - end; - ({Type, Name}, Conf, _) -> - ResOpts = emqx_resource:fetch_creation_opts(Conf), - try Action(Type, Name, Conf, ResOpts) of - {error, Reason} -> {error, Reason}; - Return -> Return + Results = emqx_utils:pmap( + fun({{Type, Name}, Conf}) -> + ResOpts = creation_opts(Conf), + Res = + try + Action(Type, Name, Conf, ResOpts) catch Kind:Error:Stacktrace -> ?SLOG(error, #{ @@ -567,13 +560,34 @@ perform_bridge_changes([#{action := Action, data := MapConfs} = Task | Tasks], R stacktrace => Stacktrace }), OnException(Type, Name, Conf, ResOpts), - erlang:raise(Kind, Error, Stacktrace) - end + {error, Error} + end, + {{Type, Name}, Res} end, - Result0, - MapConfs + maps:to_list(MapConfs), + infinity ), - perform_bridge_changes(Tasks, Result). + Errs = lists:filter( + fun + ({_TypeName, {error, _}}) -> true; + (_) -> false + end, + Results + ), + Errors = + case Errs of + [] -> + Errors0; + _ -> + #{action_name := ActionName} = Task, + [#{action => ActionName, errors => Errs} | Errors0] + end, + perform_bridge_changes(Tasks, Errors). + +creation_opts({_OldConf, Conf}) -> + emqx_resource:fetch_creation_opts(Conf); +creation_opts(Conf) -> + emqx_resource:fetch_creation_opts(Conf). 
diff_confs(NewConfs, OldConfs) -> emqx_utils_maps:diff_maps( diff --git a/apps/emqx_bridge/src/emqx_bridge_v2.erl b/apps/emqx_bridge/src/emqx_bridge_v2.erl index b69882080..522f0aca8 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2.erl @@ -1059,7 +1059,6 @@ post_config_update([ConfRootKey], _Req, NewConf, OldConf, _AppEnv) when -> #{added := Added, removed := Removed, changed := Updated} = diff_confs(NewConf, OldConf), - %% The config update will be failed if any task in `perform_bridge_changes` failed. RemoveFun = fun(Type, Name, Conf) -> uninstall_bridge_v2(ConfRootKey, Type, Name, Conf) end, @@ -1071,13 +1070,14 @@ post_config_update([ConfRootKey], _Req, NewConf, OldConf, _AppEnv) when install_bridge_v2(ConfRootKey, Type, Name, Conf) end, Result = perform_bridge_changes([ - #{action => RemoveFun, data => Removed}, + #{action => RemoveFun, action_name => remove, data => Removed}, #{ action => CreateFun, + action_name => create, data => Added, on_exception_fn => fun emqx_bridge_resource:remove/4 }, - #{action => UpdateFun, data => Updated} + #{action => UpdateFun, action_name => update, data => Updated} ]), reload_message_publish_hook(NewConf), ?tp(bridge_post_config_update_done, #{}), @@ -1141,26 +1141,20 @@ do_flatten_confs(Type, Conf0) -> [{{Type, Name}, Conf} || {Name, Conf} <- maps:to_list(Conf0)]. perform_bridge_changes(Tasks) -> - perform_bridge_changes(Tasks, ok). + perform_bridge_changes(Tasks, []). 
-perform_bridge_changes([], Result) -> - Result; -perform_bridge_changes([#{action := Action, data := MapConfs} = Task | Tasks], Result0) -> +perform_bridge_changes([], Errors) -> + case Errors of + [] -> ok; + _ -> {error, Errors} + end; +perform_bridge_changes([#{action := Action, data := MapConfs} = Task | Tasks], Errors0) -> OnException = maps:get(on_exception_fn, Task, fun(_Type, _Name, _Conf, _Opts) -> ok end), - Result = maps:fold( - fun - ({_Type, _Name}, _Conf, {error, Reason}) -> - {error, Reason}; - %% for update - ({Type, Name}, {OldConf, Conf}, _) -> - case Action(Type, Name, {OldConf, Conf}) of - {error, Reason} -> {error, Reason}; - Return -> Return - end; - ({Type, Name}, Conf, _) -> - try Action(Type, Name, Conf) of - {error, Reason} -> {error, Reason}; - Return -> Return + Results = emqx_utils:pmap( + fun({{Type, Name}, Conf}) -> + Res = + try + Action(Type, Name, Conf) catch Kind:Error:Stacktrace -> ?SLOG(error, #{ @@ -1172,13 +1166,29 @@ perform_bridge_changes([#{action := Action, data := MapConfs} = Task | Tasks], R stacktrace => Stacktrace }), OnException(Type, Name, Conf), - erlang:raise(Kind, Error, Stacktrace) - end + {error, Error} + end, + {{Type, Name}, Res} end, - Result0, - MapConfs + maps:to_list(MapConfs), + infinity ), - perform_bridge_changes(Tasks, Result). + Errs = lists:filter( + fun + ({_TypeName, {error, _}}) -> true; + (_) -> false + end, + Results + ), + Errors = + case Errs of + [] -> + Errors0; + _ -> + #{action_name := ActionName} = Task, + [#{action => ActionName, errors => Errs} | Errors0] + end, + perform_bridge_changes(Tasks, Errors). 
fill_defaults(Type, RawConf, TopLevelConf, SchemaModule) -> PackedConf = pack_bridge_conf(Type, RawConf, TopLevelConf), diff --git a/apps/emqx_bridge/test/emqx_bridge_v2_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_v2_SUITE.erl index f3b8a29d7..ba631f71a 100644 --- a/apps/emqx_bridge/test/emqx_bridge_v2_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_v2_SUITE.erl @@ -606,12 +606,22 @@ t_load_no_matching_connector(_Config) -> }, ?assertMatch( {error, - {post_config_update, _HandlerMod, #{ - bridge_name := my_test_bridge_update, - connector_name := <<"unknown">>, - bridge_type := _, - reason := <<"connector_not_found_or_wrong_type">> - }}}, + {post_config_update, _HandlerMod, [ + #{ + errors := [ + { + {_, my_test_bridge_update}, + {error, #{ + bridge_name := my_test_bridge_update, + connector_name := <<"unknown">>, + bridge_type := _, + reason := <<"connector_not_found_or_wrong_type">> + }} + } + ], + action := update + } + ]}}, update_root_config(RootConf0) ), @@ -623,12 +633,22 @@ t_load_no_matching_connector(_Config) -> }, ?assertMatch( {error, - {post_config_update, _HandlerMod, #{ - bridge_name := my_test_bridge_new, - connector_name := <<"unknown">>, - bridge_type := _, - reason := <<"connector_not_found_or_wrong_type">> - }}}, + {post_config_update, _HandlerMod, [ + #{ + errors := [ + { + {_, my_test_bridge_new}, + {error, #{ + bridge_name := my_test_bridge_new, + connector_name := <<"unknown">>, + bridge_type := _, + reason := <<"connector_not_found_or_wrong_type">> + }} + } + ], + action := create + } + ]}}, update_root_config(RootConf1) ), diff --git a/apps/emqx_connector/src/emqx_connector.erl b/apps/emqx_connector/src/emqx_connector.erl index 92cf9439e..ba2e3106f 100644 --- a/apps/emqx_connector/src/emqx_connector.erl +++ b/apps/emqx_connector/src/emqx_connector.erl @@ -169,16 +169,16 @@ post_config_update([?ROOT_KEY, Type, Name], _Req, NewConf, OldConf, _AppEnvs) -> ?tp(connector_post_config_update_done, #{}), ok. 
-%% The config update will be failed if any task in `perform_connector_changes` failed. perform_connector_changes(Removed, Added, Updated) -> Result = perform_connector_changes([ - #{action => fun emqx_connector_resource:remove/4, data => Removed}, + #{action => fun emqx_connector_resource:remove/4, action_name => remove, data => Removed}, #{ action => fun emqx_connector_resource:create/4, + action_name => create, data => Added, on_exception_fn => fun emqx_connector_resource:remove/4 }, - #{action => fun emqx_connector_resource:update/4, data => Updated} + #{action => fun emqx_connector_resource:update/4, action_name => update, data => Updated} ]), ?tp(connector_post_config_update_done, #{}), Result. @@ -351,28 +351,21 @@ convert_certs(ConnectorsConf) -> ). perform_connector_changes(Tasks) -> - perform_connector_changes(Tasks, ok). + perform_connector_changes(Tasks, []). -perform_connector_changes([], Result) -> - Result; -perform_connector_changes([#{action := Action, data := MapConfs} = Task | Tasks], Result0) -> +perform_connector_changes([], Errors) -> + case Errors of + [] -> ok; + _ -> {error, Errors} + end; +perform_connector_changes([#{action := Action, data := MapConfs} = Task | Tasks], Errors0) -> OnException = maps:get(on_exception_fn, Task, fun(_Type, _Name, _Conf, _Opts) -> ok end), - Result = maps:fold( - fun - ({_Type, _Name}, _Conf, {error, Reason}) -> - {error, Reason}; - %% for emqx_connector_resource:update/4 - ({Type, Name}, {OldConf, Conf}, _) -> - ResOpts = emqx_resource:fetch_creation_opts(Conf), - case Action(Type, Name, {OldConf, Conf}, ResOpts) of - {error, Reason} -> {error, Reason}; - Return -> Return - end; - ({Type, Name}, Conf, _) -> - ResOpts = emqx_resource:fetch_creation_opts(Conf), - try Action(Type, Name, Conf, ResOpts) of - {error, Reason} -> {error, Reason}; - Return -> Return + Results = emqx_utils:pmap( + fun({{Type, Name}, Conf}) -> + ResOpts = creation_opts(Conf), + Res = + try + Action(Type, Name, Conf, ResOpts) catch 
Kind:Error:Stacktrace -> ?SLOG(error, #{ @@ -384,13 +377,34 @@ perform_connector_changes([#{action := Action, data := MapConfs} = Task | Tasks] stacktrace => Stacktrace }), OnException(Type, Name, Conf, ResOpts), - erlang:raise(Kind, Error, Stacktrace) - end + {error, Error} + end, + {{Type, Name}, Res} end, - Result0, - MapConfs + maps:to_list(MapConfs), + infinity ), - perform_connector_changes(Tasks, Result). + Errs = lists:filter( + fun + ({_TypeName, {error, _}}) -> true; + (_) -> false + end, + Results + ), + Errors = + case Errs of + [] -> + Errors0; + _ -> + #{action_name := ActionName} = Task, + [#{action => ActionName, errors => Errs} | Errors0] + end, + perform_connector_changes(Tasks, Errors). + +creation_opts({_OldConf, Conf}) -> + emqx_resource:fetch_creation_opts(Conf); +creation_opts(Conf) -> + emqx_resource:fetch_creation_opts(Conf). diff_confs(NewConfs, OldConfs) -> emqx_utils_maps:diff_maps( diff --git a/changes/ce/perf-12354.en.md b/changes/ce/perf-12354.en.md new file mode 100644 index 000000000..ac0be69a0 --- /dev/null +++ b/changes/ce/perf-12354.en.md @@ -0,0 +1,3 @@ +Apply post config bridge changes in parallel. +This can greatly improve the performance when multiple bridges are being changed, +e.g. when a backup file is being imported. 
From d3a6870097307f53054625b5ac4217d0a11ef787 Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 19 Jan 2024 21:32:41 +0200 Subject: [PATCH 06/89] feat(emqx_utils): add pforeach/2,3 --- apps/emqx_utils/src/emqx_utils.erl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/apps/emqx_utils/src/emqx_utils.erl b/apps/emqx_utils/src/emqx_utils.erl index 046551e8d..0eeef2e5e 100644 --- a/apps/emqx_utils/src/emqx_utils.erl +++ b/apps/emqx_utils/src/emqx_utils.erl @@ -51,6 +51,8 @@ gen_id/0, gen_id/1, explain_posix/1, + pforeach/2, + pforeach/3, pmap/2, pmap/3, readable_error_msg/1, @@ -423,6 +425,15 @@ explain_posix(estale) -> "Stale remote file handle"; explain_posix(exdev) -> "Cross-domain link"; explain_posix(NotPosix) -> NotPosix. +-spec pforeach(fun((A) -> term()), list(A)) -> ok. +pforeach(Fun, List) when is_function(Fun, 1), is_list(List) -> + pforeach(Fun, List, ?DEFAULT_PMAP_TIMEOUT). + +-spec pforeach(fun((A) -> term()), list(A), timeout()) -> ok. +pforeach(Fun, List, Timeout) -> + _ = pmap(Fun, List, Timeout), + ok. + %% @doc Like lists:map/2, only the callback function is evaluated %% concurrently. -spec pmap(fun((A) -> B), list(A)) -> list(B). From dc15d37dcc0550296def87887b7c42a077f9e256 Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Fri, 19 Jan 2024 19:53:01 +0200 Subject: [PATCH 07/89] perf(emqx_bridge/connector): load and unload bridges/connectors in parallel This should reduce app start/stop time, when a large number of bridges/connectors are not healthy. 
--- apps/emqx_bridge/src/emqx_bridge.erl | 20 ++++++++++------- apps/emqx_bridge/src/emqx_bridge_v2.erl | 26 +++++++++++++--------- apps/emqx_connector/src/emqx_connector.erl | 20 ++++++++++------- 3 files changed, 40 insertions(+), 26 deletions(-) diff --git a/apps/emqx_bridge/src/emqx_bridge.erl b/apps/emqx_bridge/src/emqx_bridge.erl index 3cf9a199a..f48a44df2 100644 --- a/apps/emqx_bridge/src/emqx_bridge.erl +++ b/apps/emqx_bridge/src/emqx_bridge.erl @@ -103,33 +103,37 @@ load() -> Bridges = emqx:get_config([?ROOT_KEY], #{}), - lists:foreach( + emqx_utils:pforeach( fun({Type, NamedConf}) -> - lists:foreach( + emqx_utils:pforeach( fun({Name, Conf}) -> %% fetch opts for `emqx_resource_buffer_worker` ResOpts = emqx_resource:fetch_creation_opts(Conf), safe_load_bridge(Type, Name, Conf, ResOpts) end, - maps:to_list(NamedConf) + maps:to_list(NamedConf), + infinity ) end, - maps:to_list(Bridges) + maps:to_list(Bridges), + infinity ). unload() -> unload_hook(), Bridges = emqx:get_config([?ROOT_KEY], #{}), - lists:foreach( + emqx_utils:pforeach( fun({Type, NamedConf}) -> - lists:foreach( + emqx_utils:pforeach( fun({Name, _Conf}) -> _ = emqx_bridge_resource:stop(Type, Name) end, - maps:to_list(NamedConf) + maps:to_list(NamedConf), + infinity ) end, - maps:to_list(Bridges) + maps:to_list(Bridges), + infinity ). safe_load_bridge(Type, Name, Conf, Opts) -> diff --git a/apps/emqx_bridge/src/emqx_bridge_v2.erl b/apps/emqx_bridge/src/emqx_bridge_v2.erl index 522f0aca8..622dbf464 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2.erl @@ -182,17 +182,20 @@ load() -> load_bridges(RootName) -> Bridges = emqx:get_config([RootName], #{}), - lists:foreach( + _ = emqx_utils:pmap( fun({Type, Bridge}) -> - lists:foreach( + emqx_utils:pmap( fun({Name, BridgeConf}) -> install_bridge_v2(RootName, Type, Name, BridgeConf) end, - maps:to_list(Bridge) + maps:to_list(Bridge), + infinity ) end, - maps:to_list(Bridges) - ). 
+ maps:to_list(Bridges), + infinity + ), + ok. unload() -> unload_bridges(?ROOT_KEY_ACTIONS), @@ -204,17 +207,20 @@ unload() -> unload_bridges(ConfRooKey) -> Bridges = emqx:get_config([ConfRooKey], #{}), - lists:foreach( + _ = emqx_utils:pmap( fun({Type, Bridge}) -> - lists:foreach( + emqx_utils:pmap( fun({Name, BridgeConf}) -> uninstall_bridge_v2(ConfRooKey, Type, Name, BridgeConf) end, - maps:to_list(Bridge) + maps:to_list(Bridge), + infinity ) end, - maps:to_list(Bridges) - ). + maps:to_list(Bridges), + infinity + ), + ok. %%==================================================================== %% CRUD API diff --git a/apps/emqx_connector/src/emqx_connector.erl b/apps/emqx_connector/src/emqx_connector.erl index ba2e3106f..f4fee3eac 100644 --- a/apps/emqx_connector/src/emqx_connector.erl +++ b/apps/emqx_connector/src/emqx_connector.erl @@ -54,30 +54,34 @@ load() -> Connectors = emqx:get_config([?ROOT_KEY], #{}), - lists:foreach( + emqx_utils:pforeach( fun({Type, NamedConf}) -> - lists:foreach( + emqx_utils:pforeach( fun({Name, Conf}) -> safe_load_connector(Type, Name, Conf) end, - maps:to_list(NamedConf) + maps:to_list(NamedConf), + infinity ) end, - maps:to_list(Connectors) + maps:to_list(Connectors), + infinity ). unload() -> Connectors = emqx:get_config([?ROOT_KEY], #{}), - lists:foreach( + emqx_utils:pforeach( fun({Type, NamedConf}) -> - lists:foreach( + emqx_utils:pforeach( fun({Name, _Conf}) -> _ = emqx_connector_resource:stop(Type, Name) end, - maps:to_list(NamedConf) + maps:to_list(NamedConf), + infinity ) end, - maps:to_list(Connectors) + maps:to_list(Connectors), + infinity ). safe_load_connector(Type, Name, Conf) -> From a35698009cf8065f86b940dd0d7d1e78c117d24f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 16:25:54 +0800 Subject: [PATCH 08/89] feat(dashboard): endpoint `/monitor_current` provides more fields - `retained_msg_count` Current retained_msg_count on each node and should be same on all nodes.
- `license_quota` Only for enterprise edition, provides the max limited connections num. --- .../src/emqx_dashboard_monitor.erl | 22 ++++++++++++++++++- .../src/emqx_dashboard_monitor_api.erl | 14 +++++++++++- apps/emqx_retainer/src/emqx_retainer_api.erl | 6 ++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index ad95e8678..7916a6b58 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -173,7 +173,9 @@ handle_call(current_rate, _From, State = #state{last = Last}) -> NowTime = erlang:system_time(millisecond), NowSamplers = sample(NowTime), Rate = cal_rate(NowSamplers, Last), - {reply, {ok, Rate}, State}; + NonRateValue = non_rate_value(), + Samples = maps:merge(Rate, NonRateValue), + {reply, {ok, Samples}, State}; handle_call(_Request, _From, State = #state{}) -> {reply, ok, State}. @@ -409,3 +411,21 @@ stats(received_bytes) -> emqx_metrics:val('bytes.received'); stats(sent) -> emqx_metrics:val('messages.sent'); stats(sent_bytes) -> emqx_metrics:val('bytes.sent'); stats(dropped) -> emqx_metrics:val('messages.dropped'). + +%% ------------------------------------------------------------------------------------------------- +%% Retained && License Quota + +%% the non rate values should be same on all nodes +non_rate_value() -> + #{ + retained_msg_count => emqx_retainer:retained_count(), + license_quota => license_quota() + }. + +license_quota() -> + case emqx_license_checker:limits() of + {ok, #{max_connections := Quota}} -> + Quota; + {error, no_license} -> + 0 + end. 
diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl index a152531f1..309137362 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl @@ -1,5 +1,17 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_dashboard_monitor_api). diff --git a/apps/emqx_retainer/src/emqx_retainer_api.erl b/apps/emqx_retainer/src/emqx_retainer_api.erl index bb232f9e4..6d5eee477 100644 --- a/apps/emqx_retainer/src/emqx_retainer_api.erl +++ b/apps/emqx_retainer/src/emqx_retainer_api.erl @@ -44,7 +44,11 @@ api_spec() -> emqx_dashboard_swagger:spec(?MODULE, #{check_schema => true}). paths() -> - [?PREFIX, ?PREFIX ++ "/messages", ?PREFIX ++ "/message/:topic"]. + [ + ?PREFIX, + ?PREFIX ++ "/messages", + ?PREFIX ++ "/message/:topic" + ]. 
schema(?PREFIX) -> #{ From 3d6b65acedda637eacea1e4e65722f36dcd3c18b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 16:30:16 +0800 Subject: [PATCH 09/89] chore: emqx_dashboard_monitor:current_rate/0 not exported anymore --- .../src/emqx_dashboard_monitor.erl | 66 +++++++++++-------- .../src/emqx_dashboard_monitor_api.erl | 4 ++ 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index 7916a6b58..692c7a62e 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -40,11 +40,14 @@ -export([ samplers/0, samplers/2, - current_rate/0, current_rate/1, granularity_adapter/1 ]). +-ifdef(TEST). +-export([current_rate_cluster/0]). +-endif. + %% for rpc -export([do_sample/2]). @@ -112,8 +115,33 @@ granularity_adapter(List) when length(List) > 1000 -> granularity_adapter(List) -> List. +current_rate(all) -> + current_rate_cluster(); +current_rate(Node) when Node == node() -> + try + {ok, Rate} = do_call(current_rate), + {ok, Rate} + catch + _E:R -> + ?SLOG(warning, #{msg => "dashboard_monitor_error", reason => R}), + %% Rate map 0, ensure api will not crash. + %% When joining cluster, dashboard monitor restart. + Rate0 = [ + {Key, 0} + || Key <- ?GAUGE_SAMPLER_LIST ++ maps:values(?DELTA_SAMPLER_RATE_MAP) + ], + {ok, maps:merge(maps:from_list(Rate0), non_rate_value())} + end; +current_rate(Node) -> + case emqx_dashboard_proto_v1:current_rate(Node) of + {badrpc, Reason} -> + {badrpc, {Node, Reason}}; + {ok, Rate} -> + {ok, Rate} + end. + %% Get the current rate. Not the current sampler data. -current_rate() -> +current_rate_cluster() -> Fun = fun (Node, Cluster) when is_map(Cluster) -> @@ -133,31 +161,6 @@ current_rate() -> {ok, Rate} end. 
-current_rate(all) -> - current_rate(); -current_rate(Node) when Node == node() -> - try - {ok, Rate} = do_call(current_rate), - {ok, Rate} - catch - _E:R -> - ?SLOG(warning, #{msg => "dashboard_monitor_error", reason => R}), - %% Rate map 0, ensure api will not crash. - %% When joining cluster, dashboard monitor restart. - Rate0 = [ - {Key, 0} - || Key <- ?GAUGE_SAMPLER_LIST ++ maps:values(?DELTA_SAMPLER_RATE_MAP) - ], - {ok, maps:from_list(Rate0)} - end; -current_rate(Node) -> - case emqx_dashboard_proto_v1:current_rate(Node) of - {badrpc, Reason} -> - {badrpc, {Node, Reason}}; - {ok, Rate} -> - {ok, Rate} - end. - %% ------------------------------------------------------------------------------------------------- %% gen_server functions @@ -258,8 +261,13 @@ merge_cluster_sampler_map(M1, M2) -> merge_cluster_rate(Node, Cluster) -> Fun = fun - (topics, Value, NCluster) -> - NCluster#{topics => Value}; + %% cluster-synced values + (topics, V, NCluster) -> + NCluster#{topics => V}; + (retained_msg_count, V, NCluster) -> + NCluster#{retained_msg_count => V}; + (license_quota, V, NCluster) -> + NCluster#{license_quota => V}; (Key, Value, NCluster) -> ClusterValue = maps:get(Key, NCluster, 0), NCluster#{Key => Value + ClusterValue} diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl index 309137362..fc4b171a4 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl @@ -151,7 +151,11 @@ monitor_current(get, #{bindings := Bindings}) -> RawNode = maps:get(node, Bindings, <<"all">>), emqx_utils_api:with_node_or_cluster(RawNode, fun current_rate/1). +-spec current_rate(atom()) -> + {error, term()} + | {ok, Result :: map()}. 
current_rate(Node) -> + %% Node :: 'all' or `NodeName` case emqx_dashboard_monitor:current_rate(Node) of {badrpc, _} = BadRpc -> {error, BadRpc}; From 7bc3a5090d71701dfee5ba785ed8fda5a3248854 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 12 Jan 2024 04:36:14 +0800 Subject: [PATCH 10/89] fix(dashboard): meck `emqx_retainer` in SUITE --- apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl | 2 -- apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl index fc4b171a4..d7e3c094c 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl @@ -145,8 +145,6 @@ dashboard_samplers_fun(Latest) -> end end. -monitor_current(get, #{bindings := []}) -> - emqx_utils_api:with_node_or_cluster(erlang:node(), fun emqx_dashboard_monitor:current_rate/1); monitor_current(get, #{bindings := Bindings}) -> RawNode = maps:get(node, Bindings, <<"all">>), emqx_utils_api:with_node_or_cluster(RawNode, fun current_rate/1). diff --git a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl index 71e559647..8b02ae20f 100644 --- a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl +++ b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl @@ -31,10 +31,13 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> + meck:new(emqx_retainer, [non_strict, passthrough, no_history, no_link]), + meck:expect(emqx_retainer, retained_count, fun() -> 0 end), emqx_mgmt_api_test_util:init_suite([]), Config. end_per_suite(_Config) -> + meck:unload([emqx_retainer]), emqx_mgmt_api_test_util:end_suite([]). 
t_monitor_samplers_all(_Config) -> @@ -198,5 +201,5 @@ waiting_emqx_stats_and_monitor_update(WaitKey) -> end, meck:unload([emqx_stats]), %% manually call monitor update - _ = emqx_dashboard_monitor:current_rate(), + _ = emqx_dashboard_monitor:current_rate_cluster(), ok. From ecd0da9fde718f2ed880ee368965be0789facdc4 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 17:05:04 +0800 Subject: [PATCH 11/89] feat(dashboard): `/monitor_current/nodes/{node}` provide `node_uptime` --- apps/emqx_dashboard/src/emqx_dashboard_monitor.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index 692c7a62e..b21712497 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -268,6 +268,9 @@ merge_cluster_rate(Node, Cluster) -> NCluster#{retained_msg_count => V}; (license_quota, V, NCluster) -> NCluster#{license_quota => V}; + %% for cluster sample, ignore node_uptime + (node_uptime, _V, NCluster) -> + NCluster; (Key, Value, NCluster) -> ClusterValue = maps:get(Key, NCluster, 0), NCluster#{Key => Value + ClusterValue} @@ -427,7 +430,8 @@ stats(dropped) -> emqx_metrics:val('messages.dropped'). non_rate_value() -> #{ retained_msg_count => emqx_retainer:retained_count(), - license_quota => license_quota() + license_quota => license_quota(), + node_uptime => emqx_sys:uptime() }. 
license_quota() -> From 89128958ed99b9e1ae4a98975b6db33e25a97ce7 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sat, 13 Jan 2024 16:48:12 +0800 Subject: [PATCH 12/89] fix(dashboard): licence quota not provided to ce edition --- apps/emqx_dashboard/src/emqx_dashboard_monitor.erl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index b21712497..4891b5293 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -428,16 +428,20 @@ stats(dropped) -> emqx_metrics:val('messages.dropped'). %% the non rate values should be same on all nodes non_rate_value() -> - #{ + (license_quota())#{ retained_msg_count => emqx_retainer:retained_count(), - license_quota => license_quota(), node_uptime => emqx_sys:uptime() }. +-if(?EMQX_RELEASE_EDITION == ee). license_quota() -> case emqx_license_checker:limits() of {ok, #{max_connections := Quota}} -> - Quota; + #{license_quota => Quota}; {error, no_license} -> - 0 + #{license_quota => 0} end. +-else. +license_quota() -> + #{}. +-endif. 
From 81ba166b4974f7f27e3b5158ada877f20620a100 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 19:09:33 +0800 Subject: [PATCH 13/89] feat(license): expiry epoch api --- apps/emqx_license/src/emqx_license.app.src | 2 +- apps/emqx_license/src/emqx_license_checker.erl | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/apps/emqx_license/src/emqx_license.app.src b/apps/emqx_license/src/emqx_license.app.src index 8d11c6522..7b80af601 100644 --- a/apps/emqx_license/src/emqx_license.app.src +++ b/apps/emqx_license/src/emqx_license.app.src @@ -1,6 +1,6 @@ {application, emqx_license, [ {description, "EMQX License"}, - {vsn, "5.0.14"}, + {vsn, "5.0.15"}, {modules, []}, {registered, [emqx_license_sup]}, {applications, [kernel, stdlib, emqx_ctl]}, diff --git a/apps/emqx_license/src/emqx_license_checker.erl b/apps/emqx_license/src/emqx_license_checker.erl index 198814fb9..b1547b52e 100644 --- a/apps/emqx_license/src/emqx_license_checker.erl +++ b/apps/emqx_license/src/emqx_license_checker.erl @@ -30,6 +30,7 @@ start_link/2, update/1, dump/0, + expiry_epoch/0, purge/0, limits/0, print_warnings/1 @@ -67,6 +68,10 @@ update(License) -> dump() -> gen_server:call(?MODULE, dump, infinity). +-spec expiry_epoch() -> integer(). +expiry_epoch() -> + gen_server:call(?MODULE, expiry_epoch, infinity). + -spec limits() -> {ok, limits()} | {error, any()}. 
limits() -> try ets:lookup(?LICENSE_TAB, limits) of @@ -111,6 +116,9 @@ handle_call({update, License}, _From, #{license := Old} = State) -> {reply, check_license(License), State1#{license => License}}; handle_call(dump, _From, #{license := License} = State) -> {reply, emqx_license_parser:dump(License), State}; +handle_call(expiry_epoch, _From, #{license := License} = State) -> + ExpiryEpoch = date_to_expiry_epoch(emqx_license_parser:expiry_date(License)), + {reply, ExpiryEpoch, State}; handle_call(purge, _From, State) -> _ = ets:delete_all_objects(?LICENSE_TAB), {reply, ok, State}; @@ -234,6 +242,11 @@ small_customer_overdue(_CType, _DaysLeft) -> false. non_official_license_overdue(?OFFICIAL, _) -> false; non_official_license_overdue(_, DaysLeft) -> DaysLeft < 0. +%% 62167219200 =:= calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}). +-define(EPOCH_START, 62167219200). +date_to_expiry_epoch({Y, M, D}) -> + calendar:datetime_to_gregorian_seconds({{Y, M, D}, {0, 0, 0}}) - ?EPOCH_START. + apply_limits(Limits) -> ets:insert(?LICENSE_TAB, {limits, Limits}). From fae0cea17f8be7520ff738585e114487f49d75b9 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 23:48:28 +0800 Subject: [PATCH 14/89] fix(license): make dialyzer happy --- apps/emqx_license/src/emqx_license_checker.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/emqx_license/src/emqx_license_checker.erl b/apps/emqx_license/src/emqx_license_checker.erl index b1547b52e..88bc79f90 100644 --- a/apps/emqx_license/src/emqx_license_checker.erl +++ b/apps/emqx_license/src/emqx_license_checker.erl @@ -244,6 +244,7 @@ non_official_license_overdue(_, DaysLeft) -> DaysLeft < 0. %% 62167219200 =:= calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}). -define(EPOCH_START, 62167219200). +-spec date_to_expiry_epoch(calendar:date()) -> Seconds :: non_neg_integer(). date_to_expiry_epoch({Y, M, D}) -> calendar:datetime_to_gregorian_seconds({{Y, M, D}, {0, 0, 0}}) - ?EPOCH_START. 
From 558c4713927a507ee3948eb4bddfb9a3842f609d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 22:27:55 +0800 Subject: [PATCH 15/89] feat(prometheus): license expiry at epoch as gauge --- apps/emqx_prometheus/src/emqx_prometheus.erl | 21 ++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index d513e2c37..327586996 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -173,7 +173,9 @@ collect_mf(_Registry, Callback) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), + LicenseData = emqx_license_data(), ClusterData = emqx_cluster_data(), + _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], _ = [add_collect_family(Name, ClusterData, Callback, gauge) || Name <- emqx_cluster()], @@ -192,7 +194,9 @@ collect(<<"json">>) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), + LicenseData = emqx_license_data(), #{ + license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()]), stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), packets => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_packets()]), @@ -506,7 +510,6 @@ emqx_collect(emqx_authentication_failure, Stats) -> counter_metric(?C('authentication.failure', Stats)); %%-------------------------------------------------------------------- %% VM - emqx_collect(emqx_vm_cpu_use, VMData) -> gauge_metric(?C(cpu_use, VMData)); emqx_collect(emqx_vm_cpu_idle, VMData) -> @@ -522,7 +525,11 @@ 
emqx_collect(emqx_vm_used_memory, VMData) -> emqx_collect(emqx_cluster_nodes_running, ClusterData) -> gauge_metric(?C(nodes_running, ClusterData)); emqx_collect(emqx_cluster_nodes_stopped, ClusterData) -> - gauge_metric(?C(nodes_stopped, ClusterData)). + gauge_metric(?C(nodes_stopped, ClusterData)); +%%-------------------------------------------------------------------- +%% License +emqx_collect(emqx_license_expiry_at, LicenseData) -> + gauge_metric(?C(expiry_at, LicenseData)). %%-------------------------------------------------------------------- %% Indicators @@ -687,6 +694,16 @@ emqx_cluster_data() -> {nodes_stopped, length(Stopped)} ]. +emqx_license() -> + [ + emqx_license_expiry_at + ]. + +emqx_license_data() -> + [ + {expiry_at, emqx_license_checker:expiry_epoch()} + ]. + %% deprecated_since 5.0.10, remove this when 5.1.x do_start() -> emqx_prometheus_sup:start_child(?APP). From 092159b07106868c674564229fb926a51632ee30 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 12 Jan 2024 03:44:57 +0800 Subject: [PATCH 16/89] feat(prometheus): cert expiry epoch in endpoint `/prometheus/stats` --- apps/emqx_prometheus/src/emqx_prometheus.erl | 135 +++++++++++++++++- .../src/emqx_prometheus_api.erl | 2 +- 2 files changed, 131 insertions(+), 6 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 327586996..3ac32a47c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -24,6 +24,7 @@ -include("emqx_prometheus.hrl"). +-include_lib("public_key/include/public_key.hrl"). -include_lib("prometheus/include/prometheus_model.hrl"). -include_lib("emqx/include/logger.hrl"). @@ -32,6 +33,7 @@ [ create_mf/5, gauge_metric/1, + gauge_metrics/1, counter_metric/1 ] ). 
@@ -175,7 +177,10 @@ collect_mf(_Registry, Callback) -> VMData = emqx_vm_data(), LicenseData = emqx_license_data(), ClusterData = emqx_cluster_data(), + CertsData = emqx_certs_data(), + %% TODO: license expiry epoch and cert expiry epoch should be cached _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], + _ = [add_collect_family(Name, CertsData, Callback, gauge) || Name <- emqx_certs()], _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], _ = [add_collect_family(Name, ClusterData, Callback, gauge) || Name <- emqx_cluster()], @@ -195,8 +200,13 @@ collect(<<"json">>) -> Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), LicenseData = emqx_license_data(), + %% TODO: FIXME! + %% emqx_metrics_olp()), + %% emqx_metrics_acl()), + %% emqx_metrics_authn()), #{ license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()]), + certs => collect_certs_json(emqx_certs_data()), stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), packets => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_packets()]), @@ -223,10 +233,7 @@ collect_metrics(Name, Metrics) -> emqx_collect(Name, Metrics). add_collect_family(Name, Data, Callback, Type) -> - Callback(create_schema(Name, <<"">>, Data, Type)). - -create_schema(Name, Help, Data, Type) -> - create_mf(Name, Help, Type, ?MODULE, Data). + Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). %%-------------------------------------------------------------------- %% Collector @@ -529,7 +536,11 @@ emqx_collect(emqx_cluster_nodes_stopped, ClusterData) -> %%-------------------------------------------------------------------- %% License emqx_collect(emqx_license_expiry_at, LicenseData) -> - gauge_metric(?C(expiry_at, LicenseData)). 
+ gauge_metric(?C(expiry_at, LicenseData)); +%%-------------------------------------------------------------------- +%% Certs +emqx_collect(emqx_cert_expiry_at, CertsData) -> + gauge_metrics(CertsData). %%-------------------------------------------------------------------- %% Indicators @@ -704,6 +715,120 @@ emqx_license_data() -> {expiry_at, emqx_license_checker:expiry_epoch()} ]. +emqx_certs() -> + [ + emqx_cert_expiry_at + ]. + +-define(LISTENER_TYPES, [ssl, wss, quic]). + +-spec emqx_certs_data() -> + [_Point :: {[Label], Epoch}] +when + Label :: TypeLabel | NameLabel | CertTypeLabel, + TypeLabel :: {listener_type, ssl | wss | quic}, + NameLabel :: {listener_name, atom()}, + CertTypeLabel :: {cert_type, cacertfile | certfile}, + Epoch :: non_neg_integer(). +emqx_certs_data() -> + case emqx_config:get([listeners], undefined) of + undefined -> + []; + AllListeners when is_map(AllListeners) -> + lists:foldl( + fun(ListenerType, PointsAcc) -> + PointsAcc ++ + points_of_listeners(ListenerType, AllListeners) + end, + _PointsInitAcc = [], + ?LISTENER_TYPES + ) + end. + +points_of_listeners(Type, AllListeners) -> + do_points_of_listeners(Type, maps:get(Type, AllListeners, undefined)). + +-define(CERT_TYPES, [cacertfile, certfile]). + +-spec do_points_of_listeners(Type, TypeOfListeners) -> + [_Point :: {[{LabelKey, LabelValue}], Epoch}] +when + Type :: ssl | wss | quic, + TypeOfListeners :: #{ListenerName :: atom() => ListenerConf :: map()} | undefined, + LabelKey :: atom(), + LabelValue :: atom(), + Epoch :: non_neg_integer(). 
+do_points_of_listeners(_, undefined) -> + []; +do_points_of_listeners(ListenerType, TypeOfListeners) -> + lists:foldl( + fun(Name, PointsAcc) -> + lists:foldl( + fun(CertType, AccIn) -> + case + emqx_utils_maps:deep_get( + [Name, ssl_options, CertType], TypeOfListeners, undefined + ) + of + undefined -> AccIn; + Path -> [gen_point(ListenerType, Name, CertType, Path) | AccIn] + end + end, + [], + ?CERT_TYPES + ) ++ PointsAcc + end, + [], + maps:keys(TypeOfListeners) + ). + +gen_point(Type, Name, CertType, Path) -> + { + %% Labels: [{_Labelkey, _LabelValue}] + [ + {listener_type, Type}, + {listener_name, Name}, + {cert_type, CertType} + ], + %% Value + cert_expiry_at_from_path(Path) + }. + +collect_certs_json(CertsData) -> + lists:foldl( + fun({Labels, Data}, AccIn) -> + [(maps:from_list(Labels))#{emqx_cert_expiry_at => Data} | AccIn] + end, + _InitAcc = [], + CertsData + ). + +%% TODO: cert manager for more generic utils functions +cert_expiry_at_from_path(Path0) -> + Path = emqx_schema:naive_env_interpolation(Path0), + {ok, PemBin} = file:read_file(Path), + [CertEntry | _] = public_key:pem_decode(PemBin), + Cert = public_key:pem_entry_decode(CertEntry), + {'utcTime', NotAfterUtc} = + Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', + utc_time_to_epoch(NotAfterUtc). + +utc_time_to_epoch(UtcTime) -> + date_to_expiry_epoch(utc_time_to_datetime(UtcTime)). + +utc_time_to_datetime(Str) -> + {ok, [Year, Month, Day, Hour, Minute, Second], _} = io_lib:fread( + "~2d~2d~2d~2d~2d~2dZ", Str + ), + %% Always assuming YY is in 2000 + {{2000 + Year, Month, Day}, {Hour, Minute, Second}}. + +%% 62167219200 =:= calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}). +-define(EPOCH_START, 62167219200). +-spec date_to_expiry_epoch(calendar:datetime()) -> Seconds :: non_neg_integer(). +date_to_expiry_epoch(DateTime) -> + calendar:datetime_to_gregorian_seconds(DateTime) - ?EPOCH_START. 
+ %% deprecated_since 5.0.10, remove this when 5.1.x do_start() -> emqx_prometheus_sup:start_child(?APP). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 44e0fac16..5bfa3e3a5 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -181,7 +181,7 @@ recommend_setting_example() -> prometheus_data_schema() -> #{ description => - <<"Get Prometheus Data. Note that support for JSON output is deprecated and will be removed in v5.2.">>, + <<"Get Prometheus Data.">>, content => [ {'text/plain', #{schema => #{type => string}}}, From e0feb580b6a69dc6fdc1b3c23ce857769c646c9b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 12 Jan 2024 05:40:28 +0800 Subject: [PATCH 17/89] feat(prometheus): auth metrics with text/plain --- apps/emqx_conf/src/emqx_conf_schema.erl | 1 + .../include/emqx_prometheus.hrl | 12 + apps/emqx_prometheus/rebar.config | 3 +- .../src/emqx_prometheus.app.src | 2 +- apps/emqx_prometheus/src/emqx_prometheus.erl | 11 +- .../src/emqx_prometheus_api.erl | 30 +- .../src/emqx_prometheus_auth.erl | 400 ++++++++++++++++++ .../src/emqx_prometheus_config.erl | 11 +- rel/i18n/emqx_prometheus_api.hocon | 5 + 9 files changed, 467 insertions(+), 8 deletions(-) create mode 100644 apps/emqx_prometheus/src/emqx_prometheus_auth.erl diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index 6614b24e2..571f5785b 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1106,6 +1106,7 @@ tr_prometheus_collectors(Conf) -> prometheus_summary, %% emqx collectors emqx_prometheus, + {'/prometheus/auth', emqx_prometheus_auth}, emqx_prometheus_mria %% builtin vm collectors | prometheus_collectors(Conf) diff --git a/apps/emqx_prometheus/include/emqx_prometheus.hrl b/apps/emqx_prometheus/include/emqx_prometheus.hrl index 8d552f025..9057f2b14 100644 --- 
a/apps/emqx_prometheus/include/emqx_prometheus.hrl +++ b/apps/emqx_prometheus/include/emqx_prometheus.hrl @@ -16,3 +16,15 @@ -define(APP, emqx_prometheus). -define(PROMETHEUS, [prometheus]). + +-define(PROMETHEUS_DEFAULT_REGISTRY, default). +-define(PROMETHEUS_AUTH_REGISTRY, '/prometheus/auth'). +-define(PROMETHEUS_AUTH_COLLECTOR, emqx_prometheus_auth). +-define(PROMETHEUS_DATA_INTEGRATION_REGISTRY, '/prometheus/data_integration'). +-define(PROMETHEUS_DATA_INTEGRATION_COLLECTOR, emqx_prometheus_data_integration). + +-define(PROMETHEUS_ALL_REGISTRYS, [ + ?PROMETHEUS_DEFAULT_REGISTRY, + ?PROMETHEUS_AUTH_REGISTRY, + ?PROMETHEUS_DATA_INTEGRATION_REGISTRY +]). diff --git a/apps/emqx_prometheus/rebar.config b/apps/emqx_prometheus/rebar.config index 12aa9060b..649437765 100644 --- a/apps/emqx_prometheus/rebar.config +++ b/apps/emqx_prometheus/rebar.config @@ -3,7 +3,8 @@ {deps, [ {emqx, {path, "../emqx"}}, {emqx_utils, {path, "../emqx_utils"}}, - {prometheus, {git, "https://github.com/emqx/prometheus.erl", {tag, "v4.10.0.1"}}} + {emqx_auth, {path, "../emqx_auth"}}, + {prometheus, {git, "https://github.com/emqx/prometheus.erl", {tag, "v4.10.0.2"}}} ]}. {edoc_opts, [{preprocess, true}]}. 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus.app.src b/apps/emqx_prometheus/src/emqx_prometheus.app.src index fe0c42566..75c608087 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.app.src +++ b/apps/emqx_prometheus/src/emqx_prometheus.app.src @@ -5,7 +5,7 @@ {vsn, "5.0.19"}, {modules, []}, {registered, [emqx_prometheus_sup]}, - {applications, [kernel, stdlib, prometheus, emqx, emqx_management]}, + {applications, [kernel, stdlib, prometheus, emqx, emqx_auth, emqx_management]}, {mod, {emqx_prometheus_app, []}}, {env, []}, {licenses, ["Apache-2.0"]}, diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 3ac32a47c..7c3283043 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -121,7 +121,7 @@ handle_info(_Msg, State) -> {noreply, State}. push_to_push_gateway(Url, Headers) when is_list(Headers) -> - Data = prometheus_text_format:format(), + Data = prometheus_text_format:format(?PROMETHEUS_DEFAULT_REGISTRY), case httpc:request(post, {Url, Headers, "text/plain", Data}, ?HTTP_OPTIONS, []) of {ok, {{"HTTP/1.1", 200, _}, _RespHeaders, _RespBody}} -> ok; @@ -168,10 +168,10 @@ join_url(Url, JobName0) -> }), lists:concat([Url, "/metrics/job/", unicode:characters_to_list(JobName1)]). -deregister_cleanup(_Registry) -> +deregister_cleanup(?PROMETHEUS_DEFAULT_REGISTRY) -> ok. -collect_mf(_Registry, Callback) -> +collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), @@ -192,6 +192,8 @@ collect_mf(_Registry, Callback) -> _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_olp()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_acl()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_authn()], + ok; +collect_mf(_Registry, _Callback) -> ok. 
%% @private @@ -216,7 +218,7 @@ collect(<<"json">>) -> session => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_session()]) }; collect(<<"prometheus">>) -> - prometheus_text_format:format(). + prometheus_text_format:format(?PROMETHEUS_DEFAULT_REGISTRY). %% @private collect_stats(Name, Stats) -> @@ -809,6 +811,7 @@ cert_expiry_at_from_path(Path0) -> {ok, PemBin} = file:read_file(Path), [CertEntry | _] = public_key:pem_decode(PemBin), Cert = public_key:pem_entry_decode(CertEntry), + %% TODO: Not fully tested for all certs type {'utcTime', NotAfterUtc} = Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', utc_time_to_epoch(NotAfterUtc). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 5bfa3e3a5..1017dd16b 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -28,7 +28,8 @@ -export([ setting/2, - stats/2 + stats/2, + auth/2 ]). -define(TAGS, [<<"Monitor">>]). @@ -39,6 +40,7 @@ api_spec() -> paths() -> [ "/prometheus", + "/prometheus/auth", "/prometheus/stats" ]. @@ -61,6 +63,18 @@ schema("/prometheus") -> #{200 => prometheus_setting_response()} } }; +schema("/prometheus/auth") -> + #{ + 'operationId' => auth, + get => + #{ + description => ?DESC(get_prom_auth_data), + tags => ?TAGS, + security => security(), + responses => + #{200 => prometheus_data_schema()} + } + }; schema("/prometheus/stats") -> #{ 'operationId' => stats, @@ -114,6 +128,20 @@ stats(get, #{headers := Headers}) -> {200, #{<<"content-type">> => <<"text/plain">>}, Data} end. 
+auth(get, #{headers := Headers}) -> + Type = + case maps:get(<<"accept">>, Headers, <<"text/plain">>) of + <<"application/json">> -> <<"json">>; + _ -> <<"prometheus">> + end, + Data = emqx_prometheus_auth:collect(Type), + case Type of + <<"json">> -> + {200, Data}; + <<"prometheus">> -> + {200, #{<<"content-type">> => <<"text/plain">>}, Data} + end. + %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl new file mode 100644 index 000000000..5257f225b --- /dev/null +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -0,0 +1,400 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_prometheus_auth). + +-export([ + deregister_cleanup/1, + collect_mf/2, + collect_metrics/2 +]). + +-export([collect/1]). + +-include("emqx_prometheus.hrl"). +-include_lib("emqx_auth/include/emqx_authn_chains.hrl"). +-include_lib("prometheus/include/prometheus.hrl"). + +-import( + prometheus_model_helpers, + [ + create_mf/5, + gauge_metric/1, + gauge_metrics/1 + ] +). 
+ +-type authn_metric_key() :: + emqx_authn_enable + | emqx_authn_status + | emqx_authn_nomatch + | emqx_authn_total + | emqx_authn_success + | emqx_authn_failed + | emqx_authn_rate + | emqx_authn_rate_last5m + | emqx_authn_rate_max. + +-type authz_metric_key() :: + emqx_authz_enable + | emqx_authz_status + | emqx_authz_nomatch + | emqx_authz_total + | emqx_authz_success + | emqx_authz_failed + | emqx_authz_rate + | emqx_authz_rate_last5m + | emqx_authz_rate_max. + +%% Please don't remove this attribute, prometheus uses it to +%% automatically register collectors. +-behaviour(prometheus_collector). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +-define(METRIC_NAME_PREFIX, "emqx_auth_"). + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(MG0(K, MAP), maps:get(K, MAP, 0)). + +%%-------------------------------------------------------------------- +%% Collector API +%%-------------------------------------------------------------------- + +%% @private +deregister_cleanup(_) -> ok. + +%% @private +-spec collect_mf(_Registry, Callback) -> ok when + _Registry :: prometheus_registry:registry(), + Callback :: prometheus_collector:collect_mf_callback(). +%% erlfmt-ignore +collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> + _ = [add_collect_family(Name, authn_data(), Callback, gauge) || Name <- authn()], + _ = [add_collect_family(Name, authn_users_count_data(), Callback, gauge) || Name <- authn_users_count()], + _ = [add_collect_family(Name, authz_data(), Callback, gauge) || Name <- authz()], + _ = [add_collect_family(Name, authz_rules_count_data(), Callback, gauge) || Name <- authz_rules_count()], + _ = [add_collect_family(Name, banned_count_data(), Callback, gauge) || Name <- banned()], + ok; +collect_mf(_, _) -> + ok. + +%% @private +collect(<<"json">>) -> + %% TODO + #{}; +collect(<<"prometheus">>) -> + prometheus_text_format:format(?PROMETHEUS_AUTH_REGISTRY). 
+ +add_collect_family(Name, Data, Callback, Type) -> + Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). + +collect_metrics(Name, Metrics) -> + collect_auth(Name, Metrics). + +%%-------------------------------------------------------------------- +%% Collector +%%-------------------------------------------------------------------- + +%%==================== +%% Authn overview +collect_auth(K = emqx_authn_enable, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_status, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_nomatch, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_total, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_rate_max, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Authn users count +%% Only provided for `password_based:built_in_database` and `scram:built_in_database` +collect_auth(K = emqx_authn_users_count, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Authz overview +collect_auth(K = emqx_authz_enable, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_status, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_nomatch, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_total, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = 
emqx_authz_rate_max, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Authz rules count +%% Only provided for `file` and `built_in_database` +collect_auth(K = emqx_authz_rules_count, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Banned +collect_auth(emqx_banned_count, Data) -> + gauge_metric(Data). + +%%-------------------------------------------------------------------- +%% Internal functions +%%-------------------------------------------------------------------- + +%%======================================== +%% AuthN (Authentication) +%%======================================== + +%%==================== +%% Authn overview +authn() -> + [ + emqx_authn_enable, + emqx_authn_status, + emqx_authn_nomatch, + emqx_authn_total, + emqx_authn_success, + emqx_authn_failed, + emqx_authn_rate, + emqx_authn_rate_last5m, + emqx_authn_rate_max + ]. + +-spec authn_data() -> #{Key => [Point]} when + Key :: authn_metric_key(), + Point :: {[Label], Metric}, + Label :: IdLabel, + IdLabel :: {id, AuthnName :: binary()}, + Metric :: number(). +authn_data() -> + Authns = emqx_config:get([authentication]), + lists:foldl( + fun(Key, AccIn) -> + AccIn#{Key => authn_backend_to_points(Key, Authns)} + end, + #{}, + authn() + ). + +-spec authn_backend_to_points(Key, list(Authn)) -> list(Point) when + Key :: authn_metric_key(), + Authn :: map(), + Point :: {[Label], Metric}, + Label :: IdLabel, + IdLabel :: {id, AuthnName :: binary()}, + Metric :: number(). +authn_backend_to_points(Key, Authns) -> + do_authn_backend_to_points(Key, Authns, []). + +do_authn_backend_to_points(_K, [], AccIn) -> + lists:reverse(AccIn); +do_authn_backend_to_points(K, [Authn | Rest], AccIn) -> + Id = authenticator_id(Authn), + Point = {[{id, Id}], do_metric(K, Authn, lookup_authn_metrics_local(Id))}, + do_authn_backend_to_points(K, Rest, [Point | AccIn]). 
+ +lookup_authn_metrics_local(Id) -> + case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of + {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + #{ + emqx_authn_status => status_to_number(Status), + emqx_authn_nomatch => ?MG0(nomatch, Counters), + emqx_authn_total => ?MG0(total, Counters), + emqx_authn_success => ?MG0(success, Counters), + emqx_authn_failed => ?MG0(failed, Counters), + emqx_authn_rate => ?MG0(current, Rate), + emqx_authn_rate_last5m => ?MG0(last5m, Rate), + emqx_authn_rate_max => ?MG0(max, Rate) + }; + {error, _Reason} -> + maps:from_keys(authn() -- [emqx_authn_enable], 0) + end. + +%%==================== +%% Authn users count + +authn_users_count() -> + [emqx_authn_users_count]. + +-define(AUTHN_MNESIA, emqx_authn_mnesia). +-define(AUTHN_SCRAM_MNESIA, emqx_authn_scram_mnesia). + +authn_users_count_data() -> + Samples = lists:foldl( + fun + (#{backend := built_in_database, mechanism := password_based} = Authn, AccIn) -> + [auth_data_sample_point(authn, Authn, ?AUTHN_MNESIA) | AccIn]; + (#{backend := built_in_database, mechanism := scram} = Authn, AccIn) -> + [auth_data_sample_point(authn, Authn, ?AUTHN_SCRAM_MNESIA) | AccIn]; + (_, AccIn) -> + AccIn + end, + [], + emqx_config:get([authentication]) + ), + #{emqx_authn_users_count => Samples}. + +%%======================================== +%% AuthZ (Authorization) +%%======================================== + +%%==================== +%% Authz overview +authz() -> + [ + emqx_authz_enable, + emqx_authz_status, + emqx_authz_nomatch, + emqx_authz_total, + emqx_authz_success, + emqx_authz_failed, + emqx_authz_rate, + emqx_authz_rate_last5m, + emqx_authz_rate_max + ]. + +-spec authz_data() -> #{Key => [Point]} when + Key :: authz_metric_key(), + Point :: {[Label], Metric}, + Label :: TypeLabel, + TypeLabel :: {type, AuthZType :: binary()}, + Metric :: number(). 
+authz_data() -> + Authzs = emqx_config:get([authorization, sources]), + lists:foldl( + fun(Key, AccIn) -> + AccIn#{Key => authz_backend_to_points(Key, Authzs)} + end, + #{}, + authz() + ). + +-spec authz_backend_to_points(Key, list(Authz)) -> list(Point) when + Key :: authz_metric_key(), + Authz :: map(), + Point :: {[Label], Metric}, + Label :: TypeLabel, + TypeLabel :: {type, AuthZType :: binary()}, + Metric :: number(). +authz_backend_to_points(Key, Authzs) -> + do_authz_backend_to_points(Key, Authzs, []). + +do_authz_backend_to_points(_K, [], AccIn) -> + lists:reverse(AccIn); +do_authz_backend_to_points(K, [Authz | Rest], AccIn) -> + Type = maps:get(type, Authz), + Point = {[{type, Type}], do_metric(K, Authz, lookup_authz_metrics_local(Type))}, + do_authz_backend_to_points(K, Rest, [Point | AccIn]). + +lookup_authz_metrics_local(Type) -> + case emqx_authz_api_sources:lookup_from_local_node(Type) of + {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + #{ + emqx_authz_status => status_to_number(Status), + emqx_authz_nomatch => ?MG0(nomatch, Counters), + emqx_authz_total => ?MG0(total, Counters), + emqx_authz_success => ?MG0(success, Counters), + emqx_authz_failed => ?MG0(failed, Counters), + emqx_authz_rate => ?MG0(current, Rate), + emqx_authz_rate_last5m => ?MG0(last5m, Rate), + emqx_authz_rate_max => ?MG0(max, Rate) + }; + {error, _Reason} -> + maps:from_keys(authz() -- [emqx_authz_enable], 0) + end. + +%%==================== +%% Authz rules count + +authz_rules_count() -> + [emqx_authz_rules_count]. + +-define(ACL_TABLE, emqx_acl). 
+
+authz_rules_count_data() ->
+    Samples = lists:foldl(
+        fun
+            (#{type := built_in_database} = Authz, AccIn) ->
+                [auth_data_sample_point(authz, Authz, ?ACL_TABLE) | AccIn];
+            (#{type := file}, AccIn) ->
+                #{annotations := #{rules := Rules}} = emqx_authz:lookup(file),
+                Size = erlang:length(Rules),
+                [{[{type, file}], Size} | AccIn];
+            (_, AccIn) ->
+                AccIn
+        end,
+        [],
+        emqx_config:get([authorization, sources])
+    ),
+    #{emqx_authz_rules_count => Samples}.
+
+%%========================================
+%% Banned
+%%========================================
+
+%%====================
+%% Banned count
+
+banned() ->
+    [emqx_banned_count].
+
+-define(BANNED_TABLE, emqx_banned).
+banned_count_data() ->
+    mnesia_size(?BANNED_TABLE).
+
+%%--------------------------------------------------------------------
+%% Helper functions
+%%--------------------------------------------------------------------
+
+authenticator_id(Authn) ->
+    emqx_authn_chains:authenticator_id(Authn).
+
+auth_data_sample_point(authn, Authn, Tab) ->
+    Size = mnesia_size(Tab),
+    Id = authenticator_id(Authn),
+    {[{id, Id}], Size};
+auth_data_sample_point(authz, #{type := Type} = _Authz, Tab) ->
+    Size = mnesia_size(Tab),
+    {[{type, Type}], Size}.
+
+mnesia_size(Tab) ->
+    mnesia:table_info(Tab, size).
+
+do_metric(K, #{enable := B}, _) when K =:= emqx_authn_enable; K =:= emqx_authz_enable ->
+    boolean_to_number(B);
+do_metric(K, _, Metrics) ->
+    ?MG0(K, Metrics).
+
+boolean_to_number(true) -> 1;
+boolean_to_number(false) -> 0.
+
+status_to_number(connected) -> 1;
+status_to_number(stopped) -> 0.
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_config.erl b/apps/emqx_prometheus/src/emqx_prometheus_config.erl
index a24b52537..bf7e747c8 100644
--- a/apps/emqx_prometheus/src/emqx_prometheus_config.erl
+++ b/apps/emqx_prometheus/src/emqx_prometheus_config.erl
@@ -101,7 +101,7 @@ post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) ->
     ok.
update_prometheus(AppEnvs) -> - PrevCollectors = prometheus_registry:collectors(default), + PrevCollectors = all_collectors(), CurCollectors = proplists:get_value(collectors, proplists:get_value(prometheus, AppEnvs)), lists:foreach( fun prometheus_registry:deregister_collector/1, @@ -113,6 +113,15 @@ update_prometheus(AppEnvs) -> ), application:set_env(AppEnvs). +all_collectors() -> + lists:foldl( + fun(Registry, AccIn) -> + prometheus_registry:collectors(Registry) ++ AccIn + end, + _InitAcc = [], + ?PROMETHEUS_ALL_REGISTRYS + ). + update_push_gateway(Prometheus) -> case is_push_gateway_server_enabled(Prometheus) of true -> diff --git a/rel/i18n/emqx_prometheus_api.hocon b/rel/i18n/emqx_prometheus_api.hocon index 0d9b5dc5f..89999fdd7 100644 --- a/rel/i18n/emqx_prometheus_api.hocon +++ b/rel/i18n/emqx_prometheus_api.hocon @@ -15,4 +15,9 @@ get_prom_data.desc: get_prom_data.label: """Prometheus Metrics""" +get_prom_auth_data.desc: +"""Get Prometheus Metrics for AuthN, AuthZ and Banned""" +get_prom_auth_data.label: +"""Prometheus Metrics for Auth""" + } From bf2e4d134a6db126611394b13e78de49f1bde36f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 04:25:53 +0800 Subject: [PATCH 18/89] refactor(prometheus): generic api response funcs --- .../src/emqx_prometheus_api.erl | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 1017dd16b..280f1aa8d 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -19,6 +19,7 @@ -behaviour(minirest_api). -include_lib("hocon/include/hoconsc.hrl"). +-include_lib("emqx/include/logger.hrl"). -export([ api_spec/0, @@ -115,37 +116,40 @@ setting(put, #{body := Body}) -> end. 
stats(get, #{headers := Headers}) -> - Type = - case maps:get(<<"accept">>, Headers, <<"text/plain">>) of - <<"application/json">> -> <<"json">>; - _ -> <<"prometheus">> - end, - Data = emqx_prometheus:collect(Type), - case Type of - <<"json">> -> - {200, Data}; - <<"prometheus">> -> - {200, #{<<"content-type">> => <<"text/plain">>}, Data} - end. + collect(emqx_prometheus, Headers). auth(get, #{headers := Headers}) -> - Type = - case maps:get(<<"accept">>, Headers, <<"text/plain">>) of - <<"application/json">> -> <<"json">>; - _ -> <<"prometheus">> - end, - Data = emqx_prometheus_auth:collect(Type), - case Type of - <<"json">> -> - {200, Data}; - <<"prometheus">> -> - {200, #{<<"content-type">> => <<"text/plain">>}, Data} - end. + collect(emqx_prometheus_auth, Headers). %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- +collect(Module, Headers) -> + Type = response_type(Headers), + Data = + case erlang:function_exported(Module, collect, 1) of + true -> + erlang:apply(Module, collect, [Type]); + false -> + ?SLOG(error, #{ + msg => "prometheus callback module not found, empty data responded", + module_name => Module + }), + <<>> + end, + gen_response(Type, Data). + +response_type(#{<<"accept">> := <<"application/json">>}) -> + <<"json">>; +response_type(_) -> + <<"prometheus">>. + +gen_response(<<"json">>, Data) -> + {200, Data}; +gen_response(<<"prometheus">>, Data) -> + {200, #{<<"content-type">> => <<"text/plain">>}, Data}. 
+ prometheus_setting_request() -> [{prometheus, #{type := Setting}}] = emqx_prometheus_schema:roots(), emqx_dashboard_swagger:schema_with_examples( From 5158395bcf172c3493b9302d73dcccea63559fa8 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 04:39:36 +0800 Subject: [PATCH 19/89] feat(prometheus): data integration prom data --- apps/emqx_conf/src/emqx_conf_schema.erl | 1 + .../src/emqx_prometheus_api.erl | 21 +- .../src/emqx_prometheus_data_integration.erl | 491 ++++++++++++++++++ rel/i18n/emqx_prometheus_api.hocon | 5 + 4 files changed, 516 insertions(+), 2 deletions(-) create mode 100644 apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index 571f5785b..abb2e14e3 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1107,6 +1107,7 @@ tr_prometheus_collectors(Conf) -> %% emqx collectors emqx_prometheus, {'/prometheus/auth', emqx_prometheus_auth}, + {'/prometheus/data_integration', emqx_prometheus_data_integration}, emqx_prometheus_mria %% builtin vm collectors | prometheus_collectors(Conf) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 280f1aa8d..32cb89177 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -30,7 +30,8 @@ -export([ setting/2, stats/2, - auth/2 + auth/2, + data_integration/2 ]). -define(TAGS, [<<"Monitor">>]). @@ -42,7 +43,8 @@ paths() -> [ "/prometheus", "/prometheus/auth", - "/prometheus/stats" + "/prometheus/stats", + "/prometheus/data_integration" ]. 
schema("/prometheus") -> @@ -87,6 +89,18 @@ schema("/prometheus/stats") -> responses => #{200 => prometheus_data_schema()} } + }; +schema("/prometheus/data_integration") -> + #{ + 'operationId' => data_integration, + get => + #{ + description => ?DESC(get_prom_data_integration_data), + tags => ?TAGS, + security => security(), + responses => + #{200 => prometheus_data_schema()} + } }. security() -> @@ -121,6 +135,9 @@ stats(get, #{headers := Headers}) -> auth(get, #{headers := Headers}) -> collect(emqx_prometheus_auth, Headers). +data_integration(get, #{headers := Headers}) -> + collect(emqx_prometheus_data_integration, Headers). + %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl new file mode 100644 index 000000000..4c679b842 --- /dev/null +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -0,0 +1,491 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_prometheus_data_integration). + +-export([ + deregister_cleanup/1, + collect_mf/2, + collect_metrics/2 +]). 
+ +-export([collect/1]). + +-export([add_collect_family/4]). + +-export([ + rules/0, + rules_data/1, + actions/0, + actions_data/1, + actions_exec_count/0, + actions_exec_count_data/0, + schema_registry/0, + schema_registry_data/0, + connectors/0, + connectors_data/0, + rule_specific/0, + rule_specific_data/1, + action_specific/0, + action_specific_data/0, + connector_specific/0, + connector_specific_data/0 +]). + +-include("emqx_prometheus.hrl"). +-include_lib("prometheus/include/prometheus.hrl"). + +-import( + prometheus_model_helpers, + [ + create_mf/5, + gauge_metric/1, + gauge_metrics/1 + ] +). + +%% Please don't remove this attribute, prometheus uses it to +%% automatically register collectors. +-behaviour(prometheus_collector). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +-define(METRIC_NAME_PREFIX, "emqx_data_integration_"). + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(MG0(K, MAP), maps:get(K, MAP, 0)). + +%%-------------------------------------------------------------------- +%% Collector API +%%-------------------------------------------------------------------- + +%% @private +deregister_cleanup(_) -> ok. + +%% @private +-spec collect_mf(_Registry, Callback) -> ok when + _Registry :: prometheus_registry:registry(), + Callback :: prometheus_collector:collect_mf_callback(). 
+%% erlfmt-ignore +collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> + Rules = emqx_rule_engine:get_rules(), + _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], + _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], + _ = [add_collect_family(Name, schema_registry_data(), Callback, gauge) || Name <- schema_registry()], + _ = [add_collect_family(Name, connectors_data(), Callback, gauge) || Name <- connectors()], + _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], + _ = [add_collect_family(Name, action_specific_data(), Callback, gauge) || Name <- action_specific()], + + ok; +collect_mf(_, _) -> + ok. + +%% @private +collect(<<"json">>) -> + %% TODO + #{}; +collect(<<"prometheus">>) -> + prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). + +add_collect_family(Name, Data, Callback, Type) -> + Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). + +collect_metrics(Name, Metrics) -> + collect_di(Name, Metrics). 
+ +%%-------------------------------------------------------------------- +%% Collector +%%-------------------------------------------------------------------- + +%%======================================== +%% Data Integration Overview +%%======================================== + +%%==================== +%% All Rules +%% Rules +collect_di(K = emqx_rule_count, Data) -> + gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_matched_rate, Data) -> + gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_matched_rate_last5m, Data) -> + gauge_metric(?MG(K, Data)); +%%==================== +%% All Actions +collect_di(K = emqx_rules_actions_rate, Data) -> + gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_actions_rate_last5m, Data) -> + gauge_metric(?MG(K, Data)); +%%==================== +%% Schema Registry +collect_di(K = emqx_schema_registry_count, Data) -> + gauge_metric(?MG(K, Data)); +%%==================== +%% Connectors +collect_di(K = emqx_connector_count, Data) -> + gauge_metric(?MG(K, Data)); +%%======================================== +%% Data Integration for Specific: Rule && Action && Connector +%%======================================== + +%%==================== +%% Specific Rule +collect_di(K = emqx_rule_matched, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_passed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_exception, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_no_result, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_total, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_unknown, 
Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched_rate_max, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Specific Action + +collect_di(K = emqx_action_matched, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_inflight, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_received, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_late_reply, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_stopped, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_not_found, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_queue_full, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_other, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_expired, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_queuing, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_rate_max, Data) -> + gauge_metrics(?MG(K, Data)). 
+
+%%====================
+%% Specific Connector
+
+%%--------------------------------------------------------------------
+%% Internal functions
+%%--------------------------------------------------------------------
+
+%%========================================
+%% Data Integration Overview
+%%========================================
+
+%%====================
+%% All Rules
+
+rules() ->
+    [
+        emqx_rule_count,
+        emqx_rules_matched_rate,
+        emqx_rules_matched_rate_last5m
+    ].
+
+-define(RULE_TAB, emqx_rule_engine).
+
+rules_data(Rules) ->
+    Rate = lists:foldl(
+        fun(
+            #{id := Id},
+            #{emqx_rules_matched_rate := Rate, emqx_rules_matched_rate_last5m := RateLast5m} = AccIn
+        ) ->
+            RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id),
+            AccIn#{
+                emqx_rules_matched_rate => Rate +
+                    emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0),
+                emqx_rules_matched_rate_last5m => RateLast5m +
+                    emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0)
+            }
+        end,
+        _InitAcc = maps:from_keys(rules(), 0),
+        Rules
+    ),
+    Rate#{emqx_rule_count => ets:info(?RULE_TAB, size)}.
+
+%%====================
+%% All Actions
+
+actions() ->
+    [
+        emqx_rules_actions_rate,
+        emqx_rules_actions_rate_last5m
+    ].
+
+actions_data(Rules) ->
+    lists:foldl(
+        fun(
+            #{id := Id},
+            #{emqx_rules_actions_rate := Rate, emqx_rules_actions_rate_last5m := RateLast5m} =
+                RatesAcc
+        ) ->
+            RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id),
+            RatesAcc#{
+                emqx_rules_actions_rate => Rate +
+                    emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0),
+                emqx_rules_actions_rate_last5m => RateLast5m +
+                    emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0)
+            }
+        end,
+        _InitAcc = maps:from_keys(actions(), 0),
+        Rules
+    ).
+
+actions_exec_count() ->
+    [
+        emqx_action_sink,
+        emqx_action_source
+    ].
+
+actions_exec_count_data() ->
+    [].
+
+%%====================
+%% Schema Registry
+
+schema_registry() ->
+    [
+        emqx_schema_registry_count
+    ].
+ +schema_registry_data() -> + #{ + emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) + }. + +%%==================== +%% Connectors + +connectors() -> + [ + emqx_connector_count + ]. + +connectors_data() -> + #{ + emqx_connector_count => + lists:foldl( + fun(List, AccIn) -> erlang:length(List) + AccIn end, + 0, + [ + emqx_connector:list(), emqx_bridge:list(), emqx_bridge_v2:list() + ] + ) + }. + +%%======================================== +%% Data Integration for Specific: Rule && Action && Connector +%%======================================== + +%%==================== +%% Specific Rule +%% With rule_id as label key: `rule_id` + +rule_specific() -> + [ + emqx_rule_matched, + emqx_rule_failed, + emqx_rule_passed, + emqx_rule_failed_exception, + emqx_rule_failed_no_result, + emqx_rule_actions_total, + emqx_rule_actions_success, + emqx_rule_actions_failed, + emqx_rule_actions_failed_out_of_service, + emqx_rule_actions_failed_unknown, + emqx_rule_matched_rate, + emqx_rule_matched_rate_last5m, + emqx_rule_matched_rate_max + ]. + +rule_specific_data(Rules) -> + lists:foldl( + fun(#{id := Id} = Rule, AccIn) -> + merge_acc_with_rules(Id, get_metric(Rule), AccIn) + end, + maps:from_keys(rule_specific(), []), + Rules + ). + +merge_acc_with_rules(Id, RuleMetrics, PointsAcc) -> + maps:fold( + fun(K, V, AccIn) -> + AccIn#{K => [rule_point(Id, V) | ?MG(K, AccIn)]} + end, + PointsAcc, + RuleMetrics + ). + +rule_point(Id, V) -> + {[{rule_id, Id}], V}. 
+ +get_metric(#{id := Id} = _Rule) -> + case emqx_metrics_worker:get_metrics(rule_metrics, Id) of + #{counters := Counters, rate := #{matched := MatchedRate}} -> + #{ + emqx_rule_matched => ?MG(matched, Counters), + emqx_rule_failed => ?MG(failed, Counters), + emqx_rule_passed => ?MG(passed, Counters), + emqx_rule_failed_exception => ?MG('failed.exception', Counters), + emqx_rule_failed_no_result => ?MG('failed.no_result', Counters), + emqx_rule_actions_total => ?MG('actions.total', Counters), + emqx_rule_actions_success => ?MG('actions.success', Counters), + emqx_rule_actions_failed => ?MG('actions.failed', Counters), + emqx_rule_actions_failed_out_of_service => ?MG( + 'actions.failed.out_of_service', Counters + ), + emqx_rule_actions_failed_unknown => ?MG('actions.failed.unknown', Counters), + emqx_rule_matched_rate => ?MG(current, MatchedRate), + emqx_rule_matched_rate_last5m => ?MG(last5m, MatchedRate), + emqx_rule_matched_rate_max => ?MG(max, MatchedRate) + } + end. + +%%==================== +%% Specific Action +%% With action_id: `{type}:{name}` as label key: `action_id` + +action_specific() -> + [ + emqx_action_matched, + emqx_action_dropped, + emqx_action_success, + emqx_action_failed, + emqx_action_rate, + emqx_action_inflight, + emqx_action_received, + emqx_action_late_reply, + emqx_action_retried, + emqx_action_retried_success, + emqx_action_retried_failed, + emqx_action_dropped_resource_stopped, + emqx_action_dropped_resource_not_found, + emqx_action_dropped_queue_full, + emqx_action_dropped_other, + emqx_action_dropped_expired, + emqx_action_queuing, + emqx_action_rate_last5m, + emqx_action_rate_max + ]. + +action_specific_data() -> + lists:foldl( + fun(#{type := Type, name := Name} = _Bridge, AccIn) -> + Id = emqx_bridge_resource:bridge_id(Type, Name), + merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) + end, + maps:from_keys(action_specific(), []), + emqx_bridge:list() + ). 
+ +merge_acc_with_bridges(Id, BridgeMetrics, PointsAcc) -> + maps:fold( + fun(K, V, AccIn) -> + AccIn#{K => [bridge_point(Id, V) | ?MG(K, AccIn)]} + end, + PointsAcc, + BridgeMetrics + ). + +bridge_point(Id, V) -> + {[{action_id, Id}], V}. + +get_bridge_metric(Type, Name) -> + case emqx_bridge:get_metrics(Type, Name) of + #{counters := Counters, rate := #{matched := MatchedRate}, gauges := Gauges} -> + #{ + emqx_action_matched => ?MG0(matched, Counters), + emqx_action_dropped => ?MG0(dropped, Counters), + emqx_action_success => ?MG0(success, Counters), + emqx_action_failed => ?MG0(failed, Counters), + emqx_action_rate => ?MG0(current, MatchedRate), + emqx_action_inflight => ?MG0(inflight, Gauges), + emqx_action_received => ?MG0(received, Counters), + emqx_action_late_reply => ?MG0(late_reply, Counters), + emqx_action_retried => ?MG0(retried, Counters), + emqx_action_retried_success => ?MG0('retried.success', Counters), + emqx_action_retried_failed => ?MG0('retried.failed', Counters), + emqx_action_dropped_resource_stopped => ?MG0('dropped.resource_stopped', Counters), + emqx_action_dropped_resource_not_found => ?MG0( + 'dropped.resource_not_found', Counters + ), + emqx_action_dropped_queue_full => ?MG0('dropped.queue_full', Counters), + emqx_action_dropped_other => ?MG0('dropped.other', Counters), + emqx_action_dropped_expired => ?MG0('dropped.expired', Counters), + emqx_action_queuing => ?MG0(queuing, Gauges), + emqx_action_rate_last5m => ?MG0(last5m, MatchedRate), + emqx_action_rate_max => ?MG0(max, MatchedRate) + } + end. + +%% TODO: Bridge V2 + +%%==================== +%% Specific Connector +%% With connector_id: `{type}:{name}` as label key: `connector_id` + +connector_specific() -> + [ + emqx_connector_enable, + emqx_connector_status + ]. + +connector_specific_data() -> + []. 
+ +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Help funcs diff --git a/rel/i18n/emqx_prometheus_api.hocon b/rel/i18n/emqx_prometheus_api.hocon index 89999fdd7..0c48e3add 100644 --- a/rel/i18n/emqx_prometheus_api.hocon +++ b/rel/i18n/emqx_prometheus_api.hocon @@ -20,4 +20,9 @@ get_prom_auth_data.desc: get_prom_auth_data.label: """Prometheus Metrics for Auth""" +get_prom_data_integration_data.desc: +"""Get Prometheus Metrics for Data Integration""" +get_prom_data_integration_data.label: +"""Prometheus Metrics for Data Integration""" + } From 7832bbc0a43a8786c6deb684b489ea7e137a205e Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 23:55:44 +0800 Subject: [PATCH 20/89] fix(prom): schema registry not in ce edition --- .../src/emqx_prometheus_data_integration.erl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 4c679b842..5039cb9b6 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -295,6 +295,7 @@ actions_exec_count_data() -> %%==================== %% Schema Registry +-if(?EMQX_RELEASE_EDITION == ee). schema_registry() -> [ emqx_schema_registry_count @@ -304,6 +305,13 @@ schema_registry_data() -> #{ emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. +-else. +schema_registry() -> + []. + +schema_registry_data() -> + #{}. +-endif. 
%%==================== %% Connectors From 0dca9905dd41a493910e78c848e54885fdc7a58f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 00:41:54 +0800 Subject: [PATCH 21/89] test(prometheus): `{Registry, Collector}` tuple --- .../src/emqx_prometheus_config.erl | 4 ++++ .../test/emqx_prometheus_SUITE.erl | 5 ++++- .../test/emqx_prometheus_api_SUITE.erl | 21 +++++++++++++++---- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_config.erl b/apps/emqx_prometheus/src/emqx_prometheus_config.erl index bf7e747c8..f5140938c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_config.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_config.erl @@ -25,6 +25,10 @@ -export([conf/0, is_push_gateway_server_enabled/1]). -export([to_recommend_type/1]). +-ifdef(TEST). +-export([all_collectors/0]). +-endif. + update(Config) -> case emqx_conf:update( diff --git a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl index 496919b10..11ca49f89 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl @@ -103,13 +103,16 @@ init_group() -> ok = mria_rlog:wait_for_shards([?CLUSTER_RPC_SHARD], infinity), meck:new(emqx_alarm, [non_strict, passthrough, no_link]), meck:expect(emqx_alarm, activate, 3, ok), - meck:expect(emqx_alarm, deactivate, 3, ok). + meck:expect(emqx_alarm, deactivate, 3, ok), + meck:new(emqx_license_checker, [non_strict, passthrough, no_link]), + meck:expect(emqx_license_checker, expiry_epoch, fun() -> 1859673600 end). end_group() -> ekka:stop(), mria:stop(), mria_mnesia:delete_schema(), meck:unload(emqx_alarm), + meck:unload(emqx_license_checker), emqx_common_test_helpers:stop_apps([emqx_prometheus]). 
end_per_group(_Group, Config) -> diff --git a/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl index cc20e60c7..6092a5d54 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl @@ -128,8 +128,8 @@ t_legacy_prometheus_api(_) -> Conf2 = emqx_utils_json:decode(Response2, [return_maps]), ?assertEqual(NewConf, Conf2), - EnvCollectors = application:get_env(prometheus, collectors, []), - PromCollectors = prometheus_registry:collectors(default), + EnvCollectors = env_collectors(), + PromCollectors = all_collectors(), ?assertEqual(lists:sort(EnvCollectors), lists:sort(PromCollectors)), ?assert(lists:member(prometheus_vm_statistics_collector, EnvCollectors), EnvCollectors), @@ -221,8 +221,8 @@ t_prometheus_api(_) -> Conf2 = emqx_utils_json:decode(Response2, [return_maps]), ?assertMatch(NewConf, Conf2), - EnvCollectors = application:get_env(prometheus, collectors, []), - PromCollectors = prometheus_registry:collectors(default), + EnvCollectors = env_collectors(), + PromCollectors = all_collectors(), ?assertEqual(lists:sort(EnvCollectors), lists:sort(PromCollectors)), ?assert(lists:member(prometheus_vm_statistics_collector, EnvCollectors), EnvCollectors), @@ -308,3 +308,16 @@ request_stats(JsonAuth, Auth) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Internal Functions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +env_collectors() -> + do_env_collectors(application:get_env(prometheus, collectors, []), []). + +do_env_collectors([], Acc) -> + lists:reverse(Acc); +do_env_collectors([{_Registry, Collector} | Rest], Acc) when is_atom(Collector) -> + do_env_collectors(Rest, [Collector | Acc]); +do_env_collectors([Collector | Rest], Acc) when is_atom(Collector) -> + do_env_collectors(Rest, [Collector | Acc]). + +all_collectors() -> + emqx_prometheus_config:all_collectors(). 
From 38a90bd2732dbfd4647480a19a373a4d159ff242 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 02:45:18 +0800 Subject: [PATCH 22/89] fix(prom_auth): authn && authz data response by json --- .../src/emqx_prometheus_auth.erl | 82 ++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 5257f225b..c7c65b2cb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -96,10 +96,26 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> %% TODO - #{}; + #{ + emqx_authn => collect_auth_data(authn), + emqx_authz => collect_auth_data(authz), + emqx_banned => collect_banned_data() + }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_AUTH_REGISTRY). +collect_auth_data(AuthDataType) -> + maps:fold( + fun(K, V, Acc) -> + zip_auth_metrics(AuthDataType, K, V, Acc) + end, + [], + auth_data(AuthDataType) + ). + +collect_banned_data() -> + #{emqx_banned_count => banned_count_data()}. + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). @@ -398,3 +414,67 @@ boolean_to_number(false) -> 0. status_to_number(connected) -> 1; status_to_number(stopped) -> 0. + +zip_auth_metrics(AuthDataType, K, V, Acc) -> + LabelK = label_key(AuthDataType), + UserOrRuleD = user_rule_data(AuthDataType), + do_zip_auth_metrics(LabelK, UserOrRuleD, K, V, Acc).
+ +do_zip_auth_metrics(LabelK, UserOrRuleD, Key, Points, [] = _AccIn) -> + lists:foldl( + fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> + %% for initialized empty AccIn + %% The following fields will be put into Result + %% For Authn: + %% `id`, `emqx_authn_users_count` + %% For Authz: + %% `type`, `emqx_authz_rules_count` + Point = (users_or_rule_count(LabelK, LabelV, UserOrRuleD))#{ + LabelK => LabelV, Key => Metric + }, + [Point | AccIn2] + end, + [], + Points + ); +do_zip_auth_metrics(LabelK, _UserOrRuleD, Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl( + fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> + [#{LabelK => Id, Key => Metric} | AccIn2] + end, + [], + Points + ), + lists:zipwith( + fun(AllResulted, ThisKeyMetricOut) -> + maps:merge(AllResulted, ThisKeyMetricOut) + end, + AllResultedAcc, + ThisKeyResult + ). + +auth_data(authn) -> authn_data(); +auth_data(authz) -> authz_data(). + +label_key(authn) -> id; +label_key(authz) -> type. + +user_rule_data(authn) -> authn_users_count_data(); +user_rule_data(authz) -> authz_rules_count_data(). + +users_or_rule_count(id, Id, #{emqx_authn_users_count := Points} = _AuthnUsersD) -> + case lists:keyfind([{id, Id}], 1, Points) of + {_, Metric} -> + #{emqx_authn_users_count => Metric}; + false -> + #{} + end; +users_or_rule_count(type, Type, #{emqx_authz_rules_count := Points} = _AuthzRulesD) -> + case lists:keyfind([{type, Type}], 1, Points) of + {_, Metric} -> + #{emqx_authz_rules_count => Metric}; + false -> + #{} + end; +users_or_rule_count(_, _, _) -> + #{}. 
From 4b23930fceaa27c3c9149ead6c3e71eedee502e3 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 10:14:51 +0800 Subject: [PATCH 23/89] fix(prometheus): license expiry and schema_registry only for ee --- apps/emqx_prometheus/src/emqx_prometheus.erl | 29 +++++++++++++++---- .../src/emqx_prometheus_data_integration.erl | 19 ++++++++---- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 7c3283043..286243c0c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -175,11 +175,9 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), - LicenseData = emqx_license_data(), ClusterData = emqx_cluster_data(), CertsData = emqx_certs_data(), %% TODO: license expiry epoch and cert expiry epoch should be cached - _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], _ = [add_collect_family(Name, CertsData, Callback, gauge) || Name <- emqx_certs()], _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], @@ -192,6 +190,7 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_olp()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_acl()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_authn()], + ok = maybe_collect_family_license(Callback), ok; collect_mf(_Registry, _Callback) -> ok. @@ -201,13 +200,11 @@ collect(<<"json">>) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), - LicenseData = emqx_license_data(), %% TODO: FIXME! 
%% emqx_metrics_olp()), %% emqx_metrics_acl()), %% emqx_metrics_authn()), - #{ - license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()]), + (maybe_collect_license())#{ certs => collect_certs_json(emqx_certs_data()), stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), @@ -237,6 +234,24 @@ collect_metrics(Name, Metrics) -> add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). +-if(?EMQX_RELEASE_EDITION == ee). +maybe_collect_family_license(Callback) -> + LicenseData = emqx_license_data(), + _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], + ok. + +maybe_collect_license() -> + LicenseData = emqx_license_data(), + #{license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()])}. + +-else. +maybe_collect_family_license(_) -> + ok. + +maybe_collect_license() -> + #{}. +-endif. + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -707,6 +722,7 @@ emqx_cluster_data() -> {nodes_stopped, length(Stopped)} ]. +-if(?EMQX_RELEASE_EDITION == ee). emqx_license() -> [ emqx_license_expiry_at @@ -716,6 +732,9 @@ emqx_license_data() -> [ {expiry_at, emqx_license_checker:expiry_epoch()} ]. +-else. + +-endif. 
emqx_certs() -> [ diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 5039cb9b6..092ed3a71 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -86,11 +86,10 @@ collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [add_collect_family(Name, schema_registry_data(), Callback, gauge) || Name <- schema_registry()], _ = [add_collect_family(Name, connectors_data(), Callback, gauge) || Name <- connectors()], _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], _ = [add_collect_family(Name, action_specific_data(), Callback, gauge) || Name <- action_specific()], - + ok = maybe_collect_family_schema_registry(Callback), ok; collect_mf(_, _) -> ok. @@ -108,6 +107,18 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). +-if(?EMQX_RELEASE_EDITION == ee). +maybe_collect_family_schema_registry(Callback) -> + _ = [ + add_collect_family(Name, schema_registry_data(), Callback, gauge) + || Name <- schema_registry() + ], + ok. +-else. +maybe_collect_family_schema_registry(_) -> + ok. +-endif. + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -306,11 +317,7 @@ schema_registry_data() -> emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. -else. -schema_registry() -> - []. -schema_registry_data() -> - #{}. -endif. 
%%==================== From f457def2010694a486d5886c09b9ccc7dedd8183 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 11:07:32 +0800 Subject: [PATCH 24/89] fix: use `id` uniformly as the label key for rules and actions --- .../src/emqx_prometheus_data_integration.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 092ed3a71..318b0fa16 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -384,7 +384,7 @@ merge_acc_with_rules(Id, RuleMetrics, PointsAcc) -> ). rule_point(Id, V) -> - {[{rule_id, Id}], V}. + {[{id, Id}], V}. get_metric(#{id := Id} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of @@ -448,14 +448,14 @@ action_specific_data() -> merge_acc_with_bridges(Id, BridgeMetrics, PointsAcc) -> maps:fold( fun(K, V, AccIn) -> - AccIn#{K => [bridge_point(Id, V) | ?MG(K, AccIn)]} + AccIn#{K => [action_point(Id, V) | ?MG(K, AccIn)]} end, PointsAcc, BridgeMetrics ). -bridge_point(Id, V) -> - {[{action_id, Id}], V}. +action_point(Id, V) -> + {[{id, Id}], V}. 
get_bridge_metric(Type, Name) -> case emqx_bridge:get_metrics(Type, Name) of From 76d9ace5829df572dd56718aa4718a8d0bfde773 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 11:28:54 +0800 Subject: [PATCH 25/89] fix: connector_count contains bridge_v1 and bridge_v2 --- .../src/emqx_prometheus_data_integration.erl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 318b0fa16..9840d2409 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -330,14 +330,8 @@ connectors() -> connectors_data() -> #{ - emqx_connector_count => - lists:foldl( - fun(List, AccIn) -> erlang:length(List) + AccIn end, - 0, - [ - emqx_connector:list(), emqx_bridge:list(), emqx_bridge_v2:list() - ] - ) + %% Both Bridge V1 and V2 + emqx_connector_count => erlang:length(emqx_bridge:list()) }. %%======================================== From 36f009b0c2fcc18d7a71498c04c0c7c76f4d1b7f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 13:49:38 +0800 Subject: [PATCH 26/89] fix(prom): connectors specific data --- .../src/emqx_prometheus_data_integration.erl | 65 +++++++++++++++---- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 9840d2409..3546697cc 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -36,13 +36,13 @@ schema_registry/0, schema_registry_data/0, connectors/0, - connectors_data/0, + connectors_data/1, rule_specific/0, rule_specific_data/1, action_specific/0, - action_specific_data/0, + action_specific_data/1, connector_specific/0, - connector_specific_data/0 + connector_specific_data/1 ]). 
-include("emqx_prometheus.hrl"). @@ -84,11 +84,13 @@ deregister_cleanup(_) -> ok. %% erlfmt-ignore collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), + Bridges =emqx_bridge:list(), _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [add_collect_family(Name, connectors_data(), Callback, gauge) || Name <- connectors()], + _ = [add_collect_family(Name, connectors_data(Bridges), Callback, gauge) || Name <- connectors()], _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], - _ = [add_collect_family(Name, action_specific_data(), Callback, gauge) || Name <- action_specific()], + _ = [add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) || Name <- action_specific()], + _ = [add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) || Name <- connector_specific()], ok = maybe_collect_family_schema_registry(Callback), ok; collect_mf(_, _) -> @@ -222,11 +224,15 @@ collect_di(K = emqx_action_queuing, Data) -> collect_di(K = emqx_action_rate_last5m, Data) -> gauge_metrics(?MG(K, Data)); collect_di(K = emqx_action_rate_max, Data) -> - gauge_metrics(?MG(K, Data)). - + gauge_metrics(?MG(K, Data)); %%==================== %% Specific Connector +collect_di(K = emqx_connector_enable, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_connector_status, Data) -> + gauge_metrics(?MG(K, Data)). + %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- @@ -328,10 +334,10 @@ connectors() -> emqx_connector_count ]. -connectors_data() -> +connectors_data(Brdiges) -> #{ %% Both Bridge V1 and V2 - emqx_connector_count => erlang:length(emqx_bridge:list()) + emqx_connector_count => erlang:length(Brdiges) }. 
%%======================================== @@ -429,14 +435,14 @@ action_specific() -> emqx_action_rate_max ]. -action_specific_data() -> +action_specific_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = _Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, maps:from_keys(action_specific(), []), - emqx_bridge:list() + Bridges ). merge_acc_with_bridges(Id, BridgeMetrics, PointsAcc) -> @@ -491,10 +497,43 @@ connector_specific() -> emqx_connector_status ]. -connector_specific_data() -> - []. +connector_specific_data(Bridges) -> + lists:foldl( + fun(#{type := Type, name := Name} = Bridge, AccIn) -> + Id = emqx_bridge_resource:bridge_id(Type, Name), + merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) + end, + maps:from_keys(connector_specific(), []), + Bridges + ). + +merge_acc_with_connectors(Id, ConnectorMetrics, PointsAcc) -> + maps:fold( + fun(K, V, AccIn) -> + AccIn#{K => [connector_point(Id, V) | ?MG(K, AccIn)]} + end, + PointsAcc, + ConnectorMetrics + ). + +connector_point(Id, V) -> + {[{id, Id}], V}. + +get_connector_status(#{resource_data := ResourceData} = _Bridge) -> + Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), + Status = ?MG(status, ResourceData), + #{ + emqx_connector_enable => boolean_to_number(Enabled), + emqx_connector_status => status_to_number(Status) + }. %%-------------------------------------------------------------------- %%-------------------------------------------------------------------- %% Help funcs + +boolean_to_number(true) -> 1; +boolean_to_number(false) -> 0. + +status_to_number(connected) -> 1; +status_to_number(disconnected) -> 0. 
From 8f7964f435306e45ecc7d02dda3023d9b1fb2ea7 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 14:41:26 +0800 Subject: [PATCH 27/89] feat(prom): data integration metrics in josn format --- .../src/emqx_prometheus_auth.erl | 3 +- .../src/emqx_prometheus_data_integration.erl | 159 ++++++++++++++---- 2 files changed, 130 insertions(+), 32 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index c7c65b2cb..06d6246f1 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -95,7 +95,6 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - %% TODO #{ emqx_authn => collect_auth_data(authn), emqx_authz => collect_auth_data(authz), diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 3546697cc..3cdc3a01c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -26,24 +26,7 @@ -export([add_collect_family/4]). 
--export([ - rules/0, - rules_data/1, - actions/0, - actions_data/1, - actions_exec_count/0, - actions_exec_count_data/0, - schema_registry/0, - schema_registry_data/0, - connectors/0, - connectors_data/1, - rule_specific/0, - rule_specific_data/1, - action_specific/0, - action_specific_data/1, - connector_specific/0, - connector_specific_data/1 -]). +-export([actions_exec_count/0, actions_exec_count_data/0]). -include("emqx_prometheus.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -81,34 +64,97 @@ deregister_cleanup(_) -> ok. -spec collect_mf(_Registry, Callback) -> ok when _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). -%% erlfmt-ignore collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), - Bridges =emqx_bridge:list(), + Bridges = emqx_bridge:list(), + %% Data Integration Overview _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [add_collect_family(Name, connectors_data(Bridges), Callback, gauge) || Name <- connectors()], - _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], - _ = [add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) || Name <- action_specific()], - _ = [add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) || Name <- connector_specific()], + _ = [ + add_collect_family(Name, connectors_data(Bridges), Callback, gauge) + || Name <- connectors() + ], ok = maybe_collect_family_schema_registry(Callback), + + %% Rule Specific + _ = [ + add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) + || Name <- rule_specific() + ], + + %% Action Specific + _ = [ + add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) + || Name <- action_specific() + ], + + %% Connector Specific + _ = [ + 
add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) + || Name <- connector_specific() + ], + ok; collect_mf(_, _) -> ok. %% @private collect(<<"json">>) -> - %% TODO - #{}; + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + #{ + data_integration_overview => collect_data_integration(overview, {Rules, Bridges}), + rules => collect_data_integration(rules, Rules), + actions => collect_data_integration(actions, Bridges), + connectors => collect_data_integration(connectors, Bridges) + }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). +collect_data_integration(overview, {Rules, Bridges}) -> + RulesD = rules_data(Rules), + ActionsD = actions_data(Rules), + ConnectorsD = connectors_data(Bridges), + + M1 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, rules()), + M2 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ActionsD)} end, #{}, actions()), + M3 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, connectors()), + M4 = maybe_collect_schema_registry(), + + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]); +collect_data_integration(Type = rules, Rules) -> + maps:fold( + fun(K, V, Acc) -> + zip_metrics(Type, K, V, Acc) + end, + [], + di_data(Type, Rules) + ); +collect_data_integration(Type = actions, Rules) -> + maps:fold( + fun(K, V, Acc) -> + zip_metrics(Type, K, V, Acc) + end, + [], + di_data(Type, Rules) + ); +collect_data_integration(Type = connectors, Bridges) -> + maps:fold( + fun(K, V, Acc) -> + zip_metrics(Type, K, V, Acc) + end, + [], + di_data(Type, Bridges) + ). + -if(?EMQX_RELEASE_EDITION == ee). 
maybe_collect_family_schema_registry(Callback) -> _ = [ @@ -116,9 +162,15 @@ maybe_collect_family_schema_registry(Callback) -> || Name <- schema_registry() ], ok. + +maybe_collect_schema_registry() -> + schema_registry_data(). -else. maybe_collect_family_schema_registry(_) -> ok. + +maybe_collect_schema_registry() -> + #{}. -endif. %%-------------------------------------------------------------------- @@ -307,7 +359,7 @@ actions_exec_count() -> ]. actions_exec_count_data() -> - []. + #{}. %%==================== %% Schema Registry @@ -485,8 +537,6 @@ get_bridge_metric(Type, Name) -> } end. -%% TODO: Bridge V2 - %%==================== %% Specific Connector %% With connector_id: `{type}:{name}` as label key: `connector_id` @@ -537,3 +587,52 @@ boolean_to_number(false) -> 0. status_to_number(connected) -> 1; status_to_number(disconnected) -> 0. + +zip_metrics(Type, K, V, Acc) -> + LabelK = label_key(Type), + do_zip_metrics(LabelK, K, V, Acc). + +do_zip_metrics(LabelK, Key, Points, [] = _AccIn) -> + lists:foldl( + fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> + %% for initialized empty AccIn + %% The following fields will be put into Result + %% For Rules: + %% `id` => [RULE_ID] + %% For Actions + %% `id` => [ACTION_ID] + %% FOR Connectors + %% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID + %% formatted with {type}:{name} + Point = + #{ + LabelK => LabelV, Key => Metric + }, + [Point | AccIn2] + end, + [], + Points + ); +do_zip_metrics(LabelK, Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl( + fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> + [#{LabelK => Id, Key => Metric} | AccIn2] + end, + [], + Points + ), + lists:zipwith( + fun(AllResulted, ThisKeyMetricOut) -> + maps:merge(AllResulted, ThisKeyMetricOut) + end, + AllResultedAcc, + ThisKeyResult + ). 
+ +di_data(rules, Rules) -> rule_specific_data(Rules); +di_data(actions, Bridges) -> action_specific_data(Bridges); +di_data(connectors, Bridges) -> connector_specific_data(Bridges). + +label_key(rules) -> id; +label_key(actions) -> id; +label_key(connectors) -> id. From a18c4d193aa360930bddb1ae0b2ca63c43c0344c Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 15:32:29 +0800 Subject: [PATCH 28/89] refactor: abstract function call --- .../src/emqx_prometheus_data_integration.erl | 31 ++++++------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 3cdc3a01c..72fd7a6e9 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -103,7 +103,7 @@ collect(<<"json">>) -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ - data_integration_overview => collect_data_integration(overview, {Rules, Bridges}), + data_integration_overview => collect_data_integration_overview(Rules, Bridges), rules => collect_data_integration(rules, Rules), actions => collect_data_integration(actions, Bridges), connectors => collect_data_integration(connectors, Bridges) @@ -111,15 +111,17 @@ collect(<<"json">>) -> collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%==================== +%% API Helpers add_collect_family(Name, Data, Callback, Type) -> + %% TODO: help document from Name Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). 
-collect_data_integration(overview, {Rules, Bridges}) -> +collect_data_integration_overview(Rules, Bridges) -> RulesD = rules_data(Rules), ActionsD = actions_data(Rules), ConnectorsD = connectors_data(Bridges), @@ -129,30 +131,15 @@ collect_data_integration(overview, {Rules, Bridges}) -> M3 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, connectors()), M4 = maybe_collect_schema_registry(), - lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]); -collect_data_integration(Type = rules, Rules) -> + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]). + +collect_data_integration(Type, DataSeed) -> maps:fold( fun(K, V, Acc) -> zip_metrics(Type, K, V, Acc) end, [], - di_data(Type, Rules) - ); -collect_data_integration(Type = actions, Rules) -> - maps:fold( - fun(K, V, Acc) -> - zip_metrics(Type, K, V, Acc) - end, - [], - di_data(Type, Rules) - ); -collect_data_integration(Type = connectors, Bridges) -> - maps:fold( - fun(K, V, Acc) -> - zip_metrics(Type, K, V, Acc) - end, - [], - di_data(Type, Bridges) + di_data(Type, DataSeed) ). -if(?EMQX_RELEASE_EDITION == ee). From c3e9533260c852bb256a7d0824920b9c7e7a5fdf Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 16 Jan 2024 19:45:04 +0800 Subject: [PATCH 29/89] fix: prometheus auth metrics fields and type - rm rate fields - fix few fields type to counter --- .../src/emqx_prometheus_auth.erl | 161 ++++++++---------- 1 file changed, 74 insertions(+), 87 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 06d6246f1..57406d2d2 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -32,31 +32,26 @@ [ create_mf/5, gauge_metric/1, - gauge_metrics/1 + gauge_metrics/1, + counter_metrics/1 ] ). 
--type authn_metric_key() :: +-type authn_metric_name() :: emqx_authn_enable | emqx_authn_status | emqx_authn_nomatch | emqx_authn_total | emqx_authn_success - | emqx_authn_failed - | emqx_authn_rate - | emqx_authn_rate_last5m - | emqx_authn_rate_max. + | emqx_authn_failed. --type authz_metric_key() :: +-type authz_metric_name() :: emqx_authz_enable | emqx_authz_status | emqx_authz_nomatch | emqx_authz_total | emqx_authz_success - | emqx_authz_failed - | emqx_authz_rate - | emqx_authz_rate_last5m - | emqx_authz_rate_max. + | emqx_authz_failed. %% Please don't remove this attribute, prometheus uses it to %% automatically register collectors. @@ -71,6 +66,36 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). +-define(AUTHNS_WITH_TYPE, [ + {emqx_authn_enable, gauge}, + {emqx_authn_status, gauge}, + {emqx_authn_nomatch, counter}, + {emqx_authn_total, counter}, + {emqx_authn_success, counter}, + {emqx_authn_failed, counter} +]). + +-define(AUTHZS_WITH_TYPE, [ + {emqx_authz_enable, gauge}, + {emqx_authz_status, gauge}, + {emqx_authz_nomatch, counter}, + {emqx_authz_total, counter}, + {emqx_authz_success, counter}, + {emqx_authz_failed, counter} +]). + +-define(AUTHN_USERS_COUNT_WITH_TYPE, [ + {emqx_authn_users_count, gauge} +]). + +-define(AUTHZ_RULES_COUNT_WITH_TYPE, [ + {emqx_authz_rules_count, gauge} +]). + +-define(BANNED_WITH_TYPE, [ + {emqx_banned_count, gauge} +]). + %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -84,11 +109,11 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). 
%% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - _ = [add_collect_family(Name, authn_data(), Callback, gauge) || Name <- authn()], - _ = [add_collect_family(Name, authn_users_count_data(), Callback, gauge) || Name <- authn_users_count()], - _ = [add_collect_family(Name, authz_data(), Callback, gauge) || Name <- authz()], - _ = [add_collect_family(Name, authz_rules_count_data(), Callback, gauge) || Name <- authz_rules_count()], - _ = [add_collect_family(Name, banned_count_data(), Callback, gauge) || Name <- banned()], + ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, authn_data()), + ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, authn_users_count_data()), + ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, authz_data()), + ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, authz_rules_count_data()), + ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, banned_count_data()), ok; collect_mf(_, _) -> ok. @@ -115,6 +140,10 @@ collect_auth_data(AuthDataType) -> collect_banned_data() -> #{emqx_banned_count => banned_count_data()}. +add_collect_family(Callback, MetricWithType, Data) -> + _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type} <- MetricWithType], + ok. + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). 
@@ -132,19 +161,13 @@ collect_auth(K = emqx_authn_enable, Data) -> collect_auth(K = emqx_authn_status, Data) -> gauge_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_nomatch, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_total, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_failed, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authn_rate, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authn_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authn_rate_max, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); %%==================== %% Authn users count %% Only provided for `password_based:built_in_database` and `scram:built_in_database` @@ -157,19 +180,13 @@ collect_auth(K = emqx_authz_enable, Data) -> collect_auth(K = emqx_authz_status, Data) -> gauge_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_nomatch, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_total, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_failed, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_rate, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_rate_max, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); %%==================== %% Authz rules count %% Only provided for `file` and `built_in_database` @@ -190,21 +207,9 @@ collect_auth(emqx_banned_count, Data) -> %%==================== %% Authn overview -authn() -> - [ - emqx_authn_enable, - 
emqx_authn_status, - emqx_authn_nomatch, - emqx_authn_total, - emqx_authn_success, - emqx_authn_failed, - emqx_authn_rate, - emqx_authn_rate_last5m, - emqx_authn_rate_max - ]. -spec authn_data() -> #{Key => [Point]} when - Key :: authn_metric_key(), + Key :: authn_metric_name(), Point :: {[Label], Metric}, Label :: IdLabel, IdLabel :: {id, AuthnName :: binary()}, @@ -216,11 +221,11 @@ authn_data() -> AccIn#{Key => authn_backend_to_points(Key, Authns)} end, #{}, - authn() + authn_metric_names() ). -spec authn_backend_to_points(Key, list(Authn)) -> list(Point) when - Key :: authn_metric_key(), + Key :: authn_metric_name(), Authn :: map(), Point :: {[Label], Metric}, Label :: IdLabel, @@ -238,27 +243,24 @@ do_authn_backend_to_points(K, [Authn | Rest], AccIn) -> lookup_authn_metrics_local(Id) -> case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of - {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ emqx_authn_status => status_to_number(Status), emqx_authn_nomatch => ?MG0(nomatch, Counters), emqx_authn_total => ?MG0(total, Counters), emqx_authn_success => ?MG0(success, Counters), - emqx_authn_failed => ?MG0(failed, Counters), - emqx_authn_rate => ?MG0(current, Rate), - emqx_authn_rate_last5m => ?MG0(last5m, Rate), - emqx_authn_rate_max => ?MG0(max, Rate) + emqx_authn_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authn() -- [emqx_authn_enable], 0) + maps:from_keys(authn_metric_names() -- [emqx_authn_enable], 0) end. +authn_metric_names() -> + metric_names(?AUTHNS_WITH_TYPE). + %%==================== %% Authn users count -authn_users_count() -> - [emqx_authn_users_count]. - -define(AUTHN_MNESIA, emqx_authn_mnesia). -define(AUTHN_SCRAM_MNESIA, emqx_authn_scram_mnesia). 
@@ -283,21 +285,9 @@ authn_users_count_data() -> %%==================== %% Authz overview -authz() -> - [ - emqx_authz_enable, - emqx_authz_status, - emqx_authz_nomatch, - emqx_authz_total, - emqx_authz_success, - emqx_authz_failed, - emqx_authz_rate, - emqx_authz_rate_last5m, - emqx_authz_rate_max - ]. -spec authz_data() -> #{Key => [Point]} when - Key :: authz_metric_key(), + Key :: authz_metric_name(), Point :: {[Label], Metric}, Label :: TypeLabel, TypeLabel :: {type, AuthZType :: binary()}, @@ -309,11 +299,11 @@ authz_data() -> AccIn#{Key => authz_backend_to_points(Key, Authzs)} end, #{}, - authz() + authz_metric_names() ). -spec authz_backend_to_points(Key, list(Authz)) -> list(Point) when - Key :: authz_metric_key(), + Key :: authz_metric_name(), Authz :: map(), Point :: {[Label], Metric}, Label :: TypeLabel, @@ -331,27 +321,24 @@ do_authz_backend_to_points(K, [Authz | Rest], AccIn) -> lookup_authz_metrics_local(Type) -> case emqx_authz_api_sources:lookup_from_local_node(Type) of - {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ emqx_authz_status => status_to_number(Status), emqx_authz_nomatch => ?MG0(nomatch, Counters), emqx_authz_total => ?MG0(total, Counters), emqx_authz_success => ?MG0(success, Counters), - emqx_authz_failed => ?MG0(failed, Counters), - emqx_authz_rate => ?MG0(current, Rate), - emqx_authz_rate_last5m => ?MG0(last5m, Rate), - emqx_authz_rate_max => ?MG0(max, Rate) + emqx_authz_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authz() -- [emqx_authz_enable], 0) + maps:from_keys(authz_metric_names() -- [emqx_authz_enable], 0) end. +authz_metric_names() -> + metric_names(?AUTHZS_WITH_TYPE). + %%==================== %% Authz rules count -authz_rules_count() -> - [emqx_authz_rules_count]. - -define(ACL_TABLE, emqx_acl). 
authz_rules_count_data() -> @@ -378,9 +365,6 @@ authz_rules_count_data() -> %%==================== %% Banned count -banned() -> - [emqx_banned_count]. - -define(BANNED_TABLE, emqx_banned). banned_count_data() -> mnesia_size(?BANNED_TABLE). @@ -477,3 +461,6 @@ users_or_rule_count(type, Type, #{emqx_authz_rules_count := Points} = _AuthzRule end; users_or_rule_count(_, _, _) -> #{}. + +metric_names(MetricWithType) when is_list(MetricWithType) -> + [Name || {Name, _Type} <- MetricWithType]. From 94032aafb212ad0d15a4056be24931f41930bac9 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 16 Jan 2024 22:59:00 +0800 Subject: [PATCH 30/89] fix(prom_data_integration): fix metric type --- .../src/emqx_prometheus_data_integration.erl | 338 +++++++----------- 1 file changed, 123 insertions(+), 215 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 72fd7a6e9..c41d9a6fb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -26,8 +26,6 @@ -export([add_collect_family/4]). --export([actions_exec_count/0, actions_exec_count_data/0]). - -include("emqx_prometheus.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -36,7 +34,8 @@ [ create_mf/5, gauge_metric/1, - gauge_metrics/1 + gauge_metrics/1, + counter_metrics/1 ] ). @@ -53,6 +52,58 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). +-define(RULES_WITH_TYPE, [ + {emqx_rules_count, gauge} +]). + +-define(CONNECTORS_WITH_TYPE, [ + {emqx_connectors_count, gauge} +]). 
+ +-define(RULES_SPECIFIC_WITH_TYPE, [ + {emqx_rule_matched, counter}, + {emqx_rule_failed, counter}, + {emqx_rule_passed, counter}, + {emqx_rule_failed_exception, counter}, + {emqx_rule_failed_no_result, counter}, + {emqx_rule_actions_total, counter}, + {emqx_rule_actions_success, counter}, + {emqx_rule_actions_failed, counter}, + {emqx_rule_actions_failed_out_of_service, counter}, + {emqx_rule_actions_failed_unknown, counter} +]). + +-define(ACTION_SPECIFIC_WITH_TYPE, [ + {emqx_action_matched, counter}, + {emqx_action_dropped, counter}, + {emqx_action_success, counter}, + {emqx_action_failed, counter}, + {emqx_action_inflight, gauge}, + {emqx_action_received, counter}, + {emqx_action_late_reply, counter}, + {emqx_action_retried, counter}, + {emqx_action_retried_success, counter}, + {emqx_action_retried_failed, counter}, + {emqx_action_dropped_resource_stopped, counter}, + {emqx_action_dropped_resource_not_found, counter}, + {emqx_action_dropped_queue_full, counter}, + {emqx_action_dropped_other, counter}, + {emqx_action_dropped_expired, counter}, + {emqx_action_queuing, gauge} +]). + +-define(CONNECTOR_SPECIFIC_WITH_TYPE, [ + {emqx_connector_enable, gauge}, + {emqx_connector_status, gauge} +]). + +-if(?EMQX_RELEASE_EDITION == ee). +-define(SCHEMA_REGISTRY_WITH_TYPE, [ + emqx_schema_registrys_count +]). +-else. +-endif. 
+ %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -68,31 +119,20 @@ collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), %% Data Integration Overview - _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], - _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [ - add_collect_family(Name, connectors_data(Bridges), Callback, gauge) - || Name <- connectors() - ], + ok = add_collect_family(Callback, ?RULES_WITH_TYPE, rules_data(Rules)), + ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, connectors_data(Bridges)), ok = maybe_collect_family_schema_registry(Callback), %% Rule Specific - _ = [ - add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) - || Name <- rule_specific() - ], + ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, rule_specific_data(Rules)), %% Action Specific - _ = [ - add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) - || Name <- action_specific() - ], + ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, action_specific_data(Bridges)), %% Connector Specific - _ = [ - add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) - || Name <- connector_specific() - ], + ok = add_collect_family( + Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, connector_specific_data(Bridges) + ), ok; collect_mf(_, _) -> @@ -114,6 +154,10 @@ collect(<<"prometheus">>) -> %%==================== %% API Helpers +add_collect_family(Callback, MetricWithType, Data) -> + _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type} <- MetricWithType], + ok. + add_collect_family(Name, Data, Callback, Type) -> %% TODO: help document from Name Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). 
@@ -123,15 +167,21 @@ collect_metrics(Name, Metrics) -> collect_data_integration_overview(Rules, Bridges) -> RulesD = rules_data(Rules), - ActionsD = actions_data(Rules), ConnectorsD = connectors_data(Bridges), - M1 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, rules()), - M2 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ActionsD)} end, #{}, actions()), - M3 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, connectors()), - M4 = maybe_collect_schema_registry(), + M1 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, + #{}, + metric_names(?RULES_WITH_TYPE) + ), + M2 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, + #{}, + metric_names(?CONNECTORS_WITH_TYPE) + ), + M3 = maybe_collect_schema_registry(), - lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]). + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). collect_data_integration(Type, DataSeed) -> maps:fold( @@ -144,10 +194,7 @@ collect_data_integration(Type, DataSeed) -> -if(?EMQX_RELEASE_EDITION == ee). maybe_collect_family_schema_registry(Callback) -> - _ = [ - add_collect_family(Name, schema_registry_data(), Callback, gauge) - || Name <- schema_registry() - ], + ok = add_collect_family(Callback, ?SCHEMA_REGISTRY_WITH_TYPE, schema_registry_data()), ok. 
maybe_collect_schema_registry() -> @@ -171,25 +218,15 @@ maybe_collect_schema_registry() -> %%==================== %% All Rules %% Rules -collect_di(K = emqx_rule_count, Data) -> - gauge_metric(?MG(K, Data)); -collect_di(K = emqx_rules_matched_rate, Data) -> - gauge_metric(?MG(K, Data)); -collect_di(K = emqx_rules_matched_rate_last5m, Data) -> - gauge_metric(?MG(K, Data)); -%%==================== -%% All Actions -collect_di(K = emqx_rules_actions_rate, Data) -> - gauge_metric(?MG(K, Data)); -collect_di(K = emqx_rules_actions_rate_last5m, Data) -> +collect_di(K = emqx_rules_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Schema Registry -collect_di(K = emqx_schema_registry_count, Data) -> +collect_di(K = emqx_schema_registrys_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Connectors -collect_di(K = emqx_connector_count, Data) -> +collect_di(K = emqx_connectors_count, Data) -> gauge_metric(?MG(K, Data)); %%======================================== %% Data Integration for Specific: Rule && Action && Connector @@ -198,71 +235,61 @@ collect_di(K = emqx_connector_count, Data) -> %%==================== %% Specific Rule collect_di(K = emqx_rule_matched, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_passed, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed_exception, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed_no_result, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_total, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_failed, Data) 
-> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_failed_unknown, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_matched_rate, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_matched_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_matched_rate_max, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); %%==================== %% Specific Action collect_di(K = emqx_action_matched, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_failed, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_rate, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_inflight, Data) -> + %% inflight type: gauge gauge_metrics(?MG(K, Data)); collect_di(K = emqx_action_received, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_late_reply, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_retried, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_retried_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_retried_failed, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_resource_stopped, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_resource_not_found, Data) -> - gauge_metrics(?MG(K, 
Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_queue_full, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_other, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_expired, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_queuing, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_rate_max, Data) -> + %% queuing type: gauge gauge_metrics(?MG(K, Data)); %%==================== %% Specific Connector @@ -283,100 +310,30 @@ collect_di(K = emqx_connector_status, Data) -> %%==================== %% All Rules -rules() -> - [ - emqx_rule_count, - emqx_rules_matched_rate, - emqx_rules_matched_rate_last5m - ]. - -define(RULE_TAB, emqx_rule_engine). - -rules_data(Rules) -> - Rate = lists:foldl( - fun( - #{id := Id}, - #{emqx_rules_matched_rate := Rate, emqx_rules_matched_rate_last5m := RateLast5m} = AccIn - ) -> - RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id), - AccIn#{ - emqx_rules_matched_rate => Rate + - emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0), - emqx_rules_matched_rate_last5m => RateLast5m + - emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0) - } - end, - _InitAcc = maps:from_keys(rules(), 0), - Rules - ), - Rate#{emqx_rule_count => ets:info(?RULE_TAB, size)}. - -%%==================== -%% All Actions - -actions() -> - [ - emqx_rules_actions_rate, - emqx_rules_actions_rate_last5m - ]. 
- -actions_data(Rules) -> - lists:foldl( - fun( - #{id := Id}, - #{emqx_rules_actions_rate := Rate, emqx_rules_actions_rate_last5m := RateLast5m} = - _AccIn - ) -> - RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id), - _AccIn#{ - emqx_rules_actions_rate => Rate + - emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0), - emqx_rules_actions_rate_last5m => RateLast5m + - emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0) - } - end, - _InitAcc = maps:from_keys(actions(), 0), - Rules - ). - -actions_exec_count() -> - [ - emqx_action_sink, - emqx_action_source - ]. - -actions_exec_count_data() -> - #{}. +rules_data(_Rules) -> + #{ + emqx_rules_count => ets:info(?RULE_TAB, size) + }. %%==================== %% Schema Registry -if(?EMQX_RELEASE_EDITION == ee). -schema_registry() -> - [ - emqx_schema_registry_count - ]. - schema_registry_data() -> #{ - emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) + emqx_schema_registrys_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. -else. - -endif. %%==================== %% Connectors -connectors() -> - [ - emqx_connector_count - ]. - connectors_data(Brdiges) -> #{ %% Both Bridge V1 and V2 - emqx_connector_count => erlang:length(Brdiges) + emqx_connectors_count => erlang:length(Brdiges) }. %%======================================== @@ -387,29 +344,12 @@ connectors_data(Brdiges) -> %% Specific Rule %% With rule_id as label key: `rule_id` -rule_specific() -> - [ - emqx_rule_matched, - emqx_rule_failed, - emqx_rule_passed, - emqx_rule_failed_exception, - emqx_rule_failed_no_result, - emqx_rule_actions_total, - emqx_rule_actions_success, - emqx_rule_actions_failed, - emqx_rule_actions_failed_out_of_service, - emqx_rule_actions_failed_unknown, - emqx_rule_matched_rate, - emqx_rule_matched_rate_last5m, - emqx_rule_matched_rate_max - ]. 
- rule_specific_data(Rules) -> lists:foldl( fun(#{id := Id} = Rule, AccIn) -> merge_acc_with_rules(Id, get_metric(Rule), AccIn) end, - maps:from_keys(rule_specific(), []), + maps:from_keys(metric_names(?RULES_SPECIFIC_WITH_TYPE), []), Rules ). @@ -427,7 +367,7 @@ rule_point(Id, V) -> get_metric(#{id := Id} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of - #{counters := Counters, rate := #{matched := MatchedRate}} -> + #{counters := Counters} -> #{ emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), @@ -440,10 +380,7 @@ get_metric(#{id := Id} = _Rule) -> emqx_rule_actions_failed_out_of_service => ?MG( 'actions.failed.out_of_service', Counters ), - emqx_rule_actions_failed_unknown => ?MG('actions.failed.unknown', Counters), - emqx_rule_matched_rate => ?MG(current, MatchedRate), - emqx_rule_matched_rate_last5m => ?MG(last5m, MatchedRate), - emqx_rule_matched_rate_max => ?MG(max, MatchedRate) + emqx_rule_actions_failed_unknown => ?MG('actions.failed.unknown', Counters) } end. @@ -451,36 +388,13 @@ get_metric(#{id := Id} = _Rule) -> %% Specific Action %% With action_id: `{type}:{name}` as label key: `action_id` -action_specific() -> - [ - emqx_action_matched, - emqx_action_dropped, - emqx_action_success, - emqx_action_failed, - emqx_action_rate, - emqx_action_inflight, - emqx_action_received, - emqx_action_late_reply, - emqx_action_retried, - emqx_action_retried_success, - emqx_action_retried_failed, - emqx_action_dropped_resource_stopped, - emqx_action_dropped_resource_not_found, - emqx_action_dropped_queue_full, - emqx_action_dropped_other, - emqx_action_dropped_expired, - emqx_action_queuing, - emqx_action_rate_last5m, - emqx_action_rate_max - ]. 
- action_specific_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = _Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, - maps:from_keys(action_specific(), []), + maps:from_keys(metric_names(?ACTION_SPECIFIC_WITH_TYPE), []), Bridges ). @@ -498,13 +412,12 @@ action_point(Id, V) -> get_bridge_metric(Type, Name) -> case emqx_bridge:get_metrics(Type, Name) of - #{counters := Counters, rate := #{matched := MatchedRate}, gauges := Gauges} -> + #{counters := Counters, gauges := Gauges} -> #{ emqx_action_matched => ?MG0(matched, Counters), emqx_action_dropped => ?MG0(dropped, Counters), emqx_action_success => ?MG0(success, Counters), emqx_action_failed => ?MG0(failed, Counters), - emqx_action_rate => ?MG0(current, MatchedRate), emqx_action_inflight => ?MG0(inflight, Gauges), emqx_action_received => ?MG0(received, Counters), emqx_action_late_reply => ?MG0(late_reply, Counters), @@ -518,9 +431,7 @@ get_bridge_metric(Type, Name) -> emqx_action_dropped_queue_full => ?MG0('dropped.queue_full', Counters), emqx_action_dropped_other => ?MG0('dropped.other', Counters), emqx_action_dropped_expired => ?MG0('dropped.expired', Counters), - emqx_action_queuing => ?MG0(queuing, Gauges), - emqx_action_rate_last5m => ?MG0(last5m, MatchedRate), - emqx_action_rate_max => ?MG0(max, MatchedRate) + emqx_action_queuing => ?MG0(queuing, Gauges) } end. @@ -528,19 +439,13 @@ get_bridge_metric(Type, Name) -> %% Specific Connector %% With connector_id: `{type}:{name}` as label key: `connector_id` -connector_specific() -> - [ - emqx_connector_enable, - emqx_connector_status - ]. 
- connector_specific_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) end, - maps:from_keys(connector_specific(), []), + maps:from_keys(metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE), []), Bridges ). @@ -623,3 +528,6 @@ di_data(connectors, Bridges) -> connector_specific_data(Bridges). label_key(rules) -> id; label_key(actions) -> id; label_key(connectors) -> id. + +metric_names(MetricWithType) when is_list(MetricWithType) -> + [Name || {Name, _Type} <- MetricWithType]. From fb330f77e602dcf0a587c89535c93af3b09db61c Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 16 Jan 2024 16:32:41 +0800 Subject: [PATCH 31/89] feat(prometheus): api `format_mode` parameter support - node(default): The only supported format_mode for PushGateway. Return the current node's metrics. - Without label `node_name` with content-type: `text/plain`. - Without key `node_name` with content-type: `application/json` - nodes_aggregated: Return all nodes metrics Arithmetic-Sum or Logical-Sum. See details in callback modules. - Logical-Sum for metrics named with `xxx_enable` or `xxx_status`. - Arithmetic-Sum for other metrics. `node_name` field: - Without label `node_name` with content-type: `text/plain`. - Without key `node_name` with content-type: `application/json` - nodes_unaggregated: Return all nodes metrics without aggregated. `node_name` field: - _With_ label `node_name` with content-type: `text/plain`. 
- _With_ key `node_name` with content-type: `application/json` --- .../src/emqx_prometheus_api.erl | 60 ++++++++++++++++--- .../src/emqx_prometheus_sup.erl | 3 + 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 32cb89177..9263b6a6a 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -21,10 +21,19 @@ -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/logger.hrl"). +-import( + hoconsc, + [ + mk/2, + ref/1 + ] +). + -export([ api_spec/0, paths/0, - schema/1 + schema/1, + fields/1 ]). -export([ @@ -35,6 +44,8 @@ ]). -define(TAGS, [<<"Monitor">>]). +-define(IS_TRUE(Val), ((Val =:= true) orelse (Val =:= <<"true">>))). +-define(IS_FALSE(Val), ((Val =:= false) orelse (Val =:= <<"false">>))). api_spec() -> emqx_dashboard_swagger:spec(?MODULE, #{check_schema => true}). @@ -73,6 +84,7 @@ schema("/prometheus/auth") -> #{ description => ?DESC(get_prom_auth_data), tags => ?TAGS, + parameters => [ref(format_mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -85,6 +97,7 @@ schema("/prometheus/stats") -> #{ description => ?DESC(get_prom_data), tags => ?TAGS, + parameters => [ref(format_mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -97,6 +110,7 @@ schema("/prometheus/data_integration") -> #{ description => ?DESC(get_prom_data_integration_data), tags => ?TAGS, + parameters => [ref(format_mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -108,6 +122,22 @@ security() -> true -> [#{'basicAuth' => []}, #{'bearerAuth' => []}]; false -> [] end. + +fields(format_mode) -> + [ + {format_mode, + mk( + hoconsc:enum([node, nodes_aggregated, nodes_unaggregated]), + #{ + default => node, + desc => <<"Metrics format mode.">>, + in => query, + required => false, + example => false + } + )} + ]. 
+ %%-------------------------------------------------------------------- %% API Handler funcs %%-------------------------------------------------------------------- @@ -129,21 +159,21 @@ setting(put, #{body := Body}) -> {500, 'INTERNAL_ERROR', Message} end. -stats(get, #{headers := Headers}) -> - collect(emqx_prometheus, Headers). +stats(get, #{headers := Headers, query_string := Qs}) -> + collect(emqx_prometheus, collect_opts(Headers, Qs)). -auth(get, #{headers := Headers}) -> - collect(emqx_prometheus_auth, Headers). +auth(get, #{headers := Headers, query_string := Qs}) -> + collect(emqx_prometheus_auth, collect_opts(Headers, Qs)). -data_integration(get, #{headers := Headers}) -> - collect(emqx_prometheus_data_integration, Headers). +data_integration(get, #{headers := Headers, query_string := Qs}) -> + collect(emqx_prometheus_data_integration, collect_opts(Headers, Qs)). %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- -collect(Module, Headers) -> - Type = response_type(Headers), +collect(Module, #{type := Type, format_mode := FormatMode}) -> + erlang:put(format_mode, FormatMode), Data = case erlang:function_exported(Module, collect, 1) of true -> @@ -157,11 +187,23 @@ collect(Module, Headers) -> end, gen_response(Type, Data). +collect_opts(Headers, Qs) -> + #{type => response_type(Headers), format_mode => format_mode(Qs)}. + response_type(#{<<"accept">> := <<"application/json">>}) -> <<"json">>; response_type(_) -> <<"prometheus">>. +format_mode(#{<<"format_mode">> := <<"node">>}) -> + node; +format_mode(#{<<"format_mode">> := <<"nodes_aggregated">>}) -> + nodes_aggregated; +format_mode(#{<<"format_mode">> := <<"nodes_unaggregated">>}) -> + nodes_unaggregated; +format_mode(_) -> + node. 
+ gen_response(<<"json">>, Data) -> {200, Data}; gen_response(<<"prometheus">>, Data) -> diff --git a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl index ea8a2ebaa..1c7eb73e4 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl @@ -67,6 +67,9 @@ init([]) -> Children = case emqx_prometheus_config:is_push_gateway_server_enabled(Conf) of false -> []; + %% TODO: add push gateway for endpoints + %% `/prometheus/auth` + %% `/prometheus/data_integration` true -> [?CHILD(emqx_prometheus, Conf)] end, {ok, {{one_for_one, 10, 3600}, Children}}. From 57f3efde63dd254952754060faa9773ced15a860 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 17 Jan 2024 17:18:46 +0800 Subject: [PATCH 32/89] feat(prom_auth): cluster metrics with different format-mode --- apps/emqx/priv/bpapi.versions | 1 + .../src/emqx_prometheus_api.erl | 12 +- .../src/emqx_prometheus_auth.erl | 312 +++++++++++++----- .../src/proto/emqx_prometheus_proto_v2.erl | 52 +++ 4 files changed, 290 insertions(+), 87 deletions(-) create mode 100644 apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl diff --git a/apps/emqx/priv/bpapi.versions b/apps/emqx/priv/bpapi.versions index 9bd824242..859d7fbe0 100644 --- a/apps/emqx/priv/bpapi.versions +++ b/apps/emqx/priv/bpapi.versions @@ -58,6 +58,7 @@ {emqx_persistent_session_ds,1}. {emqx_plugins,1}. {emqx_prometheus,1}. +{emqx_prometheus,2}. {emqx_resource,1}. {emqx_retainer,1}. {emqx_retainer,2}. diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 9263b6a6a..ea71e7ee2 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -43,6 +43,8 @@ data_integration/2 ]). +-export([lookup_from_local_nodes/3]). + -define(TAGS, [<<"Monitor">>]). -define(IS_TRUE(Val), ((Val =:= true) orelse (Val =:= <<"true">>))). 
-define(IS_FALSE(Val), ((Val =:= false) orelse (Val =:= <<"false">>))). @@ -138,6 +140,10 @@ fields(format_mode) -> )} ]. +%% bpapi +lookup_from_local_nodes(M, F, A) -> + erlang:apply(M, F, A). + %%-------------------------------------------------------------------- %% API Handler funcs %%-------------------------------------------------------------------- @@ -195,11 +201,11 @@ response_type(#{<<"accept">> := <<"application/json">>}) -> response_type(_) -> <<"prometheus">>. -format_mode(#{<<"format_mode">> := <<"node">>}) -> +format_mode(#{<<"format_mode">> := node}) -> node; -format_mode(#{<<"format_mode">> := <<"nodes_aggregated">>}) -> +format_mode(#{<<"format_mode">> := nodes_aggregated}) -> nodes_aggregated; -format_mode(#{<<"format_mode">> := <<"nodes_unaggregated">>}) -> +format_mode(#{<<"format_mode">> := nodes_unaggregated}) -> nodes_unaggregated; format_mode(_) -> node. diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 57406d2d2..3e9a9d007 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -23,6 +23,11 @@ -export([collect/1]). +%% for bpapi +-export([ + fetch_metric_data_from_local_node/0 +]). + -include("emqx_prometheus.hrl"). -include_lib("emqx_auth/include/emqx_authn_chains.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -65,6 +70,7 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). +-define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). -define(AUTHNS_WITH_TYPE, [ {emqx_authn_enable, gauge}, @@ -96,6 +102,13 @@ {emqx_banned_count, gauge} ]). +-define(LOGICAL_SUM_METRIC_NAMES, [ + emqx_authn_enable, + emqx_authn_status, + emqx_authz_enable, + emqx_authz_status +]). 
+ %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -109,37 +122,29 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). %% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, authn_data()), - ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, authn_users_count_data()), - ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, authz_data()), - ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, authz_rules_count_data()), - ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, banned_count_data()), + RawData = raw_data(erlang:get(format_mode)), + ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), + ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), + ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), + ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, ?MG(authz_rules_count, RawData)), + ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, ?MG(banned_count, RawData)), ok; collect_mf(_, _) -> ok. %% @private collect(<<"json">>) -> + FormatMode = erlang:get(format_mode), + RawData = raw_data(FormatMode), + %% TODO: merge node name in json format #{ - emqx_authn => collect_auth_data(authn), - emqx_authz => collect_auth_data(authz), + emqx_authn => collect_json_data(?MG(authn, RawData)), + emqx_authz => collect_json_data(?MG(authz, RawData)), emqx_banned => collect_banned_data() }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_AUTH_REGISTRY). -collect_auth_data(AuthDataType) -> - maps:fold( - fun(K, V, Acc) -> - zip_auth_metrics(AuthDataType, K, V, Acc) - end, - [], - auth_data(AuthDataType) - ). - -collect_banned_data() -> - #{emqx_banned_count => banned_count_data()}. 
- add_collect_family(Callback, MetricWithType, Data) -> _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type} <- MetricWithType], ok. @@ -150,6 +155,38 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_auth(Name, Metrics). +%% @private +fetch_metric_data_from_local_node() -> + {node(self()), #{ + authn => authn_data(), + authz => authz_data() + }}. + +fetch_cluster_consistented_metric_data() -> + #{ + authn_users_count => authn_users_count_data(), + authz_rules_count => authz_rules_count_data(), + banned_count => banned_count_data() + }. + +%% raw data for different format modes +raw_data(nodes_aggregated) -> + AggregatedNodesMetrics = aggre_cluster(all_nodes_metrics()), + maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(nodes_unaggregated) -> + %% then fold from all nodes + AllNodesMetrics = with_node_name_label(all_nodes_metrics()), + maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(node) -> + {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), + maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). + +all_nodes_metrics() -> + Nodes = mria:running_nodes(), + _ResL = emqx_prometheus_proto_v2:raw_prom_data( + Nodes, ?MODULE, fetch_metric_data_from_local_node, [] + ). + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -370,9 +407,174 @@ banned_count_data() -> mnesia_size(?BANNED_TABLE). 
%%-------------------------------------------------------------------- -%% Helper functions +%% Collect functions %%-------------------------------------------------------------------- +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `application/json` +collect_json_data(Data) -> + maps:fold( + fun(K, V, Acc) -> + zip_json_metrics(K, V, Acc) + end, + [], + Data + ). + +collect_banned_data() -> + #{emqx_banned_count => banned_count_data()}. + +zip_json_metrics(Key, Points, [] = _AccIn) -> + lists:foldl( + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + %% for initialized empty AccIn + %% The following fields will be put into Result + %% For Authn: + %% `id`, `emqx_authn_users_count` + %% For Authz: + %% `type`, `emqx_authz_rules_count`n + Point = (maps:merge(LablesKVMap, users_or_rule_count(LablesKVMap)))#{Key => Metric}, + [Point | AccIn2] + end, + [], + Points + ); +zip_json_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl( + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] + end, + [], + Points + ), + lists:zipwith( + fun(AllResulted, ThisKeyMetricOut) -> + maps:merge(AllResulted, ThisKeyMetricOut) + end, + AllResultedAcc, + ThisKeyResult + ). + +user_rule_data(authn) -> authn_users_count_data(); +user_rule_data(authz) -> authz_rules_count_data(). + +users_or_rule_count(#{id := Id}) -> + #{emqx_authn_users_count := Points} = user_rule_data(authn), + case lists:keyfind([{id, Id}], 1, Points) of + {_, Metric} -> + #{emqx_authn_users_count => Metric}; + false -> + #{} + end; +users_or_rule_count(#{type := Type}) -> + #{emqx_authz_rules_count := Points} = user_rule_data(authz), + case lists:keyfind([{type, Type}], 1, Points) of + {_, Metric} -> + #{emqx_authz_rules_count => Metric}; + false -> + #{} + end; +users_or_rule_count(_) -> + #{}. 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `text/plain` +aggre_cluster(ResL) -> + do_aggre_cluster(ResL, aggre_or_zip_init_acc()). + +do_aggre_cluster([], AccIn) -> + AccIn; +do_aggre_cluster( + [{ok, {_NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], + #{authn := AuthnAcc, authz := AuthzAcc} = AccIn +) -> + do_aggre_cluster( + Rest, + AccIn#{ + authn => do_aggre_metric(NodeAuthnMetrics, AuthnAcc), + authz => do_aggre_metric(NodeAuthzMetrics, AuthzAcc) + } + ); +do_aggre_cluster([{_, _} | Rest], AccIn) -> + do_aggre_cluster(Rest, AccIn). + +do_aggre_metric(NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_aggre_metric(K, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NMetric = + case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of + true -> + logic_sum(Metric, ?PG0(Labels, AccIn)); + false -> + Metric + ?PG0(Labels, AccIn) + end, + [{Labels, NMetric} | AccIn] + end, + AccL, + NodeMetrics + ). + +logic_sum(N1, N2) when + (N1 > 0 andalso N2 > 0) +-> + 1; +logic_sum(_, _) -> + 0. + +with_node_name_label(ResL) -> + do_with_node_name_label(ResL, aggre_or_zip_init_acc()). + +do_with_node_name_label([], AccIn) -> + AccIn; +do_with_node_name_label( + [{ok, {NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], + #{authn := AuthnAcc, authz := AuthzAcc} = AccIn +) -> + do_with_node_name_label( + Rest, + AccIn#{ + authn => zip_with_node_name(NodeName, NodeAuthnMetrics, AuthnAcc), + authz => zip_with_node_name(NodeName, NodeAuthzMetrics, AuthzAcc) + } + ); +do_with_node_name_label([{_, _} | Rest], AccIn) -> + do_with_node_name_label(Rest, AccIn). 
+ +zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NLabels = [{node_name, NodeName} | Labels], + [{NLabels, Metric} | AccIn] + end, + AccL, + NodeMetrics + ). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Helper funcs + authenticator_id(Authn) -> emqx_authn_chains:authenticator_id(Authn). @@ -398,69 +600,11 @@ boolean_to_number(false) -> 0. status_to_number(connected) -> 1; status_to_number(stopped) -> 0. -zip_auth_metrics(AuthDataType, K, V, Acc) -> - LabelK = label_key(AuthDataType), - UserOrRuleD = user_rule_data(AuthDataType), - do_zip_auth_metrics(LabelK, UserOrRuleD, K, V, Acc). - -do_zip_auth_metrics(LabelK, UserOrRuleD, Key, Points, [] = _AccIn) -> - lists:foldl( - fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> - %% for initialized empty AccIn - %% The following fields will be put into Result - %% For Authn: - %% `id`, `emqx_authn_users_count` - %% For Authz: - %% `type`, `emqx_authz_rules_count` - Point = (users_or_rule_count(LabelK, LabelV, UserOrRuleD))#{ - LabelK => LabelV, Key => Metric - }, - [Point | AccIn2] - end, - [], - Points - ); -do_zip_auth_metrics(LabelK, _UserOrRuleD, Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl( - fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> - [#{LabelK => Id, Key => Metric} | AccIn2] - end, - [], - Points - ), - lists:zipwith( - fun(AllResulted, ThisKeyMetricOut) -> - maps:merge(AllResulted, ThisKeyMetricOut) - end, - AllResultedAcc, - ThisKeyResult - ). - -auth_data(authn) -> authn_data(); -auth_data(authz) -> authz_data(). - -label_key(authn) -> id; -label_key(authz) -> type. 
- -user_rule_data(authn) -> authn_users_count_data(); -user_rule_data(authz) -> authz_rules_count_data(). - -users_or_rule_count(id, Id, #{emqx_authn_users_count := Points} = _AuthnUsersD) -> - case lists:keyfind([{id, Id}], 1, Points) of - {_, Metric} -> - #{emqx_authn_users_count => Metric}; - false -> - #{} - end; -users_or_rule_count(type, Type, #{emqx_authz_rules_count := Points} = _AuthzRulesD) -> - case lists:keyfind([{type, Type}], 1, Points) of - {_, Metric} -> - #{emqx_authz_rules_count => Metric}; - false -> - #{} - end; -users_or_rule_count(_, _, _) -> - #{}. - metric_names(MetricWithType) when is_list(MetricWithType) -> [Name || {Name, _Type} <- MetricWithType]. + +aggre_or_zip_init_acc() -> + #{ + authn => maps:from_keys(authn_metric_names(), []), + authz => maps:from_keys(authz_metric_names(), []) + }. diff --git a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl new file mode 100644 index 000000000..e3f9b0a26 --- /dev/null +++ b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl @@ -0,0 +1,52 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_prometheus_proto_v2). + +-behaviour(emqx_bpapi). 
+ +-export([ + introduced_in/0, + start/1, + stop/1, + + raw_prom_data/4 +]). + +-include_lib("emqx/include/bpapi.hrl"). + +introduced_in() -> + "5.5.0". + +-spec start([node()]) -> emqx_rpc:multicall_result(). +start(Nodes) -> + rpc:multicall(Nodes, emqx_prometheus, do_start, [], 5000). + +-spec stop([node()]) -> emqx_rpc:multicall_result(). +stop(Nodes) -> + rpc:multicall(Nodes, emqx_prometheus, do_stop, [], 5000). + +-type key() :: atom() | binary() | [byte()]. + +-spec raw_prom_data([node()], key(), key(), key()) -> emqx_rpc:erpc_multicall(term()). +raw_prom_data(Nodes, M, F, A) -> + erpc:multicall( + Nodes, + emqx_prometheus_api, + lookup_from_local_nodes, + [M, F, A], + 5000 + ). From 4fb1ff2f9d44d37c53072da0fc50babbaba07c2e Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 03:22:26 +0800 Subject: [PATCH 33/89] fix(prom_api): format-mode example value --- apps/emqx_prometheus/src/emqx_prometheus_api.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index ea71e7ee2..9b903b53a 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -135,7 +135,7 @@ fields(format_mode) -> desc => <<"Metrics format mode.">>, in => query, required => false, - example => false + example => node } )} ]. 
From 5914eb5ca58e2b8c99bd0935da8aef87da2b67d3 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 03:27:37 +0800 Subject: [PATCH 34/89] fix(prom_cert): rm cacertfile expiry epoch --- apps/emqx_prometheus/src/emqx_prometheus.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 286243c0c..264d818c9 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -746,10 +746,9 @@ emqx_certs() -> -spec emqx_certs_data() -> [_Point :: {[Label], Epoch}] when - Label :: TypeLabel | NameLabel | CertTypeLabel, + Label :: TypeLabel | NameLabel, TypeLabel :: {listener_type, ssl | wss | quic}, NameLabel :: {listener_name, atom()}, - CertTypeLabel :: {cert_type, cacertfile | certfile}, Epoch :: non_neg_integer(). emqx_certs_data() -> case emqx_config:get([listeners], undefined) of @@ -769,7 +768,7 @@ emqx_certs_data() -> points_of_listeners(Type, AllListeners) -> do_points_of_listeners(Type, maps:get(Type, AllListeners, undefined)). --define(CERT_TYPES, [cacertfile, certfile]). +-define(CERT_TYPES, [certfile]). -spec do_points_of_listeners(Type, TypeOfListeners) -> [_Point :: {[{LabelKey, LabelValue}], Epoch}] @@ -792,7 +791,7 @@ do_points_of_listeners(ListenerType, TypeOfListeners) -> ) of undefined -> AccIn; - Path -> [gen_point(ListenerType, Name, CertType, Path) | AccIn] + Path -> [gen_point(ListenerType, Name, Path) | AccIn] end end, [], @@ -803,13 +802,12 @@ do_points_of_listeners(ListenerType, TypeOfListeners) -> maps:keys(TypeOfListeners) ). 
-gen_point(Type, Name, CertType, Path) -> +gen_point(Type, Name, Path) -> { %% Labels: [{_Labelkey, _LabelValue}] [ {listener_type, Type}, - {listener_name, Name}, - {cert_type, CertType} + {listener_name, Name} ], %% Value cert_expiry_at_from_path(Path) From 5534d5e9dee427b0e4cc90a7f23cdbedf8587e6d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 09:17:22 +0800 Subject: [PATCH 35/89] fix(bpapi): make static_check happy --- apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl index e3f9b0a26..e770dc0ab 100644 --- a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl +++ b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl @@ -39,9 +39,10 @@ start(Nodes) -> stop(Nodes) -> rpc:multicall(Nodes, emqx_prometheus, do_stop, [], 5000). --type key() :: atom() | binary() | [byte()]. +-type key() :: atom(). +-type arg() :: list(term()). --spec raw_prom_data([node()], key(), key(), key()) -> emqx_rpc:erpc_multicall(term()). +-spec raw_prom_data([node()], key(), key(), arg()) -> emqx_rpc:erpc_multicall(term()). raw_prom_data(Nodes, M, F, A) -> erpc:multicall( Nodes, From 9627124d678621358267b236882c91f5628e21df Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 10:08:41 +0800 Subject: [PATCH 36/89] fix(prom_di): metric `emqx_rule_enable` --- .../src/emqx_prometheus_data_integration.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index c41d9a6fb..edbdc1afb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -61,6 +61,7 @@ ]). 
-define(RULES_SPECIFIC_WITH_TYPE, [ + {emqx_rule_enable, gauge}, {emqx_rule_matched, counter}, {emqx_rule_failed, counter}, {emqx_rule_passed, counter}, @@ -234,6 +235,8 @@ collect_di(K = emqx_connectors_count, Data) -> %%==================== %% Specific Rule +collect_di(K = emqx_rule_enable, Data) -> + gauge_metrics(?MG(K, Data)); collect_di(K = emqx_rule_matched, Data) -> counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed, Data) -> @@ -365,10 +368,11 @@ merge_acc_with_rules(Id, RuleMetrics, PointsAcc) -> rule_point(Id, V) -> {[{id, Id}], V}. -get_metric(#{id := Id} = _Rule) -> +get_metric(#{id := Id, enable := Bool} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of #{counters := Counters} -> #{ + emqx_rule_enable => boolean_to_number(Bool), emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), emqx_rule_passed => ?MG(passed, Counters), From c3da7923233b1d07f8208c47d5c8137fdc66f2da Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 11:23:23 +0800 Subject: [PATCH 37/89] feat(prom_di): cluster aggregated/unaggregated metrics --- .../src/emqx_prometheus_data_integration.erl | 310 ++++++++++++++---- 1 file changed, 246 insertions(+), 64 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index edbdc1afb..06a417d2d 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -24,6 +24,11 @@ -export([collect/1]). +%% for bpapi +-export([ + fetch_metric_data_from_local_node/0 +]). + -export([add_collect_family/4]). -include("emqx_prometheus.hrl"). @@ -105,6 +110,12 @@ -else. -endif. +-define(LOGICAL_SUM_METRIC_NAMES, [ + emqx_rule_enable, + emqx_connector_enable, + emqx_connector_status +]). 
+ %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -117,23 +128,24 @@ deregister_cleanup(_) -> ok. _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> - Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), + RawData = raw_data(erlang:get(format_mode)), + %% Data Integration Overview - ok = add_collect_family(Callback, ?RULES_WITH_TYPE, rules_data(Rules)), - ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, connectors_data(Bridges)), + ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), + ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, ?MG(connectors_data, RawData)), ok = maybe_collect_family_schema_registry(Callback), %% Rule Specific - ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, rule_specific_data(Rules)), + RuleSpecificDs = ?MG(rule_specific_data, RawData), + ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, RuleSpecificDs), %% Action Specific - ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, action_specific_data(Bridges)), + ActionSpecificDs = ?MG(action_specific_data, RawData), + ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, ActionSpecificDs), %% Connector Specific - ok = add_collect_family( - Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, connector_specific_data(Bridges) - ), + ConnectorSpecificDs = ?MG(connector_specific_data, RawData), + ok = add_collect_family(Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, ConnectorSpecificDs), ok; collect_mf(_, _) -> @@ -141,13 +153,14 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> + RawData = raw_data(erlang:get(format_mode)), Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ data_integration_overview => collect_data_integration_overview(Rules, Bridges), - rules => 
collect_data_integration(rules, Rules), - actions => collect_data_integration(actions, Bridges), - connectors => collect_data_integration(connectors, Bridges) + rules => collect_json_data(?MG(rule_specific_data, RawData)), + actions => collect_json_data(?MG(action_specific_data, RawData)), + connectors => collect_json_data(?MG(connector_specific_data, RawData)) }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). @@ -166,32 +179,23 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). -collect_data_integration_overview(Rules, Bridges) -> - RulesD = rules_data(Rules), - ConnectorsD = connectors_data(Bridges), +%% @private +fetch_metric_data_from_local_node() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + {node(self()), #{ + rule_specific_data => rule_specific_data(Rules), + action_specific_data => action_specific_data(Bridges), + connector_specific_data => connector_specific_data(Bridges) + }}. - M1 = lists:foldl( - fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, - #{}, - metric_names(?RULES_WITH_TYPE) - ), - M2 = lists:foldl( - fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, - #{}, - metric_names(?CONNECTORS_WITH_TYPE) - ), - M3 = maybe_collect_schema_registry(), - - lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). - -collect_data_integration(Type, DataSeed) -> - maps:fold( - fun(K, V, Acc) -> - zip_metrics(Type, K, V, Acc) - end, - [], - di_data(Type, DataSeed) - ). +fetch_cluster_consistented_metric_data() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + (maybe_collect_schema_registry())#{ + rules_data => rules_data(Rules), + connectors_data => connectors_data(Bridges) + }. -if(?EMQX_RELEASE_EDITION == ee). maybe_collect_family_schema_registry(Callback) -> @@ -208,6 +212,24 @@ maybe_collect_schema_registry() -> #{}. -endif. 
+%% raw data for different format modes +raw_data(nodes_aggregated) -> + AggregatedNodesMetrics = aggre_cluster(metrics_data_from_all_nodes()), + maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(nodes_unaggregated) -> + %% then fold from all nodes + AllNodesMetrics = with_node_name_label(metrics_data_from_all_nodes()), + maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(node) -> + {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), + maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). + +metrics_data_from_all_nodes() -> + Nodes = mria:running_nodes(), + _ResL = emqx_prometheus_proto_v2:raw_prom_data( + Nodes, ?MODULE, fetch_metric_data_from_local_node, [] + ). + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -352,7 +374,7 @@ rule_specific_data(Rules) -> fun(#{id := Id} = Rule, AccIn) -> merge_acc_with_rules(Id, get_metric(Rule), AccIn) end, - maps:from_keys(metric_names(?RULES_SPECIFIC_WITH_TYPE), []), + maps:from_keys(rule_specific_metric_names(), []), Rules ). @@ -388,6 +410,9 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> } end. +rule_specific_metric_names() -> + metric_names(?RULES_SPECIFIC_WITH_TYPE). + %%==================== %% Specific Action %% With action_id: `{type}:{name}` as label key: `action_id` @@ -398,7 +423,7 @@ action_specific_data(Bridges) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, - maps:from_keys(metric_names(?ACTION_SPECIFIC_WITH_TYPE), []), + maps:from_keys(action_specific_metric_names(), []), Bridges ). @@ -439,6 +464,9 @@ get_bridge_metric(Type, Name) -> } end. +action_specific_metric_names() -> + metric_names(?ACTION_SPECIFIC_WITH_TYPE). 
+ %%==================== %% Specific Connector %% With connector_id: `{type}:{name}` as label key: `connector_id` @@ -449,7 +477,7 @@ connector_specific_data(Bridges) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) end, - maps:from_keys(metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE), []), + maps:from_keys(connectr_specific_metric_names(), []), Bridges ). @@ -473,24 +501,47 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> emqx_connector_status => status_to_number(Status) }. -%%-------------------------------------------------------------------- +connectr_specific_metric_names() -> + metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). %%-------------------------------------------------------------------- -%% Help funcs +%% Collect functions +%%-------------------------------------------------------------------- -boolean_to_number(true) -> 1; -boolean_to_number(false) -> 0. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `application/json` -status_to_number(connected) -> 1; -status_to_number(disconnected) -> 0. +collect_data_integration_overview(Rules, Bridges) -> + RulesD = rules_data(Rules), + ConnectorsD = connectors_data(Bridges), -zip_metrics(Type, K, V, Acc) -> - LabelK = label_key(Type), - do_zip_metrics(LabelK, K, V, Acc). + M1 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, + #{}, + metric_names(?RULES_WITH_TYPE) + ), + M2 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, + #{}, + metric_names(?CONNECTORS_WITH_TYPE) + ), + M3 = maybe_collect_schema_registry(), -do_zip_metrics(LabelK, Key, Points, [] = _AccIn) -> + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). + +collect_json_data(Data) -> + maps:fold( + fun(K, V, Acc) -> + zip_json_metrics(K, V, Acc) + end, + [], + Data + ). 
+ +zip_json_metrics(Key, Points, [] = _AccIn) -> lists:foldl( - fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), %% for initialized empty AccIn %% The following fields will be put into Result %% For Rules: @@ -500,19 +551,17 @@ do_zip_metrics(LabelK, Key, Points, [] = _AccIn) -> %% FOR Connectors %% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID %% formatted with {type}:{name} - Point = - #{ - LabelK => LabelV, Key => Metric - }, + Point = LablesKVMap#{Key => Metric}, [Point | AccIn2] end, [], Points ); -do_zip_metrics(LabelK, Key, Points, AllResultedAcc) -> +zip_json_metrics(Key, Points, AllResultedAcc) -> ThisKeyResult = lists:foldl( - fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> - [#{LabelK => Id, Key => Metric} | AccIn2] + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] end, [], Points @@ -525,13 +574,146 @@ do_zip_metrics(LabelK, Key, Points, AllResultedAcc) -> ThisKeyResult ). -di_data(rules, Rules) -> rule_specific_data(Rules); -di_data(actions, Bridges) -> action_specific_data(Bridges); -di_data(connectors, Bridges) -> connector_specific_data(Bridges). +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `text/plain` +aggre_cluster(ResL) -> + do_aggre_cluster(ResL, aggre_or_zip_init_acc()). -label_key(rules) -> id; -label_key(actions) -> id; -label_key(connectors) -> id. 
+do_aggre_cluster([], AccIn) -> + AccIn; +do_aggre_cluster( + [ + {ok, + {_NodeName, #{ + rule_specific_data := NodeRuleMetrics, + action_specific_data := NodeActionMetrics, + connector_specific_data := NodeConnectorMetrics + }}} + | Rest + ], + #{ + rule_specific_data := RuleAcc, + action_specific_data := ActionAcc, + connector_specific_data := ConnAcc + } = AccIn +) -> + do_aggre_cluster( + Rest, + AccIn#{ + %% TODO + rule_specific_data => do_aggre_metric(NodeRuleMetrics, RuleAcc), + action_specific_data => do_aggre_metric(NodeActionMetrics, ActionAcc), + connector_specific_data => do_aggre_metric(NodeConnectorMetrics, ConnAcc) + } + ); +do_aggre_cluster([{_, _} | Rest], AccIn) -> + do_aggre_cluster(Rest, AccIn). + +do_aggre_metric(NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +-define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). + +do_aggre_metric(K, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NMetric = + case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of + true -> + logic_sum(Metric, ?PG0(Labels, AccIn)); + false -> + Metric + ?PG0(Labels, AccIn) + end, + [{Labels, NMetric} | AccIn] + end, + AccL, + NodeMetrics + ). + +with_node_name_label(ResL) -> + do_with_node_name_label( + ResL, + aggre_or_zip_init_acc() + ). 
+ +do_with_node_name_label([], AccIn) -> + AccIn; +do_with_node_name_label( + [ + {ok, + {NodeName, #{ + rule_specific_data := NodeRuleMetrics, + action_specific_data := NodeActionMetrics, + connector_specific_data := NodeConnectorMetrics + }}} + | Rest + ], + #{ + rule_specific_data := RuleAcc, + action_specific_data := ActionAcc, + connector_specific_data := ConnAcc + } = AccIn +) -> + do_with_node_name_label( + Rest, + AccIn#{ + rule_specific_data => zip_with_node_name(NodeName, NodeRuleMetrics, RuleAcc), + action_specific_data => zip_with_node_name(NodeName, NodeActionMetrics, ActionAcc), + connector_specific_data => zip_with_node_name(NodeName, NodeConnectorMetrics, ConnAcc) + } + ); +do_with_node_name_label([{_, _} | Rest], AccIn) -> + do_with_node_name_label(Rest, AccIn). + +zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NLabels = [{node_name, NodeName} | Labels], + [{NLabels, Metric} | AccIn] + end, + AccL, + NodeMetrics + ). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Helper funcs + +boolean_to_number(true) -> 1; +boolean_to_number(false) -> 0. + +status_to_number(connected) -> 1; +status_to_number(disconnected) -> 0. + +logic_sum(N1, N2) when + (N1 > 0 andalso N2 > 0) +-> + 1; +logic_sum(_, _) -> + 0. metric_names(MetricWithType) when is_list(MetricWithType) -> [Name || {Name, _Type} <- MetricWithType]. + +aggre_or_zip_init_acc() -> + #{ + rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), + action_specific_data => maps:from_keys(action_specific_metric_names(), []), + connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) + }. 
From 8cb12c6a74ffafe96d2d66441fda44de3c277ca0 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 17:49:24 +0800 Subject: [PATCH 38/89] refactor: use utils func for prom_auth and prom_di --- .../src/emqx_prometheus_auth.erl | 162 +++----------- .../src/emqx_prometheus_data_integration.erl | 198 +++--------------- .../src/emqx_prometheus_utils.erl | 160 ++++++++++++++ 3 files changed, 214 insertions(+), 306 deletions(-) create mode 100644 apps/emqx_prometheus/src/emqx_prometheus_utils.erl diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 3e9a9d007..de91fb8b2 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -28,6 +28,11 @@ fetch_metric_data_from_local_node/0 ]). +%% %% @private +-export([ + zip_json_auth_metrics/3 +]). + -include("emqx_prometheus.hrl"). -include_lib("emqx_auth/include/emqx_authn_chains.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -282,7 +287,7 @@ lookup_authn_metrics_local(Id) -> case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authn_status => status_to_number(Status), + emqx_authn_status => emqx_prometheus_utils:status_to_number(Status), emqx_authn_nomatch => ?MG0(nomatch, Counters), emqx_authn_total => ?MG0(total, Counters), emqx_authn_success => ?MG0(success, Counters), @@ -293,7 +298,7 @@ lookup_authn_metrics_local(Id) -> end. authn_metric_names() -> - metric_names(?AUTHNS_WITH_TYPE). + emqx_prometheus_utils:metric_names(?AUTHNS_WITH_TYPE). 
%%==================== %% Authn users count @@ -360,7 +365,7 @@ lookup_authz_metrics_local(Type) -> case emqx_authz_api_sources:lookup_from_local_node(Type) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authz_status => status_to_number(Status), + emqx_authz_status => emqx_prometheus_utils:status_to_number(Status), emqx_authz_nomatch => ?MG0(nomatch, Counters), emqx_authz_total => ?MG0(total, Counters), emqx_authz_success => ?MG0(success, Counters), @@ -371,7 +376,7 @@ lookup_authz_metrics_local(Type) -> end. authz_metric_names() -> - metric_names(?AUTHZS_WITH_TYPE). + emqx_prometheus_utils:metric_names(?AUTHZS_WITH_TYPE). %%==================== %% Authz rules count @@ -412,56 +417,35 @@ banned_count_data() -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `application/json` + collect_json_data(Data) -> - maps:fold( - fun(K, V, Acc) -> - zip_json_metrics(K, V, Acc) - end, - [], - Data - ). + emqx_prometheus_utils:collect_json_data(Data, fun zip_json_auth_metrics/3). collect_banned_data() -> #{emqx_banned_count => banned_count_data()}. 
-zip_json_metrics(Key, Points, [] = _AccIn) -> +%% for initialized empty AccIn +%% The following fields will be put into Result +%% For Authn: +%% `id`, `emqx_authn_users_count` +%% For Authz: +%% `type`, `emqx_authz_rules_count`n +zip_json_auth_metrics(Key, Points, [] = _AccIn) -> lists:foldl( fun({Lables, Metric}, AccIn2) -> LablesKVMap = maps:from_list(Lables), - %% for initialized empty AccIn - %% The following fields will be put into Result - %% For Authn: - %% `id`, `emqx_authn_users_count` - %% For Authz: - %% `type`, `emqx_authz_rules_count`n Point = (maps:merge(LablesKVMap, users_or_rule_count(LablesKVMap)))#{Key => Metric}, [Point | AccIn2] end, [], Points ); -zip_json_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl( - fun({Lables, Metric}, AccIn2) -> - LablesKVMap = maps:from_list(Lables), - [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] - end, - [], - Points - ), - lists:zipwith( - fun(AllResulted, ThisKeyMetricOut) -> - maps:merge(AllResulted, ThisKeyMetricOut) - end, - AllResultedAcc, - ThisKeyResult - ). - -user_rule_data(authn) -> authn_users_count_data(); -user_rule_data(authz) -> authz_rules_count_data(). +zip_json_auth_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). 
users_or_rule_count(#{id := Id}) -> - #{emqx_authn_users_count := Points} = user_rule_data(authn), + #{emqx_authn_users_count := Points} = authn_users_count_data(), case lists:keyfind([{id, Id}], 1, Points) of {_, Metric} -> #{emqx_authn_users_count => Metric}; @@ -469,7 +453,7 @@ users_or_rule_count(#{id := Id}) -> #{} end; users_or_rule_count(#{type := Type}) -> - #{emqx_authz_rules_count := Points} = user_rule_data(authz), + #{emqx_authz_rules_count := Points} = authz_rules_count_data(), case lists:keyfind([{type, Type}], 1, Points) of {_, Metric} -> #{emqx_authz_rules_count => Metric}; @@ -482,95 +466,10 @@ users_or_rule_count(_) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `text/plain` aggre_cluster(ResL) -> - do_aggre_cluster(ResL, aggre_or_zip_init_acc()). - -do_aggre_cluster([], AccIn) -> - AccIn; -do_aggre_cluster( - [{ok, {_NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], - #{authn := AuthnAcc, authz := AuthzAcc} = AccIn -) -> - do_aggre_cluster( - Rest, - AccIn#{ - authn => do_aggre_metric(NodeAuthnMetrics, AuthnAcc), - authz => do_aggre_metric(NodeAuthzMetrics, AuthzAcc) - } - ); -do_aggre_cluster([{_, _} | Rest], AccIn) -> - do_aggre_cluster(Rest, AccIn). - -do_aggre_metric(NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - -do_aggre_metric(K, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NMetric = - case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of - true -> - logic_sum(Metric, ?PG0(Labels, AccIn)); - false -> - Metric + ?PG0(Labels, AccIn) - end, - [{Labels, NMetric} | AccIn] - end, - AccL, - NodeMetrics - ). - -logic_sum(N1, N2) when - (N1 > 0 andalso N2 > 0) --> - 1; -logic_sum(_, _) -> - 0. + emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). 
with_node_name_label(ResL) -> - do_with_node_name_label(ResL, aggre_or_zip_init_acc()). - -do_with_node_name_label([], AccIn) -> - AccIn; -do_with_node_name_label( - [{ok, {NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], - #{authn := AuthnAcc, authz := AuthzAcc} = AccIn -) -> - do_with_node_name_label( - Rest, - AccIn#{ - authn => zip_with_node_name(NodeName, NodeAuthnMetrics, AuthnAcc), - authz => zip_with_node_name(NodeName, NodeAuthzMetrics, AuthzAcc) - } - ); -do_with_node_name_label([{_, _} | Rest], AccIn) -> - do_with_node_name_label(Rest, AccIn). - -zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - -do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NLabels = [{node_name, NodeName} | Labels], - [{NLabels, Metric} | AccIn] - end, - AccL, - NodeMetrics - ). + emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Helper funcs @@ -590,19 +489,10 @@ mnesia_size(Tab) -> mnesia:table_info(Tab, size). do_metric(emqx_authn_enable, #{enable := B}, _) -> - boolean_to_number(B); + emqx_prometheus_utils:boolean_to_number(B); do_metric(K, _, Metrics) -> ?MG0(K, Metrics). -boolean_to_number(true) -> 1; -boolean_to_number(false) -> 0. - -status_to_number(connected) -> 1; -status_to_number(stopped) -> 0. - -metric_names(MetricWithType) when is_list(MetricWithType) -> - [Name || {Name, _Type} <- MetricWithType]. 
- aggre_or_zip_init_acc() -> #{ authn => maps:from_keys(authn_metric_names(), []), diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 06a417d2d..3d31b4e1e 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -24,6 +24,10 @@ -export([collect/1]). +-export([ + zip_json_data_integration_metrics/3 +]). + %% for bpapi -export([ fetch_metric_data_from_local_node/0 @@ -394,7 +398,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of #{counters := Counters} -> #{ - emqx_rule_enable => boolean_to_number(Bool), + emqx_rule_enable => emqx_prometheus_utils:boolean_to_number(Bool), emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), emqx_rule_passed => ?MG(passed, Counters), @@ -411,7 +415,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> end. rule_specific_metric_names() -> - metric_names(?RULES_SPECIFIC_WITH_TYPE). + emqx_prometheus_utils:metric_names(?RULES_SPECIFIC_WITH_TYPE). %%==================== %% Specific Action @@ -465,7 +469,7 @@ get_bridge_metric(Type, Name) -> end. action_specific_metric_names() -> - metric_names(?ACTION_SPECIFIC_WITH_TYPE). + emqx_prometheus_utils:metric_names(?ACTION_SPECIFIC_WITH_TYPE). %%==================== %% Specific Connector @@ -497,12 +501,12 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), Status = ?MG(status, ResourceData), #{ - emqx_connector_enable => boolean_to_number(Enabled), - emqx_connector_status => status_to_number(Status) + emqx_connector_enable => emqx_prometheus_utils:boolean_to_number(Enabled), + emqx_connector_status => emqx_prometheus_utils:status_to_number(Status) }. connectr_specific_metric_names() -> - metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). 
+ emqx_prometheus_utils:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). %%-------------------------------------------------------------------- %% Collect functions @@ -510,7 +514,6 @@ connectr_specific_metric_names() -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `application/json` - collect_data_integration_overview(Rules, Bridges) -> RulesD = rules_data(Rules), ConnectorsD = connectors_data(Bridges), @@ -518,199 +521,54 @@ collect_data_integration_overview(Rules, Bridges) -> M1 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, - metric_names(?RULES_WITH_TYPE) + emqx_prometheus_utils:metric_names(?RULES_WITH_TYPE) ), M2 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, - metric_names(?CONNECTORS_WITH_TYPE) + emqx_prometheus_utils:metric_names(?CONNECTORS_WITH_TYPE) ), M3 = maybe_collect_schema_registry(), lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). collect_json_data(Data) -> - maps:fold( - fun(K, V, Acc) -> - zip_json_metrics(K, V, Acc) - end, - [], - Data - ). + emqx_prometheus_utils:collect_json_data(Data, fun zip_json_data_integration_metrics/3). 
-zip_json_metrics(Key, Points, [] = _AccIn) -> +%% for initialized empty AccIn +%% The following fields will be put into Result +%% For Rules: +%% `id` => [RULE_ID] +%% For Actions +%% `id` => [ACTION_ID] +%% FOR Connectors +%% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID +%% formatted with {type}:{name} +zip_json_data_integration_metrics(Key, Points, [] = _AccIn) -> lists:foldl( fun({Lables, Metric}, AccIn2) -> LablesKVMap = maps:from_list(Lables), - %% for initialized empty AccIn - %% The following fields will be put into Result - %% For Rules: - %% `id` => [RULE_ID] - %% For Actions - %% `id` => [ACTION_ID] - %% FOR Connectors - %% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID - %% formatted with {type}:{name} Point = LablesKVMap#{Key => Metric}, [Point | AccIn2] end, [], Points ); -zip_json_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl( - fun({Lables, Metric}, AccIn2) -> - LablesKVMap = maps:from_list(Lables), - [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] - end, - [], - Points - ), - lists:zipwith( - fun(AllResulted, ThisKeyMetricOut) -> - maps:merge(AllResulted, ThisKeyMetricOut) - end, - AllResultedAcc, - ThisKeyResult - ). +zip_json_data_integration_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `text/plain` aggre_cluster(ResL) -> - do_aggre_cluster(ResL, aggre_or_zip_init_acc()). 
- -do_aggre_cluster([], AccIn) -> - AccIn; -do_aggre_cluster( - [ - {ok, - {_NodeName, #{ - rule_specific_data := NodeRuleMetrics, - action_specific_data := NodeActionMetrics, - connector_specific_data := NodeConnectorMetrics - }}} - | Rest - ], - #{ - rule_specific_data := RuleAcc, - action_specific_data := ActionAcc, - connector_specific_data := ConnAcc - } = AccIn -) -> - do_aggre_cluster( - Rest, - AccIn#{ - %% TODO - rule_specific_data => do_aggre_metric(NodeRuleMetrics, RuleAcc), - action_specific_data => do_aggre_metric(NodeActionMetrics, ActionAcc), - connector_specific_data => do_aggre_metric(NodeConnectorMetrics, ConnAcc) - } - ); -do_aggre_cluster([{_, _} | Rest], AccIn) -> - do_aggre_cluster(Rest, AccIn). - -do_aggre_metric(NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - --define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). - -do_aggre_metric(K, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NMetric = - case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of - true -> - logic_sum(Metric, ?PG0(Labels, AccIn)); - false -> - Metric + ?PG0(Labels, AccIn) - end, - [{Labels, NMetric} | AccIn] - end, - AccL, - NodeMetrics - ). + emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). with_node_name_label(ResL) -> - do_with_node_name_label( - ResL, - aggre_or_zip_init_acc() - ). 
- -do_with_node_name_label([], AccIn) -> - AccIn; -do_with_node_name_label( - [ - {ok, - {NodeName, #{ - rule_specific_data := NodeRuleMetrics, - action_specific_data := NodeActionMetrics, - connector_specific_data := NodeConnectorMetrics - }}} - | Rest - ], - #{ - rule_specific_data := RuleAcc, - action_specific_data := ActionAcc, - connector_specific_data := ConnAcc - } = AccIn -) -> - do_with_node_name_label( - Rest, - AccIn#{ - rule_specific_data => zip_with_node_name(NodeName, NodeRuleMetrics, RuleAcc), - action_specific_data => zip_with_node_name(NodeName, NodeActionMetrics, ActionAcc), - connector_specific_data => zip_with_node_name(NodeName, NodeConnectorMetrics, ConnAcc) - } - ); -do_with_node_name_label([{_, _} | Rest], AccIn) -> - do_with_node_name_label(Rest, AccIn). - -zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - -do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NLabels = [{node_name, NodeName} | Labels], - [{NLabels, Metric} | AccIn] - end, - AccL, - NodeMetrics - ). + emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Helper funcs -boolean_to_number(true) -> 1; -boolean_to_number(false) -> 0. - -status_to_number(connected) -> 1; -status_to_number(disconnected) -> 0. - -logic_sum(N1, N2) when - (N1 > 0 andalso N2 > 0) --> - 1; -logic_sum(_, _) -> - 0. - -metric_names(MetricWithType) when is_list(MetricWithType) -> - [Name || {Name, _Type} <- MetricWithType]. 
- aggre_or_zip_init_acc() -> #{ rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), diff --git a/apps/emqx_prometheus/src/emqx_prometheus_utils.erl b/apps/emqx_prometheus/src/emqx_prometheus_utils.erl new file mode 100644 index 000000000..fadfb5c47 --- /dev/null +++ b/apps/emqx_prometheus/src/emqx_prometheus_utils.erl @@ -0,0 +1,160 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_prometheus_utils). + +-export([ + collect_json_data/2, + + aggre_cluster/3, + with_node_name_label/2, + + point_to_map_fun/1, + + boolean_to_number/1, + status_to_number/1, + metric_names/1 +]). + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). + +collect_json_data(Data, Func) when is_function(Func, 3) -> + maps:fold( + fun(K, V, Acc) -> + Func(K, V, Acc) + end, + [], + Data + ); +collect_json_data(_, _) -> + error(badarg). + +aggre_cluster(LogicSumKs, ResL, Init) -> + do_aggre_cluster(LogicSumKs, ResL, Init). 
+ +do_aggre_cluster(_LogicSumKs, [], AccIn) -> + AccIn; +do_aggre_cluster(LogicSumKs, [{ok, {_NodeName, NodeMetric}} | Rest], AccIn) -> + do_aggre_cluster( + LogicSumKs, + Rest, + maps:fold( + fun(K, V, AccIn0) -> + AccIn0#{K => aggre_metric(LogicSumKs, V, ?MG(K, AccIn0))} + end, + AccIn, + NodeMetric + ) + %% merge_node_and_acc() + ); +do_aggre_cluster(LogicSumKs, [{_, _} | Rest], AccIn) -> + do_aggre_cluster(LogicSumKs, Rest, AccIn). + +aggre_metric(LogicSumKs, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_aggre_metric( + K, LogicSumKs, ?MG(K, NodeMetrics), ?MG(K, AccIn) + ), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_aggre_metric(K, LogicSumKs, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NMetric = + case lists:member(K, LogicSumKs) of + true -> + logic_sum(Metric, ?PG0(Labels, AccIn)); + false -> + Metric + ?PG0(Labels, AccIn) + end, + [{Labels, NMetric} | AccIn] + end, + AccL, + NodeMetrics + ). + +with_node_name_label(ResL, Init) -> + do_with_node_name_label(ResL, Init). + +do_with_node_name_label([], AccIn) -> + AccIn; +do_with_node_name_label([{ok, {NodeName, NodeMetric}} | Rest], AccIn) -> + do_with_node_name_label( + Rest, + maps:fold( + fun(K, V, AccIn0) -> + AccIn0#{ + K => zip_with_node_name(NodeName, V, ?MG(K, AccIn0)) + } + end, + AccIn, + NodeMetric + ) + ); +do_with_node_name_label([{_, _} | Rest], AccIn) -> + do_with_node_name_label(Rest, AccIn). + +zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NLabels = [{node, NodeName} | Labels], + [{NLabels, Metric} | AccIn] + end, + AccL, + NodeMetrics + ). 
+ +point_to_map_fun(Key) -> + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +logic_sum(N1, N2) when + (N1 > 0 andalso N2 > 0) +-> + 1; +logic_sum(_, _) -> + 0. + +boolean_to_number(true) -> 1; +boolean_to_number(false) -> 0. + +status_to_number(connected) -> 1; +%% for auth +status_to_number(stopped) -> 0; +%% for data_integration +status_to_number(disconnected) -> 0. + +metric_names(MetricWithType) when is_list(MetricWithType) -> + [Name || {Name, _Type} <- MetricWithType]. From b480c5b3718349978a4cb52398eed51a1ccc1b8b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 11:23:40 +0800 Subject: [PATCH 39/89] fix(prom): use name `mode` and macros to put/get format mode --- .../include/emqx_prometheus.hrl | 15 +++++++ .../src/emqx_prometheus_api.erl | 39 +++++++++++-------- .../src/emqx_prometheus_auth.erl | 11 +++--- .../src/emqx_prometheus_data_integration.erl | 10 ++--- 4 files changed, 47 insertions(+), 28 deletions(-) diff --git a/apps/emqx_prometheus/include/emqx_prometheus.hrl b/apps/emqx_prometheus/include/emqx_prometheus.hrl index 9057f2b14..b36f647da 100644 --- a/apps/emqx_prometheus/include/emqx_prometheus.hrl +++ b/apps/emqx_prometheus/include/emqx_prometheus.hrl @@ -28,3 +28,18 @@ ?PROMETHEUS_AUTH_REGISTRY, ?PROMETHEUS_DATA_INTEGRATION_REGISTRY ]). + +-define(PROM_DATA_MODE__NODE, node). +-define(PROM_DATA_MODE__ALL_NODES_AGGREGATED, all_nodes_aggregated). +-define(PROM_DATA_MODE__ALL_NODES_UNAGGREGATED, all_nodes_unaggregated). + +-define(PROM_DATA_MODES, [ + ?PROM_DATA_MODE__NODE, + ?PROM_DATA_MODE__ALL_NODES_AGGREGATED, + ?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED +]). + +-define(PROM_DATA_MODE_KEY__, prom_data_mode). + +-define(PUT_PROM_DATA_MODE(MODE__), erlang:put(?PROM_DATA_MODE_KEY__, MODE__)). +-define(GET_PROM_DATA_MODE(), erlang:get(?PROM_DATA_MODE_KEY__)). 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 9b903b53a..47a5b0299 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -18,6 +18,7 @@ -behaviour(minirest_api). +-include("emqx_prometheus.hrl"). -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/logger.hrl"). @@ -86,7 +87,7 @@ schema("/prometheus/auth") -> #{ description => ?DESC(get_prom_auth_data), tags => ?TAGS, - parameters => [ref(format_mode)], + parameters => [ref(mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -99,7 +100,7 @@ schema("/prometheus/stats") -> #{ description => ?DESC(get_prom_data), tags => ?TAGS, - parameters => [ref(format_mode)], + parameters => [ref(mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -112,7 +113,7 @@ schema("/prometheus/data_integration") -> #{ description => ?DESC(get_prom_data_integration_data), tags => ?TAGS, - parameters => [ref(format_mode)], + parameters => [ref(mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -125,11 +126,11 @@ security() -> false -> [] end. 
-fields(format_mode) -> +fields(mode) -> [ - {format_mode, + {mode, mk( - hoconsc:enum([node, nodes_aggregated, nodes_unaggregated]), + hoconsc:enum(?PROM_DATA_MODES), #{ default => node, desc => <<"Metrics format mode.">>, @@ -178,8 +179,13 @@ data_integration(get, #{headers := Headers, query_string := Qs}) -> %% Internal funcs %%-------------------------------------------------------------------- -collect(Module, #{type := Type, format_mode := FormatMode}) -> - erlang:put(format_mode, FormatMode), +collect(Module, #{type := Type, mode := Mode}) -> + %% `Mode` is used to control the format of the returned data + %% It will used in callback `Module:collect_mf/1` to fetch data from node or cluster + %% And use this mode parameter to determine the formatting method of the returned information. + %% Since the arity of the callback function has been fixed. + %% so it is placed in the process dictionary of the current process. + ?PUT_PROM_DATA_MODE(Mode), Data = case erlang:function_exported(Module, collect, 1) of true -> @@ -194,21 +200,20 @@ collect(Module, #{type := Type, format_mode := FormatMode}) -> gen_response(Type, Data). collect_opts(Headers, Qs) -> - #{type => response_type(Headers), format_mode => format_mode(Qs)}. + #{type => response_type(Headers), mode => mode(Qs)}. response_type(#{<<"accept">> := <<"application/json">>}) -> <<"json">>; response_type(_) -> <<"prometheus">>. -format_mode(#{<<"format_mode">> := node}) -> - node; -format_mode(#{<<"format_mode">> := nodes_aggregated}) -> - nodes_aggregated; -format_mode(#{<<"format_mode">> := nodes_unaggregated}) -> - nodes_unaggregated; -format_mode(_) -> - node. +mode(#{<<"mode">> := Mode}) -> + case lists:member(Mode, ?PROM_DATA_MODES) of + true -> Mode; + false -> ?PROM_DATA_MODE__NODE + end; +mode(_) -> + ?PROM_DATA_MODE__NODE. 
gen_response(<<"json">>, Data) -> {200, Data}; diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index de91fb8b2..02010aaf7 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -127,7 +127,7 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). %% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - RawData = raw_data(erlang:get(format_mode)), + RawData = raw_data(?GET_PROM_DATA_MODE()), ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), @@ -139,8 +139,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - FormatMode = erlang:get(format_mode), - RawData = raw_data(FormatMode), + RawData = raw_data(?GET_PROM_DATA_MODE()), %% TODO: merge node name in json format #{ emqx_authn => collect_json_data(?MG(authn, RawData)), @@ -175,14 +174,14 @@ fetch_cluster_consistented_metric_data() -> }. %% raw data for different format modes -raw_data(nodes_aggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> AggregatedNodesMetrics = aggre_cluster(all_nodes_metrics()), maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(nodes_unaggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> %% then fold from all nodes AllNodesMetrics = with_node_name_label(all_nodes_metrics()), maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(node) -> +raw_data(?PROM_DATA_MODE__NODE) -> {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 3d31b4e1e..729e1f640 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -132,7 +132,7 @@ deregister_cleanup(_) -> ok. _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> - RawData = raw_data(erlang:get(format_mode)), + RawData = raw_data(?GET_PROM_DATA_MODE()), %% Data Integration Overview ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), @@ -157,7 +157,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - RawData = raw_data(erlang:get(format_mode)), + RawData = raw_data(?GET_PROM_DATA_MODE()), Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ @@ -217,14 +217,14 @@ maybe_collect_schema_registry() -> -endif. %% raw data for different format modes -raw_data(nodes_aggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> AggregatedNodesMetrics = aggre_cluster(metrics_data_from_all_nodes()), maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(nodes_unaggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> %% then fold from all nodes AllNodesMetrics = with_node_name_label(metrics_data_from_all_nodes()), maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(node) -> +raw_data(?PROM_DATA_MODE__NODE) -> {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). 
From c6c1a7fc288b3ce6f0fb9bf0c70edd1db2c3f421 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 15:52:09 +0800 Subject: [PATCH 40/89] refactor(prom): prom_auth and prom_di as prom_cluster behaviour --- .../src/emqx_prometheus_auth.erl | 68 ++++------- ..._utils.erl => emqx_prometheus_cluster.erl} | 43 ++++++- .../src/emqx_prometheus_data_integration.erl | 115 ++++++++---------- 3 files changed, 113 insertions(+), 113 deletions(-) rename apps/emqx_prometheus/src/{emqx_prometheus_utils.erl => emqx_prometheus_cluster.erl} (76%) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 02010aaf7..5fa9057da 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -24,8 +24,12 @@ -export([collect/1]). %% for bpapi +-behaviour(emqx_prometheus_cluster). -export([ - fetch_metric_data_from_local_node/0 + fetch_data_from_local_node/0, + fetch_cluster_consistented_data/0, + aggre_or_zip_init_acc/0, + logic_sum_metrics/0 ]). %% %% @private @@ -127,7 +131,7 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). 
%% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), @@ -139,8 +143,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), - %% TODO: merge node name in json format + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), #{ emqx_authn => collect_json_data(?MG(authn, RawData)), emqx_authz => collect_json_data(?MG(authz, RawData)), @@ -159,37 +162,28 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_auth(Name, Metrics). -%% @private -fetch_metric_data_from_local_node() -> +%% behaviour +fetch_data_from_local_node() -> {node(self()), #{ authn => authn_data(), authz => authz_data() }}. -fetch_cluster_consistented_metric_data() -> +fetch_cluster_consistented_data() -> #{ authn_users_count => authn_users_count_data(), authz_rules_count => authz_rules_count_data(), banned_count => banned_count_data() }. -%% raw data for different format modes -raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> - AggregatedNodesMetrics = aggre_cluster(all_nodes_metrics()), - maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> - %% then fold from all nodes - AllNodesMetrics = with_node_name_label(all_nodes_metrics()), - maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__NODE) -> - {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), - maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). 
+aggre_or_zip_init_acc() -> + #{ + authn => maps:from_keys(authn_metric_names(), []), + authz => maps:from_keys(authz_metric_names(), []) + }. -all_nodes_metrics() -> - Nodes = mria:running_nodes(), - _ResL = emqx_prometheus_proto_v2:raw_prom_data( - Nodes, ?MODULE, fetch_metric_data_from_local_node, [] - ). +logic_sum_metrics() -> + ?LOGICAL_SUM_METRIC_NAMES. %%-------------------------------------------------------------------- %% Collector @@ -286,7 +280,7 @@ lookup_authn_metrics_local(Id) -> case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authn_status => emqx_prometheus_utils:status_to_number(Status), + emqx_authn_status => emqx_prometheus_cluster:status_to_number(Status), emqx_authn_nomatch => ?MG0(nomatch, Counters), emqx_authn_total => ?MG0(total, Counters), emqx_authn_success => ?MG0(success, Counters), @@ -297,7 +291,7 @@ lookup_authn_metrics_local(Id) -> end. authn_metric_names() -> - emqx_prometheus_utils:metric_names(?AUTHNS_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?AUTHNS_WITH_TYPE). %%==================== %% Authn users count @@ -364,7 +358,7 @@ lookup_authz_metrics_local(Type) -> case emqx_authz_api_sources:lookup_from_local_node(Type) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authz_status => emqx_prometheus_utils:status_to_number(Status), + emqx_authz_status => emqx_prometheus_cluster:status_to_number(Status), emqx_authz_nomatch => ?MG0(nomatch, Counters), emqx_authz_total => ?MG0(total, Counters), emqx_authz_success => ?MG0(success, Counters), @@ -375,7 +369,7 @@ lookup_authz_metrics_local(Type) -> end. authz_metric_names() -> - emqx_prometheus_utils:metric_names(?AUTHZS_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?AUTHZS_WITH_TYPE). 
%%==================== %% Authz rules count @@ -418,7 +412,7 @@ banned_count_data() -> %% merge / zip formatting funcs for type `application/json` collect_json_data(Data) -> - emqx_prometheus_utils:collect_json_data(Data, fun zip_json_auth_metrics/3). + emqx_prometheus_cluster:collect_json_data(Data, fun zip_json_auth_metrics/3). collect_banned_data() -> #{emqx_banned_count => banned_count_data()}. @@ -440,7 +434,7 @@ zip_json_auth_metrics(Key, Points, [] = _AccIn) -> Points ); zip_json_auth_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + ThisKeyResult = lists:foldl(emqx_prometheus_cluster:point_to_map_fun(Key), [], Points), lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). users_or_rule_count(#{id := Id}) -> @@ -462,14 +456,6 @@ users_or_rule_count(#{type := Type}) -> users_or_rule_count(_) -> #{}. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% merge / zip formatting funcs for type `text/plain` -aggre_cluster(ResL) -> - emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). - -with_node_name_label(ResL) -> - emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Helper funcs @@ -488,12 +474,6 @@ mnesia_size(Tab) -> mnesia:table_info(Tab, size). do_metric(emqx_authn_enable, #{enable := B}, _) -> - emqx_prometheus_utils:boolean_to_number(B); + emqx_prometheus_cluster:boolean_to_number(B); do_metric(K, _, Metrics) -> ?MG0(K, Metrics). - -aggre_or_zip_init_acc() -> - #{ - authn => maps:from_keys(authn_metric_names(), []), - authz => maps:from_keys(authz_metric_names(), []) - }. 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_utils.erl b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl similarity index 76% rename from apps/emqx_prometheus/src/emqx_prometheus_utils.erl rename to apps/emqx_prometheus/src/emqx_prometheus_cluster.erl index fadfb5c47..e48df0f8b 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_utils.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl @@ -13,9 +13,13 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %%-------------------------------------------------------------------- --module(emqx_prometheus_utils). +-module(emqx_prometheus_cluster). + +-include("emqx_prometheus.hrl"). -export([ + raw_data/2, + collect_json_data/2, aggre_cluster/3, @@ -28,9 +32,34 @@ metric_names/1 ]). +-callback fetch_cluster_consistented_data() -> map(). + +-callback fetch_data_from_local_node() -> {node(), map()}. + +-callback aggre_or_zip_init_acc() -> map(). + -define(MG(K, MAP), maps:get(K, MAP)). -define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). +raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> + AllNodesMetrics = aggre_cluster(Module), + Cluster = Module:fetch_cluster_consistented_data(), + maps:merge(AllNodesMetrics, Cluster); +raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> + AllNodesMetrics = with_node_name_label(Module), + Cluster = Module:fetch_cluster_consistented_data(), + maps:merge(AllNodesMetrics, Cluster); +raw_data(Module, ?PROM_DATA_MODE__NODE) -> + {_Node, LocalNodeMetrics} = Module:fetch_data_from_local_node(), + Cluster = Module:fetch_cluster_consistented_data(), + maps:merge(LocalNodeMetrics, Cluster). + +metrics_data_from_all_nodes(Module) -> + Nodes = mria:running_nodes(), + _ResL = emqx_prometheus_proto_v2:raw_prom_data( + Nodes, Module, fetch_data_from_local_node, [] + ). 
+ collect_json_data(Data, Func) when is_function(Func, 3) -> maps:fold( fun(K, V, Acc) -> @@ -42,6 +71,17 @@ collect_json_data(Data, Func) when is_function(Func, 3) -> collect_json_data(_, _) -> error(badarg). +aggre_cluster(Module) -> + do_aggre_cluster( + Module:logic_sum_metrics(), + metrics_data_from_all_nodes(Module), + Module:aggre_or_zip_init_acc() + ). + +with_node_name_label(Module) -> + ResL = metrics_data_from_all_nodes(Module), + do_with_node_name_label(ResL, Module:aggre_or_zip_init_acc()). + aggre_cluster(LogicSumKs, ResL, Init) -> do_aggre_cluster(LogicSumKs, ResL, Init). @@ -58,7 +98,6 @@ do_aggre_cluster(LogicSumKs, [{ok, {_NodeName, NodeMetric}} | Rest], AccIn) -> AccIn, NodeMetric ) - %% merge_node_and_acc() ); do_aggre_cluster(LogicSumKs, [{_, _} | Rest], AccIn) -> do_aggre_cluster(LogicSumKs, Rest, AccIn). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 729e1f640..bfd011eaa 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -29,8 +29,12 @@ ]). %% for bpapi +-behaviour(emqx_prometheus_cluster). -export([ - fetch_metric_data_from_local_node/0 + fetch_data_from_local_node/0, + fetch_cluster_consistented_data/0, + aggre_or_zip_init_acc/0, + logic_sum_metrics/0 ]). -export([add_collect_family/4]). @@ -120,6 +124,37 @@ emqx_connector_status ]). +%%-------------------------------------------------------------------- +%% Callback for emqx_prometheus_cluster +%%-------------------------------------------------------------------- + +fetch_data_from_local_node() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + {node(self()), #{ + rule_specific_data => rule_specific_data(Rules), + action_specific_data => action_specific_data(Bridges), + connector_specific_data => connector_specific_data(Bridges) + }}. 
+ +fetch_cluster_consistented_data() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + (maybe_collect_schema_registry())#{ + rules_data => rules_data(Rules), + connectors_data => connectors_data(Bridges) + }. + +aggre_or_zip_init_acc() -> + #{ + rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), + action_specific_data => maps:from_keys(action_specific_metric_names(), []), + connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) + }. + +logic_sum_metrics() -> + ?LOGICAL_SUM_METRIC_NAMES. + %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -132,7 +167,7 @@ deregister_cleanup(_) -> ok. _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), %% Data Integration Overview ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), @@ -157,7 +192,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ @@ -183,24 +218,6 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). -%% @private -fetch_metric_data_from_local_node() -> - Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), - {node(self()), #{ - rule_specific_data => rule_specific_data(Rules), - action_specific_data => action_specific_data(Bridges), - connector_specific_data => connector_specific_data(Bridges) - }}. 
- -fetch_cluster_consistented_metric_data() -> - Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), - (maybe_collect_schema_registry())#{ - rules_data => rules_data(Rules), - connectors_data => connectors_data(Bridges) - }. - -if(?EMQX_RELEASE_EDITION == ee). maybe_collect_family_schema_registry(Callback) -> ok = add_collect_family(Callback, ?SCHEMA_REGISTRY_WITH_TYPE, schema_registry_data()), @@ -216,24 +233,6 @@ maybe_collect_schema_registry() -> #{}. -endif. -%% raw data for different format modes -raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> - AggregatedNodesMetrics = aggre_cluster(metrics_data_from_all_nodes()), - maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> - %% then fold from all nodes - AllNodesMetrics = with_node_name_label(metrics_data_from_all_nodes()), - maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__NODE) -> - {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), - maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). - -metrics_data_from_all_nodes() -> - Nodes = mria:running_nodes(), - _ResL = emqx_prometheus_proto_v2:raw_prom_data( - Nodes, ?MODULE, fetch_metric_data_from_local_node, [] - ). - %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -398,7 +397,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of #{counters := Counters} -> #{ - emqx_rule_enable => emqx_prometheus_utils:boolean_to_number(Bool), + emqx_rule_enable => emqx_prometheus_cluster:boolean_to_number(Bool), emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), emqx_rule_passed => ?MG(passed, Counters), @@ -415,7 +414,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> end. 
rule_specific_metric_names() -> - emqx_prometheus_utils:metric_names(?RULES_SPECIFIC_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?RULES_SPECIFIC_WITH_TYPE). %%==================== %% Specific Action @@ -469,7 +468,7 @@ get_bridge_metric(Type, Name) -> end. action_specific_metric_names() -> - emqx_prometheus_utils:metric_names(?ACTION_SPECIFIC_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?ACTION_SPECIFIC_WITH_TYPE). %%==================== %% Specific Connector @@ -501,12 +500,12 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), Status = ?MG(status, ResourceData), #{ - emqx_connector_enable => emqx_prometheus_utils:boolean_to_number(Enabled), - emqx_connector_status => emqx_prometheus_utils:status_to_number(Status) + emqx_connector_enable => emqx_prometheus_cluster:boolean_to_number(Enabled), + emqx_connector_status => emqx_prometheus_cluster:status_to_number(Status) }. connectr_specific_metric_names() -> - emqx_prometheus_utils:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). %%-------------------------------------------------------------------- %% Collect functions @@ -521,19 +520,19 @@ collect_data_integration_overview(Rules, Bridges) -> M1 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, - emqx_prometheus_utils:metric_names(?RULES_WITH_TYPE) + emqx_prometheus_cluster:metric_names(?RULES_WITH_TYPE) ), M2 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, - emqx_prometheus_utils:metric_names(?CONNECTORS_WITH_TYPE) + emqx_prometheus_cluster:metric_names(?CONNECTORS_WITH_TYPE) ), M3 = maybe_collect_schema_registry(), lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). collect_json_data(Data) -> - emqx_prometheus_utils:collect_json_data(Data, fun zip_json_data_integration_metrics/3). 
+ emqx_prometheus_cluster:collect_json_data(Data, fun zip_json_data_integration_metrics/3). %% for initialized empty AccIn %% The following fields will be put into Result @@ -555,23 +554,5 @@ zip_json_data_integration_metrics(Key, Points, [] = _AccIn) -> Points ); zip_json_data_integration_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + ThisKeyResult = lists:foldl(emqx_prometheus_cluster:point_to_map_fun(Key), [], Points), lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% merge / zip formatting funcs for type `text/plain` -aggre_cluster(ResL) -> - emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). - -with_node_name_label(ResL) -> - emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Helper funcs - -aggre_or_zip_init_acc() -> - #{ - rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), - action_specific_data => maps:from_keys(action_specific_metric_names(), []), - connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) - }. 
From 6b064dd8eb76a79b293767bbe5715408b12e51c2 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 17:14:41 +0800 Subject: [PATCH 41/89] fix(prom_stats): missing metric key - follow https://github.com/emqx/emqx/pull/11497 --- apps/emqx/src/emqx_stats.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/emqx/src/emqx_stats.erl b/apps/emqx/src/emqx_stats.erl index dfd3115f0..9685823ff 100644 --- a/apps/emqx/src/emqx_stats.erl +++ b/apps/emqx/src/emqx_stats.erl @@ -166,6 +166,8 @@ names() -> emqx_live_connections_max, emqx_sessions_count, emqx_sessions_max, + emqx_channels_count, + emqx_channels_max, emqx_topics_count, emqx_topics_max, emqx_suboptions_count, From b424f8ac1222cf6fe7263bdd1011052021d02001 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 17:35:12 +0800 Subject: [PATCH 42/89] feat(prom_stats): aggregated/unaggregated prometheus data --- apps/emqx_prometheus/src/emqx_prometheus.erl | 1135 +++++++++-------- .../src/emqx_prometheus_auth.erl | 130 +- .../src/emqx_prometheus_data_integration.erl | 364 +++--- 3 files changed, 838 insertions(+), 791 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 264d818c9..af35acc36 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -22,6 +22,16 @@ %% be used by the prometheus application -behaviour(prometheus_collector). +-behaviour(emqx_prometheus_cluster). +-export([ + fetch_data_from_local_node/0, + fetch_cluster_consistented_data/0, + aggre_or_zip_init_acc/0, + logic_sum_metrics/0 +]). + +-export([zip_json_prom_stats_metrics/3]). + -include("emqx_prometheus.hrl"). -include_lib("public_key/include/public_key.hrl"). @@ -34,7 +44,7 @@ create_mf/5, gauge_metric/1, gauge_metrics/1, - counter_metric/1 + counter_metrics/1 ] ). @@ -67,12 +77,21 @@ do_stop/0 ]). 
+%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(MG0(K, MAP), maps:get(K, MAP, 0)). + -define(C(K, L), proplists:get_value(K, L, 0)). -define(TIMER_MSG, '#interval'). -define(HTTP_OPTIONS, [{autoredirect, true}, {timeout, 60000}]). +-define(LOGICAL_SUM_METRIC_NAMES, []). + %%-------------------------------------------------------------------- %% APIs %%-------------------------------------------------------------------- @@ -172,85 +191,96 @@ deregister_cleanup(?PROMETHEUS_DEFAULT_REGISTRY) -> ok. collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> - Metrics = emqx_metrics:all(), - Stats = emqx_stats:getstats(), - VMData = emqx_vm_data(), - ClusterData = emqx_cluster_data(), - CertsData = emqx_certs_data(), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), %% TODO: license expiry epoch and cert expiry epoch should be cached - _ = [add_collect_family(Name, CertsData, Callback, gauge) || Name <- emqx_certs()], - _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], - _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], - _ = [add_collect_family(Name, ClusterData, Callback, gauge) || Name <- emqx_cluster()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_packets()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_messages()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_delivery()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_client()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_session()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_olp()], - _ = [add_collect_family(Name, Metrics, Callback, counter) 
|| Name <- emqx_metrics_acl()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_authn()], - ok = maybe_collect_family_license(Callback), + ok = add_collect_family(Callback, stats_metric_meta(), ?MG(stats_data, RawData)), + ok = add_collect_family(Callback, vm_metric_meta(), ?MG(vm_data, RawData)), + ok = add_collect_family(Callback, cluster_metric_meta(), ?MG(cluster_data, RawData)), + + ok = add_collect_family(Callback, emqx_packet_metric_meta(), ?MG(emqx_packet_data, RawData)), + ok = add_collect_family(Callback, message_metric_meta(), ?MG(emqx_message_data, RawData)), + ok = add_collect_family(Callback, delivery_metric_meta(), ?MG(emqx_delivery_data, RawData)), + ok = add_collect_family(Callback, client_metric_meta(), ?MG(emqx_client_data, RawData)), + ok = add_collect_family(Callback, session_metric_meta(), ?MG(emqx_session_data, RawData)), + ok = add_collect_family(Callback, olp_metric_meta(), ?MG(emqx_olp_data, RawData)), + ok = add_collect_family(Callback, acl_metric_meta(), ?MG(emqx_acl_data, RawData)), + ok = add_collect_family(Callback, authn_metric_meta(), ?MG(emqx_authn_data, RawData)), + + ok = add_collect_family(Callback, cert_metric_meta(), ?MG(cert_data, RawData)), + ok = maybe_license_add_collect_family(Callback, RawData), ok; collect_mf(_Registry, _Callback) -> ok. %% @private collect(<<"json">>) -> - Metrics = emqx_metrics:all(), - Stats = emqx_stats:getstats(), - VMData = emqx_vm_data(), - %% TODO: FIXME! 
- %% emqx_metrics_olp()), - %% emqx_metrics_acl()), - %% emqx_metrics_authn()), - (maybe_collect_license())#{ - certs => collect_certs_json(emqx_certs_data()), - stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), - metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), - packets => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_packets()]), - messages => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_messages()]), - delivery => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_delivery()]), - client => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_client()]), - session => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_session()]) + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), + (maybe_license_collect_json_data(RawData))#{ + stats => collect_json_data(?MG(stats_data, RawData)), + metrics => collect_json_data(?MG(vm_data, RawData)), + packets => collect_json_data(?MG(emqx_packet_data, RawData)), + messages => collect_json_data(?MG(emqx_message_data, RawData)), + delivery => collect_json_data(?MG(emqx_delivery_data, RawData)), + client => collect_json_data(?MG(emqx_client_data, RawData)), + session => collect_json_data(?MG(emqx_session_data, RawData)), + cluster => collect_json_data(?MG(cluster_data, RawData)), + olp => collect_json_data(?MG(emqx_olp_data, RawData)), + acl => collect_json_data(?MG(emqx_acl_data, RawData)), + authn => collect_json_data(?MG(emqx_authn_data, RawData)), + certs => collect_cert_json_data(?MG(cert_data, RawData)) }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DEFAULT_REGISTRY). -%% @private -collect_stats(Name, Stats) -> - R = collect_metrics(Name, Stats), - case R#'Metric'.gauge of - undefined -> - {_, Val} = R#'Metric'.counter, - {Name, Val}; - {_, Val} -> - {Name, Val} - end. 
- collect_metrics(Name, Metrics) -> emqx_collect(Name, Metrics). +add_collect_family(Callback, MetricWithType, Data) -> + _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type, _} <- MetricWithType], + ok. + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). --if(?EMQX_RELEASE_EDITION == ee). -maybe_collect_family_license(Callback) -> - LicenseData = emqx_license_data(), - _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], - ok. +%% behaviour +fetch_data_from_local_node() -> + {node(self()), #{ + stats_data => stats_data(), + vm_data => vm_data(), + cluster_data => cluster_data(), + %% Metrics + emqx_packet_data => emqx_metric_data(emqx_packet_metric_meta()), + emqx_message_data => emqx_metric_data(message_metric_meta()), + emqx_delivery_data => emqx_metric_data(delivery_metric_meta()), + emqx_client_data => emqx_metric_data(client_metric_meta()), + emqx_session_data => emqx_metric_data(session_metric_meta()), + emqx_olp_data => emqx_metric_data(olp_metric_meta()), + emqx_acl_data => emqx_metric_data(acl_metric_meta()), + emqx_authn_data => emqx_metric_data(authn_metric_meta()) + }}. -maybe_collect_license() -> - LicenseData = emqx_license_data(), - #{license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()])}. +fetch_cluster_consistented_data() -> + (maybe_license_fetch_data())#{ + cert_data => cert_data() + }. --else. -maybe_collect_family_license(_) -> - ok. 
+aggre_or_zip_init_acc() -> + #{ + stats_data => maps:from_keys(metrics_name(stats_metric_meta()), []), + vm_data => maps:from_keys(metrics_name(vm_metric_meta()), []), + cluster_data => maps:from_keys(metrics_name(cluster_metric_meta()), []), + emqx_packet_data => maps:from_keys(metrics_name(emqx_packet_metric_meta()), []), + emqx_message_data => maps:from_keys(metrics_name(message_metric_meta()), []), + emqx_delivery_data => maps:from_keys(metrics_name(delivery_metric_meta()), []), + emqx_client_data => maps:from_keys(metrics_name(client_metric_meta()), []), + emqx_session_data => maps:from_keys(metrics_name(session_metric_meta()), []), + emqx_olp_data => maps:from_keys(metrics_name(olp_metric_meta()), []), + emqx_acl_data => maps:from_keys(metrics_name(acl_metric_meta()), []), + emqx_authn_data => maps:from_keys(metrics_name(authn_metric_meta()), []) + }. -maybe_collect_license() -> - #{}. --endif. +logic_sum_metrics() -> + ?LOGICAL_SUM_METRIC_NAMES. %%-------------------------------------------------------------------- %% Collector @@ -258,512 +288,513 @@ maybe_collect_license() -> %%-------------------------------------------------------------------- %% Stats - %% connections -emqx_collect(emqx_connections_count, Stats) -> - gauge_metric(?C('connections.count', Stats)); -emqx_collect(emqx_connections_max, Stats) -> - gauge_metric(?C('connections.max', Stats)); -emqx_collect(emqx_live_connections_count, Stats) -> - gauge_metric(?C('live_connections.count', Stats)); -emqx_collect(emqx_live_connections_max, Stats) -> - gauge_metric(?C('live_connections.max', Stats)); +emqx_collect(K = emqx_connections_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_connections_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_live_connections_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_live_connections_max, D) -> gauge_metrics(?MG(K, D)); %% sessions -emqx_collect(emqx_sessions_count, Stats) -> - gauge_metric(?C('sessions.count', Stats)); 
-emqx_collect(emqx_sessions_max, Stats) -> - gauge_metric(?C('sessions.max', Stats)); -emqx_collect(emqx_channels_count, Stats) -> - gauge_metric(?C('channels.count', Stats)); -emqx_collect(emqx_channels_max, Stats) -> - gauge_metric(?C('channels.max', Stats)); +emqx_collect(K = emqx_sessions_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_sessions_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_channels_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_channels_max, D) -> gauge_metrics(?MG(K, D)); %% pub/sub stats -emqx_collect(emqx_topics_count, Stats) -> - gauge_metric(?C('topics.count', Stats)); -emqx_collect(emqx_topics_max, Stats) -> - gauge_metric(?C('topics.max', Stats)); -emqx_collect(emqx_suboptions_count, Stats) -> - gauge_metric(?C('suboptions.count', Stats)); -emqx_collect(emqx_suboptions_max, Stats) -> - gauge_metric(?C('suboptions.max', Stats)); -emqx_collect(emqx_subscribers_count, Stats) -> - gauge_metric(?C('subscribers.count', Stats)); -emqx_collect(emqx_subscribers_max, Stats) -> - gauge_metric(?C('subscribers.max', Stats)); -emqx_collect(emqx_subscriptions_count, Stats) -> - gauge_metric(?C('subscriptions.count', Stats)); -emqx_collect(emqx_subscriptions_max, Stats) -> - gauge_metric(?C('subscriptions.max', Stats)); -emqx_collect(emqx_subscriptions_shared_count, Stats) -> - gauge_metric(?C('subscriptions.shared.count', Stats)); -emqx_collect(emqx_subscriptions_shared_max, Stats) -> - gauge_metric(?C('subscriptions.shared.max', Stats)); +emqx_collect(K = emqx_topics_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_topics_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_suboptions_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_suboptions_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscribers_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscribers_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_count, D) -> 
gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_shared_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_shared_max, D) -> gauge_metrics(?MG(K, D)); %% retained -emqx_collect(emqx_retained_count, Stats) -> - gauge_metric(?C('retained.count', Stats)); -emqx_collect(emqx_retained_max, Stats) -> - gauge_metric(?C('retained.max', Stats)); +emqx_collect(K = emqx_retained_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_retained_max, D) -> gauge_metrics(?MG(K, D)); %% delayed -emqx_collect(emqx_delayed_count, Stats) -> - gauge_metric(?C('delayed.count', Stats)); -emqx_collect(emqx_delayed_max, Stats) -> - gauge_metric(?C('delayed.max', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - packets & bytes - -%% bytes -emqx_collect(emqx_bytes_received, Metrics) -> - counter_metric(?C('bytes.received', Metrics)); -emqx_collect(emqx_bytes_sent, Metrics) -> - counter_metric(?C('bytes.sent', Metrics)); -%% received.sent -emqx_collect(emqx_packets_received, Metrics) -> - counter_metric(?C('packets.received', Metrics)); -emqx_collect(emqx_packets_sent, Metrics) -> - counter_metric(?C('packets.sent', Metrics)); -%% connect -emqx_collect(emqx_packets_connect, Metrics) -> - counter_metric(?C('packets.connect.received', Metrics)); -emqx_collect(emqx_packets_connack_sent, Metrics) -> - counter_metric(?C('packets.connack.sent', Metrics)); -emqx_collect(emqx_packets_connack_error, Metrics) -> - counter_metric(?C('packets.connack.error', Metrics)); -emqx_collect(emqx_packets_connack_auth_error, Metrics) -> - counter_metric(?C('packets.connack.auth_error', Metrics)); -%% sub.unsub -emqx_collect(emqx_packets_subscribe_received, Metrics) -> - counter_metric(?C('packets.subscribe.received', Metrics)); -emqx_collect(emqx_packets_subscribe_auth_error, Metrics) -> - counter_metric(?C('packets.subscribe.auth_error', Metrics)); 
-emqx_collect(emqx_packets_subscribe_error, Metrics) -> - counter_metric(?C('packets.subscribe.error', Metrics)); -emqx_collect(emqx_packets_suback_sent, Metrics) -> - counter_metric(?C('packets.suback.sent', Metrics)); -emqx_collect(emqx_packets_unsubscribe_received, Metrics) -> - counter_metric(?C('packets.unsubscribe.received', Metrics)); -emqx_collect(emqx_packets_unsubscribe_error, Metrics) -> - counter_metric(?C('packets.unsubscribe.error', Metrics)); -emqx_collect(emqx_packets_unsuback_sent, Metrics) -> - counter_metric(?C('packets.unsuback.sent', Metrics)); -%% publish.puback -emqx_collect(emqx_packets_publish_received, Metrics) -> - counter_metric(?C('packets.publish.received', Metrics)); -emqx_collect(emqx_packets_publish_sent, Metrics) -> - counter_metric(?C('packets.publish.sent', Metrics)); -emqx_collect(emqx_packets_publish_inuse, Metrics) -> - counter_metric(?C('packets.publish.inuse', Metrics)); -emqx_collect(emqx_packets_publish_error, Metrics) -> - counter_metric(?C('packets.publish.error', Metrics)); -emqx_collect(emqx_packets_publish_auth_error, Metrics) -> - counter_metric(?C('packets.publish.auth_error', Metrics)); -emqx_collect(emqx_packets_publish_dropped, Metrics) -> - counter_metric(?C('packets.publish.dropped', Metrics)); -%% puback -emqx_collect(emqx_packets_puback_received, Metrics) -> - counter_metric(?C('packets.puback.received', Metrics)); -emqx_collect(emqx_packets_puback_sent, Metrics) -> - counter_metric(?C('packets.puback.sent', Metrics)); -emqx_collect(emqx_packets_puback_inuse, Metrics) -> - counter_metric(?C('packets.puback.inuse', Metrics)); -emqx_collect(emqx_packets_puback_missed, Metrics) -> - counter_metric(?C('packets.puback.missed', Metrics)); -%% pubrec -emqx_collect(emqx_packets_pubrec_received, Metrics) -> - counter_metric(?C('packets.pubrec.received', Metrics)); -emqx_collect(emqx_packets_pubrec_sent, Metrics) -> - counter_metric(?C('packets.pubrec.sent', Metrics)); -emqx_collect(emqx_packets_pubrec_inuse, Metrics) 
-> - counter_metric(?C('packets.pubrec.inuse', Metrics)); -emqx_collect(emqx_packets_pubrec_missed, Metrics) -> - counter_metric(?C('packets.pubrec.missed', Metrics)); -%% pubrel -emqx_collect(emqx_packets_pubrel_received, Metrics) -> - counter_metric(?C('packets.pubrel.received', Metrics)); -emqx_collect(emqx_packets_pubrel_sent, Metrics) -> - counter_metric(?C('packets.pubrel.sent', Metrics)); -emqx_collect(emqx_packets_pubrel_missed, Metrics) -> - counter_metric(?C('packets.pubrel.missed', Metrics)); -%% pubcomp -emqx_collect(emqx_packets_pubcomp_received, Metrics) -> - counter_metric(?C('packets.pubcomp.received', Metrics)); -emqx_collect(emqx_packets_pubcomp_sent, Metrics) -> - counter_metric(?C('packets.pubcomp.sent', Metrics)); -emqx_collect(emqx_packets_pubcomp_inuse, Metrics) -> - counter_metric(?C('packets.pubcomp.inuse', Metrics)); -emqx_collect(emqx_packets_pubcomp_missed, Metrics) -> - counter_metric(?C('packets.pubcomp.missed', Metrics)); -%% pingreq -emqx_collect(emqx_packets_pingreq_received, Metrics) -> - counter_metric(?C('packets.pingreq.received', Metrics)); -emqx_collect(emqx_packets_pingresp_sent, Metrics) -> - counter_metric(?C('packets.pingresp.sent', Metrics)); -%% disconnect -emqx_collect(emqx_packets_disconnect_received, Metrics) -> - counter_metric(?C('packets.disconnect.received', Metrics)); -emqx_collect(emqx_packets_disconnect_sent, Metrics) -> - counter_metric(?C('packets.disconnect.sent', Metrics)); -%% auth -emqx_collect(emqx_packets_auth_received, Metrics) -> - counter_metric(?C('packets.auth.received', Metrics)); -emqx_collect(emqx_packets_auth_sent, Metrics) -> - counter_metric(?C('packets.auth.sent', Metrics)); -%%-------------------------------------------------------------------- -%% Metrics - messages - -%% messages -emqx_collect(emqx_messages_received, Metrics) -> - counter_metric(?C('messages.received', Metrics)); -emqx_collect(emqx_messages_sent, Metrics) -> - counter_metric(?C('messages.sent', Metrics)); 
-emqx_collect(emqx_messages_qos0_received, Metrics) -> - counter_metric(?C('messages.qos0.received', Metrics)); -emqx_collect(emqx_messages_qos0_sent, Metrics) -> - counter_metric(?C('messages.qos0.sent', Metrics)); -emqx_collect(emqx_messages_qos1_received, Metrics) -> - counter_metric(?C('messages.qos1.received', Metrics)); -emqx_collect(emqx_messages_qos1_sent, Metrics) -> - counter_metric(?C('messages.qos1.sent', Metrics)); -emqx_collect(emqx_messages_qos2_received, Metrics) -> - counter_metric(?C('messages.qos2.received', Metrics)); -emqx_collect(emqx_messages_qos2_sent, Metrics) -> - counter_metric(?C('messages.qos2.sent', Metrics)); -emqx_collect(emqx_messages_publish, Metrics) -> - counter_metric(?C('messages.publish', Metrics)); -emqx_collect(emqx_messages_dropped, Metrics) -> - counter_metric(?C('messages.dropped', Metrics)); -emqx_collect(emqx_messages_dropped_expired, Metrics) -> - counter_metric(?C('messages.dropped.await_pubrel_timeout', Metrics)); -emqx_collect(emqx_messages_dropped_no_subscribers, Metrics) -> - counter_metric(?C('messages.dropped.no_subscribers', Metrics)); -emqx_collect(emqx_messages_forward, Metrics) -> - counter_metric(?C('messages.forward', Metrics)); -emqx_collect(emqx_messages_retained, Metrics) -> - counter_metric(?C('messages.retained', Metrics)); -emqx_collect(emqx_messages_delayed, Stats) -> - counter_metric(?C('messages.delayed', Stats)); -emqx_collect(emqx_messages_delivered, Stats) -> - counter_metric(?C('messages.delivered', Stats)); -emqx_collect(emqx_messages_acked, Stats) -> - counter_metric(?C('messages.acked', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - delivery - -emqx_collect(emqx_delivery_dropped, Stats) -> - counter_metric(?C('delivery.dropped', Stats)); -emqx_collect(emqx_delivery_dropped_no_local, Stats) -> - counter_metric(?C('delivery.dropped.no_local', Stats)); -emqx_collect(emqx_delivery_dropped_too_large, Stats) -> - 
counter_metric(?C('delivery.dropped.too_large', Stats)); -emqx_collect(emqx_delivery_dropped_qos0_msg, Stats) -> - counter_metric(?C('delivery.dropped.qos0_msg', Stats)); -emqx_collect(emqx_delivery_dropped_queue_full, Stats) -> - counter_metric(?C('delivery.dropped.queue_full', Stats)); -emqx_collect(emqx_delivery_dropped_expired, Stats) -> - counter_metric(?C('delivery.dropped.expired', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - client -emqx_collect(emqx_client_connect, Stats) -> - counter_metric(?C('client.connect', Stats)); -emqx_collect(emqx_client_connack, Stats) -> - counter_metric(?C('client.connack', Stats)); -emqx_collect(emqx_client_connected, Stats) -> - counter_metric(?C('client.connected', Stats)); -emqx_collect(emqx_client_authenticate, Stats) -> - counter_metric(?C('client.authenticate', Stats)); -emqx_collect(emqx_client_auth_anonymous, Stats) -> - counter_metric(?C('client.auth.anonymous', Stats)); -emqx_collect(emqx_client_authorize, Stats) -> - counter_metric(?C('client.authorize', Stats)); -emqx_collect(emqx_client_subscribe, Stats) -> - counter_metric(?C('client.subscribe', Stats)); -emqx_collect(emqx_client_unsubscribe, Stats) -> - counter_metric(?C('client.unsubscribe', Stats)); -emqx_collect(emqx_client_disconnected, Stats) -> - counter_metric(?C('client.disconnected', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - session - -emqx_collect(emqx_session_created, Stats) -> - counter_metric(?C('session.created', Stats)); -emqx_collect(emqx_session_resumed, Stats) -> - counter_metric(?C('session.resumed', Stats)); -emqx_collect(emqx_session_takenover, Stats) -> - counter_metric(?C('session.takenover', Stats)); -emqx_collect(emqx_session_discarded, Stats) -> - counter_metric(?C('session.discarded', Stats)); -emqx_collect(emqx_session_terminated, Stats) -> - counter_metric(?C('session.terminated', Stats)); 
-%%-------------------------------------------------------------------- - -%% Metrics - overload protection -emqx_collect(emqx_overload_protection_delay_ok, Stats) -> - counter_metric(?C('overload_protection.delay.ok', Stats)); -emqx_collect(emqx_overload_protection_delay_timeout, Stats) -> - counter_metric(?C('overload_protection.delay.timeout', Stats)); -emqx_collect(emqx_overload_protection_hibernation, Stats) -> - counter_metric(?C('overload_protection.hibernation', Stats)); -emqx_collect(emqx_overload_protection_gc, Stats) -> - counter_metric(?C('overload_protection.gc', Stats)); -emqx_collect(emqx_overload_protection_new_conn, Stats) -> - counter_metric(?C('overload_protection.new_conn', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - acl -emqx_collect(emqx_authorization_allow, Stats) -> - counter_metric(?C('authorization.allow', Stats)); -emqx_collect(emqx_authorization_deny, Stats) -> - counter_metric(?C('authorization.deny', Stats)); -emqx_collect(emqx_authorization_cache_hit, Stats) -> - counter_metric(?C('authorization.cache_hit', Stats)); -emqx_collect(emqx_authorization_cache_miss, Stats) -> - counter_metric(?C('authorization.cache_miss', Stats)); -emqx_collect(emqx_authorization_superuser, Stats) -> - counter_metric(?C('authorization.superuser', Stats)); -emqx_collect(emqx_authorization_nomatch, Stats) -> - counter_metric(?C('authorization.nomatch', Stats)); -emqx_collect(emqx_authorization_matched_allow, Stats) -> - counter_metric(?C('authorization.matched_allow', Stats)); -emqx_collect(emqx_authorization_matched_deny, Stats) -> - counter_metric(?C('authorization.matched_deny', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - authn -emqx_collect(emqx_authentication_success, Stats) -> - counter_metric(?C('authentication.success', Stats)); -emqx_collect(emqx_authentication_success_anonymous, Stats) -> - counter_metric(?C('authentication.success.anonymous', 
Stats)); -emqx_collect(emqx_authentication_failure, Stats) -> - counter_metric(?C('authentication.failure', Stats)); +emqx_collect(K = emqx_delayed_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_delayed_max, D) -> gauge_metrics(?MG(K, D)); %%-------------------------------------------------------------------- %% VM -emqx_collect(emqx_vm_cpu_use, VMData) -> - gauge_metric(?C(cpu_use, VMData)); -emqx_collect(emqx_vm_cpu_idle, VMData) -> - gauge_metric(?C(cpu_idle, VMData)); -emqx_collect(emqx_vm_run_queue, VMData) -> - gauge_metric(?C(run_queue, VMData)); -emqx_collect(emqx_vm_process_messages_in_queues, VMData) -> - gauge_metric(?C(process_total_messages, VMData)); -emqx_collect(emqx_vm_total_memory, VMData) -> - gauge_metric(?C(total_memory, VMData)); -emqx_collect(emqx_vm_used_memory, VMData) -> - gauge_metric(?C(used_memory, VMData)); -emqx_collect(emqx_cluster_nodes_running, ClusterData) -> - gauge_metric(?C(nodes_running, ClusterData)); -emqx_collect(emqx_cluster_nodes_stopped, ClusterData) -> - gauge_metric(?C(nodes_stopped, ClusterData)); +emqx_collect(K = emqx_vm_cpu_use, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_cpu_idle, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_run_queue, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_process_messages_in_queues, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_total_memory, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_used_memory, D) -> gauge_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Cluster Info +emqx_collect(K = emqx_cluster_nodes_running, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_cluster_nodes_stopped, D) -> gauge_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - packets & bytes +%% bytes +emqx_collect(K = emqx_bytes_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_bytes_sent, D) -> 
counter_metrics(?MG(K, D)); +%% received.sent +emqx_collect(K = emqx_packets_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_sent, D) -> counter_metrics(?MG(K, D)); +%% connect +emqx_collect(K = emqx_packets_connect, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_connack_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_connack_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_connack_auth_error, D) -> counter_metrics(?MG(K, D)); +%% sub.unsub +emqx_collect(K = emqx_packets_subscribe_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_subscribe_auth_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_subscribe_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_suback_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_unsubscribe_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_unsubscribe_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_unsuback_sent, D) -> counter_metrics(?MG(K, D)); +%% publish.puback +emqx_collect(K = emqx_packets_publish_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_auth_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_dropped, D) -> counter_metrics(?MG(K, D)); +%% puback +emqx_collect(K = emqx_packets_puback_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_puback_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_puback_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_puback_missed, D) -> counter_metrics(?MG(K, D)); +%% pubrec +emqx_collect(K = 
emqx_packets_pubrec_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrec_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrec_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrec_missed, D) -> counter_metrics(?MG(K, D)); +%% pubrel +emqx_collect(K = emqx_packets_pubrel_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrel_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrel_missed, D) -> counter_metrics(?MG(K, D)); +%% pubcomp +emqx_collect(K = emqx_packets_pubcomp_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubcomp_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubcomp_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubcomp_missed, D) -> counter_metrics(?MG(K, D)); +%% pingreq +emqx_collect(K = emqx_packets_pingreq_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pingresp_sent, D) -> counter_metrics(?MG(K, D)); +%% disconnect +emqx_collect(K = emqx_packets_disconnect_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_disconnect_sent, D) -> counter_metrics(?MG(K, D)); +%% auth +emqx_collect(K = emqx_packets_auth_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_auth_sent, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - messages +%% messages +emqx_collect(K = emqx_messages_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos0_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos0_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos1_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos1_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = 
emqx_messages_qos2_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos2_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_publish, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_dropped, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_dropped_expired, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_dropped_no_subscribers, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_forward, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_retained, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_delayed, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_delivered, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_acked, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - delivery +emqx_collect(K = emqx_delivery_dropped, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_no_local, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_too_large, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_qos0_msg, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_queue_full, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_expired, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - client +emqx_collect(K = emqx_client_connect, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_connack, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_connected, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_authenticate, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_auth_anonymous, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_authorize, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = 
emqx_client_subscribe, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_unsubscribe, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_disconnected, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - session +emqx_collect(K = emqx_session_created, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_resumed, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_takenover, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_discarded, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_terminated, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - overload protection +emqx_collect(K = emqx_overload_protection_delay_ok, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_delay_timeout, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_hibernation, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_gc, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_new_conn, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - acl +emqx_collect(K = emqx_authorization_allow, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_deny, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_cache_hit, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_cache_miss, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_superuser, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_nomatch, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_matched_allow, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_matched_deny, D) -> counter_metrics(?MG(K, D)); 
+%%-------------------------------------------------------------------- +%% Metrics - authn +emqx_collect(K = emqx_authentication_success, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authentication_success_anonymous, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authentication_failure, D) -> counter_metrics(?MG(K, D)); %%-------------------------------------------------------------------- %% License -emqx_collect(emqx_license_expiry_at, LicenseData) -> - gauge_metric(?C(expiry_at, LicenseData)); +emqx_collect(K = emqx_license_expiry_at, D) -> gauge_metric(?MG(K, D)); %%-------------------------------------------------------------------- %% Certs -emqx_collect(emqx_cert_expiry_at, CertsData) -> - gauge_metrics(CertsData). +emqx_collect(K = emqx_cert_expiry_at, D) -> gauge_metrics(?MG(K, D)). %%-------------------------------------------------------------------- %% Indicators %%-------------------------------------------------------------------- -emqx_metrics_packets() -> +%%======================================== +%% Stats +%%======================================== + +stats_metric_meta() -> [ - emqx_bytes_received, - emqx_bytes_sent, - emqx_packets_received, - emqx_packets_sent, - emqx_packets_connect, - emqx_packets_connack_sent, - emqx_packets_connack_error, - emqx_packets_connack_auth_error, - emqx_packets_publish_received, - emqx_packets_publish_sent, - emqx_packets_publish_inuse, - emqx_packets_publish_error, - emqx_packets_publish_auth_error, - emqx_packets_publish_dropped, - emqx_packets_puback_received, - emqx_packets_puback_sent, - emqx_packets_puback_inuse, - emqx_packets_puback_missed, - emqx_packets_pubrec_received, - emqx_packets_pubrec_sent, - emqx_packets_pubrec_inuse, - emqx_packets_pubrec_missed, - emqx_packets_pubrel_received, - emqx_packets_pubrel_sent, - emqx_packets_pubrel_missed, - emqx_packets_pubcomp_received, - emqx_packets_pubcomp_sent, - emqx_packets_pubcomp_inuse, - emqx_packets_pubcomp_missed, - 
emqx_packets_subscribe_received, - emqx_packets_subscribe_error, - emqx_packets_subscribe_auth_error, - emqx_packets_suback_sent, - emqx_packets_unsubscribe_received, - emqx_packets_unsubscribe_error, - emqx_packets_unsuback_sent, - emqx_packets_pingreq_received, - emqx_packets_pingresp_sent, - emqx_packets_disconnect_received, - emqx_packets_disconnect_sent, - emqx_packets_auth_received, - emqx_packets_auth_sent + %% connections + {emqx_connections_count, gauge, 'connections.count'}, + {emqx_connections_max, gauge, 'connections.max'}, + {emqx_live_connections_count, gauge, 'live_connections.count'}, + {emqx_live_connections_max, gauge, 'live_connections.max'}, + %% sessions + {emqx_sessions_count, gauge, 'sessions.count'}, + {emqx_sessions_max, gauge, 'sessions.max'}, + {emqx_channels_count, gauge, 'channels.count'}, + {emqx_channels_max, gauge, 'channels.max'}, + %% pub/sub stats + {emqx_topics_count, gauge, 'topics.count'}, + {emqx_topics_max, gauge, 'topics.max'}, + {emqx_suboptions_count, gauge, 'suboptions.count'}, + {emqx_suboptions_max, gauge, 'suboptions.max'}, + {emqx_subscribers_count, gauge, 'subscribers.count'}, + {emqx_subscribers_max, gauge, 'subscribers.max'}, + {emqx_subscriptions_count, gauge, 'subscriptions.count'}, + {emqx_subscriptions_max, gauge, 'subscriptions.max'}, + {emqx_subscriptions_shared_count, gauge, 'subscriptions.shared.count'}, + {emqx_subscriptions_shared_max, gauge, 'subscriptions.shared.max'}, + %% retained + {emqx_retained_count, gauge, 'retained.count'}, + {emqx_retained_max, gauge, 'retained.max'}, + %% delayed + {emqx_delayed_count, gauge, 'delayed.count'}, + {emqx_delayed_max, gauge, 'delayed.max'} ]. 
-emqx_metrics_olp() -> - case emqx_config_zones:is_olp_enabled() of - true -> - [ - emqx_overload_protection_delay_ok, - emqx_overload_protection_delay_timeout, - emqx_overload_protection_hibernation, - emqx_overload_protection_gc, - emqx_overload_protection_new_conn - ]; - false -> - [] - end. +stats_data() -> + Stats = emqx_stats:getstats(), + lists:foldl( + fun({Name, _Type, MetricKAtom}, AccIn) -> + AccIn#{Name => [{[], ?C(MetricKAtom, Stats)}]} + end, + #{}, + stats_metric_meta() + ). -emqx_metrics_acl() -> +%%======================================== +%% Erlang VM +%%======================================== + +vm_metric_meta() -> [ - emqx_authorization_allow, - emqx_authorization_deny, - emqx_authorization_cache_hit, - emqx_authorization_cache_miss, - emqx_authorization_superuser, - emqx_authorization_nomatch, - emqx_authorization_matched_allow, - emqx_authorization_matched_deny + {emqx_vm_cpu_use, gauge, 'cpu_use'}, + {emqx_vm_cpu_idle, gauge, 'cpu_idle'}, + {emqx_vm_run_queue, gauge, 'run_queue'}, + {emqx_vm_process_messages_in_queues, gauge, 'process_total_messages'}, + {emqx_vm_total_memory, gauge, 'total_memory'}, + {emqx_vm_used_memory, gauge, 'used_memory'} ]. -emqx_metrics_authn() -> +vm_data() -> + VmStats = emqx_mgmt:vm_stats(), + lists:foldl( + fun({Name, _Type, MetricKAtom}, AccIn) -> + AccIn#{Name => [{[], ?C(MetricKAtom, VmStats)}]} + end, + #{}, + vm_metric_meta() + ). + +%%======================================== +%% Cluster +%%======================================== + +cluster_metric_meta() -> [ - emqx_authentication_success, - emqx_authentication_success_anonymous, - emqx_authentication_failure + {emqx_cluster_nodes_running, gauge, undefined}, + {emqx_cluster_nodes_stopped, gauge, undefined} ]. 
-emqx_metrics_messages() -> - [ - emqx_messages_received, - emqx_messages_sent, - emqx_messages_qos0_received, - emqx_messages_qos0_sent, - emqx_messages_qos1_received, - emqx_messages_qos1_sent, - emqx_messages_qos2_received, - emqx_messages_qos2_sent, - emqx_messages_publish, - emqx_messages_dropped, - emqx_messages_dropped_expired, - emqx_messages_dropped_no_subscribers, - emqx_messages_forward, - emqx_messages_retained, - emqx_messages_delayed, - emqx_messages_delivered, - emqx_messages_acked - ]. - -emqx_metrics_delivery() -> - [ - emqx_delivery_dropped, - emqx_delivery_dropped_no_local, - emqx_delivery_dropped_too_large, - emqx_delivery_dropped_qos0_msg, - emqx_delivery_dropped_queue_full, - emqx_delivery_dropped_expired - ]. - -emqx_metrics_client() -> - [ - emqx_client_connect, - emqx_client_connack, - emqx_client_connected, - emqx_client_authenticate, - emqx_client_auth_anonymous, - emqx_client_authorize, - emqx_client_subscribe, - emqx_client_unsubscribe, - emqx_client_disconnected - ]. - -emqx_metrics_session() -> - [ - emqx_session_created, - emqx_session_resumed, - emqx_session_takenover, - emqx_session_discarded, - emqx_session_terminated - ]. - -emqx_vm() -> - [ - emqx_vm_cpu_use, - emqx_vm_cpu_idle, - emqx_vm_run_queue, - emqx_vm_process_messages_in_queues, - emqx_vm_total_memory, - emqx_vm_used_memory - ]. - -emqx_vm_data() -> - emqx_mgmt:vm_stats(). - -emqx_cluster() -> - [ - emqx_cluster_nodes_running, - emqx_cluster_nodes_stopped - ]. - -emqx_cluster_data() -> +cluster_data() -> Running = emqx:cluster_nodes(running), Stopped = emqx:cluster_nodes(stopped), + #{ + emqx_cluster_nodes_running => [{[], length(Running)}], + emqx_cluster_nodes_stopped => [{[], length(Stopped)}] + }. 
+ +%%======================================== +%% Metrics +%%======================================== + +emqx_metric_data(MetricNameTypeKeyL) -> + Metrics = emqx_metrics:all(), + lists:foldl( + fun({Name, _Type, MetricKAtom}, AccIn) -> + AccIn#{Name => [{[], ?C(MetricKAtom, Metrics)}]} + end, + #{}, + MetricNameTypeKeyL + ). + +%%========== +%% Bytes && Packets +emqx_packet_metric_meta() -> [ - {nodes_running, length(Running)}, - {nodes_stopped, length(Stopped)} + {emqx_bytes_received, counter, 'bytes.received'}, + {emqx_bytes_sent, counter, 'bytes.sent'}, + %% received.sent + {emqx_packets_received, counter, 'packets.received'}, + {emqx_packets_sent, counter, 'packets.sent'}, + %% connect + {emqx_packets_connect, counter, 'packets.connect.received'}, + {emqx_packets_connack_sent, counter, 'packets.connack.sent'}, + {emqx_packets_connack_error, counter, 'packets.connack.error'}, + {emqx_packets_connack_auth_error, counter, 'packets.connack.auth_error'}, + %% sub.unsub + {emqx_packets_subscribe_received, counter, 'packets.subscribe.received'}, + {emqx_packets_subscribe_auth_error, counter, 'packets.subscribe.auth_error'}, + {emqx_packets_subscribe_error, counter, 'packets.subscribe.error'}, + {emqx_packets_suback_sent, counter, 'packets.suback.sent'}, + {emqx_packets_unsubscribe_received, counter, 'packets.unsubscribe.received'}, + {emqx_packets_unsubscribe_error, counter, 'packets.unsubscribe.error'}, + {emqx_packets_unsuback_sent, counter, 'packets.unsuback.sent'}, + %% publish.puback + {emqx_packets_publish_received, counter, 'packets.publish.received'}, + {emqx_packets_publish_sent, counter, 'packets.publish.sent'}, + {emqx_packets_publish_inuse, counter, 'packets.publish.inuse'}, + {emqx_packets_publish_error, counter, 'packets.publish.error'}, + {emqx_packets_publish_auth_error, counter, 'packets.publish.auth_error'}, + {emqx_packets_publish_dropped, counter, 'packets.publish.dropped'}, + %% puback + {emqx_packets_puback_received, counter, 
'packets.puback.received'}, + {emqx_packets_puback_sent, counter, 'packets.puback.sent'}, + {emqx_packets_puback_inuse, counter, 'packets.puback.inuse'}, + {emqx_packets_puback_missed, counter, 'packets.puback.missed'}, + %% pubrec + {emqx_packets_pubrec_received, counter, 'packets.pubrec.received'}, + {emqx_packets_pubrec_sent, counter, 'packets.pubrec.sent'}, + {emqx_packets_pubrec_inuse, counter, 'packets.pubrec.inuse'}, + {emqx_packets_pubrec_missed, counter, 'packets.pubrec.missed'}, + %% pubrel + {emqx_packets_pubrel_received, counter, 'packets.pubrel.received'}, + {emqx_packets_pubrel_sent, counter, 'packets.pubrel.sent'}, + {emqx_packets_pubrel_missed, counter, 'packets.pubrel.missed'}, + %% pubcomp + {emqx_packets_pubcomp_received, counter, 'packets.pubcomp.received'}, + {emqx_packets_pubcomp_sent, counter, 'packets.pubcomp.sent'}, + {emqx_packets_pubcomp_inuse, counter, 'packets.pubcomp.inuse'}, + {emqx_packets_pubcomp_missed, counter, 'packets.pubcomp.missed'}, + %% pingreq + {emqx_packets_pingreq_received, counter, 'packets.pingreq.received'}, + {emqx_packets_pingresp_sent, counter, 'packets.pingresp.sent'}, + %% disconnect + {emqx_packets_disconnect_received, counter, 'packets.disconnect.received'}, + {emqx_packets_disconnect_sent, counter, 'packets.disconnect.sent'}, + %% auth + {emqx_packets_auth_received, counter, 'packets.auth.received'}, + {emqx_packets_auth_sent, counter, 'packets.auth.sent'} ]. 
+%%========== +%% Messages +message_metric_meta() -> + [ + {emqx_messages_received, counter, 'messages.received'}, + {emqx_messages_sent, counter, 'messages.sent'}, + {emqx_messages_qos0_received, counter, 'messages.qos0.received'}, + {emqx_messages_qos0_sent, counter, 'messages.qos0.sent'}, + {emqx_messages_qos1_received, counter, 'messages.qos1.received'}, + {emqx_messages_qos1_sent, counter, 'messages.qos1.sent'}, + {emqx_messages_qos2_received, counter, 'messages.qos2.received'}, + {emqx_messages_qos2_sent, counter, 'messages.qos2.sent'}, + {emqx_messages_publish, counter, 'messages.publish'}, + {emqx_messages_dropped, counter, 'messages.dropped'}, + {emqx_messages_dropped_expired, counter, 'messages.dropped.await_pubrel_timeout'}, + {emqx_messages_dropped_no_subscribers, counter, 'messages.dropped.no_subscribers'}, + {emqx_messages_forward, counter, 'messages.forward'}, + {emqx_messages_retained, counter, 'messages.retained'}, + {emqx_messages_delayed, counter, 'messages.delayed'}, + {emqx_messages_delivered, counter, 'messages.delivered'}, + {emqx_messages_acked, counter, 'messages.acked'} + ]. + +%%========== +%% Delivery +delivery_metric_meta() -> + [ + {emqx_delivery_dropped, counter, 'delivery.dropped'}, + {emqx_delivery_dropped_no_local, counter, 'delivery.dropped.no_local'}, + {emqx_delivery_dropped_too_large, counter, 'delivery.dropped.too_large'}, + {emqx_delivery_dropped_qos0_msg, counter, 'delivery.dropped.qos0_msg'}, + {emqx_delivery_dropped_queue_full, counter, 'delivery.dropped.queue_full'}, + {emqx_delivery_dropped_expired, counter, 'delivery.dropped.expired'} + ]. 
+ +%%========== +%% Client +client_metric_meta() -> + [ + {emqx_client_connect, counter, 'client.connect'}, + {emqx_client_connack, counter, 'client.connack'}, + {emqx_client_connected, counter, 'client.connected'}, + {emqx_client_authenticate, counter, 'client.authenticate'}, + {emqx_client_auth_anonymous, counter, 'client.auth.anonymous'}, + {emqx_client_authorize, counter, 'client.authorize'}, + {emqx_client_subscribe, counter, 'client.subscribe'}, + {emqx_client_unsubscribe, counter, 'client.unsubscribe'}, + {emqx_client_disconnected, counter, 'client.disconnected'} + ]. + +%%========== +%% Metrics - session +session_metric_meta() -> + [ + {emqx_session_created, counter, 'session.created'}, + {emqx_session_resumed, counter, 'session.resumed'}, + {emqx_session_takenover, counter, 'session.takenover'}, + {emqx_session_discarded, counter, 'session.discarded'}, + {emqx_session_terminated, counter, 'session.terminated'} + ]. + +%%========== +%% Metrics - acl +acl_metric_meta() -> + [ + {emqx_authorization_allow, counter, 'authorization.allow'}, + {emqx_authorization_deny, counter, 'authorization.deny'}, + {emqx_authorization_cache_hit, counter, 'authorization.cache_hit'}, + {emqx_authorization_cache_miss, counter, 'authorization.cache_miss'}, + {emqx_authorization_superuser, counter, 'authorization.superuser'}, + {emqx_authorization_nomatch, counter, 'authorization.nomatch'}, + {emqx_authorization_matched_allow, counter, 'authorization.matched_allow'}, + {emqx_authorization_matched_deny, counter, 'authorization.matched_deny'} + ]. + +%%========== +%% Metrics - authn +authn_metric_meta() -> + [ + {emqx_authentication_success, counter, 'authentication.success'}, + {emqx_authentication_success_anonymous, counter, 'authentication.success.anonymous'}, + {emqx_authentication_failure, counter, 'authentication.failure'} + ]. + +%%========== +%% Overload Protection +olp_metric_meta() -> + emqx_metrics_olp_meta(emqx_config_zones:is_olp_enabled()). 
+ +emqx_metrics_olp_meta(true) -> + [ + {emqx_overload_protection_delay_ok, counter, 'overload_protection.delay.ok'}, + {emqx_overload_protection_delay_timeout, counter, 'overload_protection.delay.timeout'}, + {emqx_overload_protection_hibernation, counter, 'overload_protection.hibernation'}, + {emqx_overload_protection_gc, counter, 'overload_protection.gc'}, + {emqx_overload_protection_new_conn, counter, 'overload_protection.new_conn'} + ]; +emqx_metrics_olp_meta(false) -> + []. + +%%======================================== +%% License +%%======================================== + -if(?EMQX_RELEASE_EDITION == ee). -emqx_license() -> + +maybe_license_add_collect_family(Callback, RawData) -> + ok = add_collect_family(Callback, license_metric_meta(), ?MG(license_data, RawData)), + ok. + +maybe_license_fetch_data() -> + #{license_data => license_data()}. + +maybe_license_collect_json_data(RawData) -> + #{license => ?MG(license_data, RawData)}. + +%% license +license_metric_meta() -> [ - emqx_license_expiry_at + {emqx_license_expiry_at, gauge, undefined} ]. -emqx_license_data() -> - [ - {expiry_at, emqx_license_checker:expiry_epoch()} - ]. +license_data() -> + #{emqx_license_expiry_at => emqx_license_checker:expiry_epoch()}. + -else. +maybe_license_add_collect_family(_, _) -> + ok. + +maybe_license_fetch_data() -> + #{}. + +maybe_license_collect_json_data(_RawData) -> + #{}. + -endif. -emqx_certs() -> +%%======================================== +%% Certs +%%======================================== + +cert_metric_meta() -> [ - emqx_cert_expiry_at + {emqx_cert_expiry_at, gauge, undefined} ]. -define(LISTENER_TYPES, [ssl, wss, quic]). --spec emqx_certs_data() -> +-spec cert_data() -> [_Point :: {[Label], Epoch}] when Label :: TypeLabel | NameLabel, TypeLabel :: {listener_type, ssl | wss | quic}, NameLabel :: {listener_name, atom()}, Epoch :: non_neg_integer(). 
-emqx_certs_data() -> - case emqx_config:get([listeners], undefined) of - undefined -> - []; - AllListeners when is_map(AllListeners) -> - lists:foldl( - fun(ListenerType, PointsAcc) -> - PointsAcc ++ - points_of_listeners(ListenerType, AllListeners) - end, - _PointsInitAcc = [], - ?LISTENER_TYPES - ) - end. +cert_data() -> + cert_data(emqx_config:get([listeners], undefined)). + +cert_data(undefined) -> + #{emqx_cert_expiry_at => []}; +cert_data(AllListeners) -> + Points = lists:foldl( + fun(ListenerType, PointsAcc) -> + PointsAcc ++ + points_of_listeners(ListenerType, AllListeners) + end, + _PointsInitAcc = [], + ?LISTENER_TYPES + ), + #{ + emqx_cert_expiry_at => Points + }. points_of_listeners(Type, AllListeners) -> do_points_of_listeners(Type, maps:get(Type, AllListeners, undefined)). @@ -803,24 +834,7 @@ do_points_of_listeners(ListenerType, TypeOfListeners) -> ). gen_point(Type, Name, Path) -> - { - %% Labels: [{_Labelkey, _LabelValue}] - [ - {listener_type, Type}, - {listener_name, Name} - ], - %% Value - cert_expiry_at_from_path(Path) - }. - -collect_certs_json(CertsData) -> - lists:foldl( - fun({Labels, Data}, AccIn) -> - [(maps:from_list(Labels))#{emqx_cert_expiry_at => Data} | AccIn] - end, - _InitAcc = [], - CertsData - ). + {[{listener_type, Type}, {listener_name, Name}], cert_expiry_at_from_path(Path)}. %% TODO: cert manager for more generic utils functions cert_expiry_at_from_path(Path0) -> @@ -849,6 +863,59 @@ utc_time_to_datetime(Str) -> date_to_expiry_epoch(DateTime) -> calendar:datetime_to_gregorian_seconds(DateTime) - ?EPOCH_START. +%%-------------------------------------------------------------------- +%% Collect functions +%%-------------------------------------------------------------------- + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `application/json` + +%% always return json array +collect_cert_json_data(Data) -> + collect_json_data_(Data). 
+ +collect_json_data(Data0) -> + DataListPerNode = collect_json_data_(Data0), + case {?GET_PROM_DATA_MODE(), DataListPerNode} of + %% all nodes results unaggregated, should be a list + {?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED, _} -> + DataListPerNode; + %% only local node result [#{...}] + %% To guarantee compatibility, return a json object, not an array + {?PROM_DATA_MODE__NODE, [NData | _]} -> + NData; + %% All nodes results aggregated + %% return a json object, not array + {?PROM_DATA_MODE__ALL_NODES_AGGREGATED, [NData | _]} -> + NData; + %% olp may be disabled: map the empty list to an empty json object + {_, []} -> + #{} + end. + +collect_json_data_(Data) -> + emqx_prometheus_cluster:collect_json_data(Data, fun zip_json_prom_stats_metrics/3). + +zip_json_prom_stats_metrics(Key, Points, [] = _AccIn) -> + lists:foldl( + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + Point = LablesKVMap#{Key => Metric}, + [Point | AccIn2] + end, + [], + Points + ); +zip_json_prom_stats_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl(emqx_prometheus_cluster:point_to_map_fun(Key), [], Points), + lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). + +metrics_name(MetricsAll) -> + [Name || {Name, _, _} <- MetricsAll]. + +%%-------------------------------------------------------------------- +%% bpapi + %% deprecated_since 5.0.10, remove this when 5.1.x do_start() -> emqx_prometheus_sup:start_child(?APP). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 5fa9057da..0d0607518 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -81,43 +81,6 @@ -define(MG0(K, MAP), maps:get(K, MAP, 0)). -define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). 
--define(AUTHNS_WITH_TYPE, [ - {emqx_authn_enable, gauge}, - {emqx_authn_status, gauge}, - {emqx_authn_nomatch, counter}, - {emqx_authn_total, counter}, - {emqx_authn_success, counter}, - {emqx_authn_failed, counter} -]). - --define(AUTHZS_WITH_TYPE, [ - {emqx_authz_enable, gauge}, - {emqx_authz_status, gauge}, - {emqx_authz_nomatch, counter}, - {emqx_authz_total, counter}, - {emqx_authz_success, counter}, - {emqx_authz_failed, counter} -]). - --define(AUTHN_USERS_COUNT_WITH_TYPE, [ - {emqx_authn_users_count, gauge} -]). - --define(AUTHZ_RULES_COUNT_WITH_TYPE, [ - {emqx_authz_rules_count, gauge} -]). - --define(BANNED_WITH_TYPE, [ - {emqx_banned_count, gauge} -]). - --define(LOGICAL_SUM_METRIC_NAMES, [ - emqx_authn_enable, - emqx_authn_status, - emqx_authz_enable, - emqx_authz_status -]). - %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -132,11 +95,11 @@ deregister_cleanup(_) -> ok. 
%% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), - ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), - ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), - ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), - ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, ?MG(authz_rules_count, RawData)), - ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, ?MG(banned_count, RawData)), + ok = add_collect_family(Callback, authn_metric_meta(), ?MG(authn_data, RawData)), + ok = add_collect_family(Callback, authn_users_count_metric_meta(), ?MG(authn_users_count_data, RawData)), + ok = add_collect_family(Callback, authz_metric_meta(), ?MG(authz_data, RawData)), + ok = add_collect_family(Callback, authz_rules_count_metric_meta(), ?MG(authz_rules_count_data, RawData)), + ok = add_collect_family(Callback, banned_count_metric_meta(), ?MG(banned_count_data, RawData)), ok; collect_mf(_, _) -> ok. @@ -145,8 +108,8 @@ collect_mf(_, _) -> collect(<<"json">>) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), #{ - emqx_authn => collect_json_data(?MG(authn, RawData)), - emqx_authz => collect_json_data(?MG(authz, RawData)), + emqx_authn => collect_json_data(?MG(authn_data, RawData)), + emqx_authz => collect_json_data(?MG(authz_data, RawData)), emqx_banned => collect_banned_data() }; collect(<<"prometheus">>) -> @@ -165,25 +128,30 @@ collect_metrics(Name, Metrics) -> %% behaviour fetch_data_from_local_node() -> {node(self()), #{ - authn => authn_data(), - authz => authz_data() + authn_data => authn_data(), + authz_data => authz_data() }}. 
fetch_cluster_consistented_data() -> #{ - authn_users_count => authn_users_count_data(), - authz_rules_count => authz_rules_count_data(), - banned_count => banned_count_data() + authn_users_count_data => authn_users_count_data(), + authz_rules_count_data => authz_rules_count_data(), + banned_count_data => banned_count_data() }. aggre_or_zip_init_acc() -> #{ - authn => maps:from_keys(authn_metric_names(), []), - authz => maps:from_keys(authz_metric_names(), []) + authn_data => maps:from_keys(authn_metric(names), []), + authz_data => maps:from_keys(authz_metric(names), []) }. logic_sum_metrics() -> - ?LOGICAL_SUM_METRIC_NAMES. + [ + emqx_authn_enable, + emqx_authn_status, + emqx_authz_enable, + emqx_authz_status + ]. %%-------------------------------------------------------------------- %% Collector @@ -243,6 +211,19 @@ collect_auth(emqx_banned_count, Data) -> %%==================== %% Authn overview +authn_metric_meta() -> + [ + {emqx_authn_enable, gauge}, + {emqx_authn_status, gauge}, + {emqx_authn_nomatch, counter}, + {emqx_authn_total, counter}, + {emqx_authn_success, counter}, + {emqx_authn_failed, counter} + ]. + +authn_metric(names) -> + emqx_prometheus_cluster:metric_names(authn_metric_meta()). + -spec authn_data() -> #{Key => [Point]} when Key :: authn_metric_name(), Point :: {[Label], Metric}, @@ -256,7 +237,7 @@ authn_data() -> AccIn#{Key => authn_backend_to_points(Key, Authns)} end, #{}, - authn_metric_names() + authn_metric(names) ). -spec authn_backend_to_points(Key, list(Authn)) -> list(Point) when @@ -287,15 +268,17 @@ lookup_authn_metrics_local(Id) -> emqx_authn_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authn_metric_names() -- [emqx_authn_enable], 0) + maps:from_keys(authn_metric(names) -- [emqx_authn_enable], 0) end. -authn_metric_names() -> - emqx_prometheus_cluster:metric_names(?AUTHNS_WITH_TYPE). 
- %%==================== %% Authn users count +authn_users_count_metric_meta() -> + [ + {emqx_authn_users_count, gauge} + ]. + -define(AUTHN_MNESIA, emqx_authn_mnesia). -define(AUTHN_SCRAM_MNESIA, emqx_authn_scram_mnesia). @@ -321,6 +304,19 @@ authn_users_count_data() -> %%==================== %% Authz overview +authz_metric_meta() -> + [ + {emqx_authz_enable, gauge}, + {emqx_authz_status, gauge}, + {emqx_authz_nomatch, counter}, + {emqx_authz_total, counter}, + {emqx_authz_success, counter}, + {emqx_authz_failed, counter} + ]. + +authz_metric(names) -> + emqx_prometheus_cluster:metric_names(authz_metric_meta()). + -spec authz_data() -> #{Key => [Point]} when Key :: authz_metric_name(), Point :: {[Label], Metric}, @@ -334,7 +330,7 @@ authz_data() -> AccIn#{Key => authz_backend_to_points(Key, Authzs)} end, #{}, - authz_metric_names() + authz_metric(names) ). -spec authz_backend_to_points(Key, list(Authz)) -> list(Point) when @@ -365,15 +361,17 @@ lookup_authz_metrics_local(Type) -> emqx_authz_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authz_metric_names() -- [emqx_authz_enable], 0) + maps:from_keys(authz_metric(names) -- [emqx_authz_enable], 0) end. -authz_metric_names() -> - emqx_prometheus_cluster:metric_names(?AUTHZS_WITH_TYPE). - %%==================== %% Authz rules count +authz_rules_count_metric_meta() -> + [ + {emqx_authz_rules_count, gauge} + ]. + -define(ACL_TABLE, emqx_acl). authz_rules_count_data() -> @@ -400,7 +398,13 @@ authz_rules_count_data() -> %%==================== %% Banned count --define(BANNED_TABLE, emqx_banned). +banned_count_metric_meta() -> + [ + {emqx_banned_count, gauge} + ]. +-define(BANNED_TABLE, + emqx_banned +). banned_count_data() -> mnesia_size(?BANNED_TABLE). 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index bfd011eaa..008a029a8 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -65,65 +65,6 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). --define(RULES_WITH_TYPE, [ - {emqx_rules_count, gauge} -]). - --define(CONNECTORS_WITH_TYPE, [ - {emqx_connectors_count, gauge} -]). - --define(RULES_SPECIFIC_WITH_TYPE, [ - {emqx_rule_enable, gauge}, - {emqx_rule_matched, counter}, - {emqx_rule_failed, counter}, - {emqx_rule_passed, counter}, - {emqx_rule_failed_exception, counter}, - {emqx_rule_failed_no_result, counter}, - {emqx_rule_actions_total, counter}, - {emqx_rule_actions_success, counter}, - {emqx_rule_actions_failed, counter}, - {emqx_rule_actions_failed_out_of_service, counter}, - {emqx_rule_actions_failed_unknown, counter} -]). - --define(ACTION_SPECIFIC_WITH_TYPE, [ - {emqx_action_matched, counter}, - {emqx_action_dropped, counter}, - {emqx_action_success, counter}, - {emqx_action_failed, counter}, - {emqx_action_inflight, gauge}, - {emqx_action_received, counter}, - {emqx_action_late_reply, counter}, - {emqx_action_retried, counter}, - {emqx_action_retried_success, counter}, - {emqx_action_retried_failed, counter}, - {emqx_action_dropped_resource_stopped, counter}, - {emqx_action_dropped_resource_not_found, counter}, - {emqx_action_dropped_queue_full, counter}, - {emqx_action_dropped_other, counter}, - {emqx_action_dropped_expired, counter}, - {emqx_action_queuing, gauge} -]). - --define(CONNECTOR_SPECIFIC_WITH_TYPE, [ - {emqx_connector_enable, gauge}, - {emqx_connector_status, gauge} -]). - --if(?EMQX_RELEASE_EDITION == ee). --define(SCHEMA_REGISTRY_WITH_TYPE, [ - emqx_schema_registrys_count -]). --else. --endif. 
- --define(LOGICAL_SUM_METRIC_NAMES, [ - emqx_rule_enable, - emqx_connector_enable, - emqx_connector_status -]). - %%-------------------------------------------------------------------- %% Callback for emqx_prometheus_cluster %%-------------------------------------------------------------------- @@ -132,28 +73,32 @@ fetch_data_from_local_node() -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), {node(self()), #{ - rule_specific_data => rule_specific_data(Rules), - action_specific_data => action_specific_data(Bridges), - connector_specific_data => connector_specific_data(Bridges) + rule_metric_data => rule_metric_data(Rules), + action_metric_data => action_metric_data(Bridges), + connector_metric_data => connector_metric_data(Bridges) }}. fetch_cluster_consistented_data() -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), (maybe_collect_schema_registry())#{ - rules_data => rules_data(Rules), - connectors_data => connectors_data(Bridges) + rules_ov_data => rules_ov_data(Rules), + connectors_ov_data => connectors_ov_data(Bridges) }. aggre_or_zip_init_acc() -> #{ - rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), - action_specific_data => maps:from_keys(action_specific_metric_names(), []), - connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) + rule_metric_data => maps:from_keys(rule_metric(names), []), + action_metric_data => maps:from_keys(action_metric(names), []), + connector_metric_data => maps:from_keys(connectr_metric(names), []) }. logic_sum_metrics() -> - ?LOGICAL_SUM_METRIC_NAMES. + [ + emqx_rule_enable, + emqx_connector_enable, + emqx_connector_status + ]. 
%%-------------------------------------------------------------------- %% Collector API @@ -170,21 +115,23 @@ collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), %% Data Integration Overview - ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), - ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, ?MG(connectors_data, RawData)), + ok = add_collect_family(Callback, rules_ov_metric_meta(), ?MG(rules_ov_data, RawData)), + ok = add_collect_family( + Callback, connectors_ov_metric_meta(), ?MG(connectors_ov_data, RawData) + ), ok = maybe_collect_family_schema_registry(Callback), - %% Rule Specific - RuleSpecificDs = ?MG(rule_specific_data, RawData), - ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, RuleSpecificDs), + %% Rule Metric + RuleMetricDs = ?MG(rule_metric_data, RawData), + ok = add_collect_family(Callback, rule_metric_meta(), RuleMetricDs), - %% Action Specific - ActionSpecificDs = ?MG(action_specific_data, RawData), - ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, ActionSpecificDs), + %% Action Metric + ActionMetricDs = ?MG(action_metric_data, RawData), + ok = add_collect_family(Callback, action_metric_meta(), ActionMetricDs), - %% Connector Specific - ConnectorSpecificDs = ?MG(connector_specific_data, RawData), - ok = add_collect_family(Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, ConnectorSpecificDs), + %% Connector Metric + ConnectorMetricDs = ?MG(connector_metric_data, RawData), + ok = add_collect_family(Callback, connector_metric_meta(), ConnectorMetricDs), ok; collect_mf(_, _) -> @@ -197,9 +144,9 @@ collect(<<"json">>) -> Bridges = emqx_bridge:list(), #{ data_integration_overview => collect_data_integration_overview(Rules, Bridges), - rules => collect_json_data(?MG(rule_specific_data, RawData)), - actions => collect_json_data(?MG(action_specific_data, RawData)), - connectors => 
collect_json_data(?MG(connector_specific_data, RawData)) + rules => collect_json_data(?MG(rule_metric_data, RawData)), + actions => collect_json_data(?MG(action_metric_data, RawData)), + connectors => collect_json_data(?MG(connector_metric_data, RawData)) }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). @@ -218,21 +165,6 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). --if(?EMQX_RELEASE_EDITION == ee). -maybe_collect_family_schema_registry(Callback) -> - ok = add_collect_family(Callback, ?SCHEMA_REGISTRY_WITH_TYPE, schema_registry_data()), - ok. - -maybe_collect_schema_registry() -> - schema_registry_data(). --else. -maybe_collect_family_schema_registry(_) -> - ok. - -maybe_collect_schema_registry() -> - #{}. --endif. - %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -244,88 +176,54 @@ maybe_collect_schema_registry() -> %%==================== %% All Rules %% Rules -collect_di(K = emqx_rules_count, Data) -> - gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Schema Registry -collect_di(K = emqx_schema_registrys_count, Data) -> - gauge_metric(?MG(K, Data)); +collect_di(K = emqx_schema_registrys_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Connectors -collect_di(K = emqx_connectors_count, Data) -> - gauge_metric(?MG(K, Data)); +collect_di(K = emqx_connectors_count, Data) -> gauge_metric(?MG(K, Data)); %%======================================== -%% Data Integration for Specific: Rule && Action && Connector +%% Data Integration Metric for: Rule && Action && Connector %%======================================== %%==================== -%% Specific Rule -collect_di(K = emqx_rule_enable, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = 
emqx_rule_matched, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_passed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_failed_exception, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_failed_no_result, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_total, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_success, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_failed_unknown, Data) -> - counter_metrics(?MG(K, Data)); +%% Rule Metric +collect_di(K = emqx_rule_enable, Data) -> gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_passed, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_exception, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_no_result, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_total, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_success, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_unknown, Data) -> counter_metrics(?MG(K, Data)); %%==================== -%% Specific Action - -collect_di(K = emqx_action_matched, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_success, Data) -> - 
counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_inflight, Data) -> - %% inflight type: gauge - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_received, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_late_reply, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_retried, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_retried_success, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_retried_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_resource_stopped, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_resource_not_found, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_queue_full, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_other, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_expired, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_queuing, Data) -> - %% queuing type: gauge - gauge_metrics(?MG(K, Data)); +%% Action Metric +collect_di(K = emqx_action_matched, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_success, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_failed, Data) -> counter_metrics(?MG(K, Data)); +%% inflight type: gauge +collect_di(K = emqx_action_inflight, Data) -> gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_received, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_late_reply, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_success, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_failed, Data) -> 
counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_stopped, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_not_found, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_queue_full, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_other, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_expired, Data) -> counter_metrics(?MG(K, Data)); +%% queuing type: gauge +collect_di(K = emqx_action_queuing, Data) -> gauge_metrics(?MG(K, Data)); %%==================== -%% Specific Connector - -collect_di(K = emqx_connector_enable, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_connector_status, Data) -> - gauge_metrics(?MG(K, Data)). +%% Connector Metric +collect_di(K = emqx_connector_enable, Data) -> gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_connector_status, Data) -> gauge_metrics(?MG(K, Data)). %%-------------------------------------------------------------------- %% Internal functions @@ -338,8 +236,16 @@ collect_di(K = emqx_connector_status, Data) -> %%==================== %% All Rules +rules_ov_metric_meta() -> + [ + {emqx_rules_count, gauge} + ]. + +rules_ov_metric(names) -> + emqx_prometheus_cluster:metric_names(rules_ov_metric_meta()). + -define(RULE_TAB, emqx_rule_engine). -rules_data(_Rules) -> +rules_ov_data(_Rules) -> #{ emqx_rules_count => ets:info(?RULE_TAB, size) }. @@ -348,36 +254,83 @@ rules_data(_Rules) -> %% Schema Registry -if(?EMQX_RELEASE_EDITION == ee). + +maybe_collect_family_schema_registry(Callback) -> + ok = add_collect_family(Callback, schema_registry_metric_meta(), schema_registry_data()), + ok. + +schema_registry_metric_meta() -> + [ + {emqx_schema_registrys_count, gauge} + ]. + schema_registry_data() -> #{ emqx_schema_registrys_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. + +maybe_collect_schema_registry() -> + schema_registry_data(). + -else. 
+ +maybe_collect_family_schema_registry(_) -> + ok. + +maybe_collect_schema_registry() -> + #{}. + -endif. %%==================== %% Connectors -connectors_data(Brdiges) -> +connectors_ov_metric_meta() -> + [ + {emqx_connectors_count, gauge} + ]. + +connectors_ov_metric(names) -> + emqx_prometheus_cluster:metric_names(connectors_ov_metric_meta()). + +connectors_ov_data(Brdiges) -> #{ %% Both Bridge V1 and V2 emqx_connectors_count => erlang:length(Brdiges) }. %%======================================== -%% Data Integration for Specific: Rule && Action && Connector +%% Data Integration Metric for: Rule && Action && Connector %%======================================== %%==================== -%% Specific Rule +%% Rule Metric %% With rule_id as label key: `rule_id` -rule_specific_data(Rules) -> +rule_metric_meta() -> + [ + {emqx_rule_enable, gauge}, + {emqx_rule_matched, counter}, + {emqx_rule_failed, counter}, + {emqx_rule_passed, counter}, + {emqx_rule_failed_exception, counter}, + {emqx_rule_failed_no_result, counter}, + {emqx_rule_actions_total, counter}, + {emqx_rule_actions_success, counter}, + {emqx_rule_actions_failed, counter}, + {emqx_rule_actions_failed_out_of_service, counter}, + {emqx_rule_actions_failed_unknown, counter} + ]. + +rule_metric(names) -> + emqx_prometheus_cluster:metric_names(rule_metric_meta()). + +rule_metric_data(Rules) -> lists:foldl( fun(#{id := Id} = Rule, AccIn) -> merge_acc_with_rules(Id, get_metric(Rule), AccIn) end, - maps:from_keys(rule_specific_metric_names(), []), + maps:from_keys(rule_metric(names), []), Rules ). @@ -413,20 +366,40 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> } end. -rule_specific_metric_names() -> - emqx_prometheus_cluster:metric_names(?RULES_SPECIFIC_WITH_TYPE). 
- %%==================== -%% Specific Action +%% Action Metric %% With action_id: `{type}:{name}` as label key: `action_id` -action_specific_data(Bridges) -> +action_metric_meta() -> + [ + {emqx_action_matched, counter}, + {emqx_action_dropped, counter}, + {emqx_action_success, counter}, + {emqx_action_failed, counter}, + {emqx_action_inflight, gauge}, + {emqx_action_received, counter}, + {emqx_action_late_reply, counter}, + {emqx_action_retried, counter}, + {emqx_action_retried_success, counter}, + {emqx_action_retried_failed, counter}, + {emqx_action_dropped_resource_stopped, counter}, + {emqx_action_dropped_resource_not_found, counter}, + {emqx_action_dropped_queue_full, counter}, + {emqx_action_dropped_other, counter}, + {emqx_action_dropped_expired, counter}, + {emqx_action_queuing, gauge} + ]. + +action_metric(names) -> + emqx_prometheus_cluster:metric_names(action_metric_meta()). + +action_metric_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = _Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, - maps:from_keys(action_specific_metric_names(), []), + maps:from_keys(action_metric(names), []), Bridges ). @@ -467,20 +440,26 @@ get_bridge_metric(Type, Name) -> } end. -action_specific_metric_names() -> - emqx_prometheus_cluster:metric_names(?ACTION_SPECIFIC_WITH_TYPE). - %%==================== -%% Specific Connector +%% Connector Metric %% With connector_id: `{type}:{name}` as label key: `connector_id` -connector_specific_data(Bridges) -> +connector_metric_meta() -> + [ + {emqx_connector_enable, gauge}, + {emqx_connector_status, gauge} + ]. + +connectr_metric(names) -> + emqx_prometheus_cluster:metric_names(connector_metric_meta()). 
+ +connector_metric_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) end, - maps:from_keys(connectr_specific_metric_names(), []), + maps:from_keys(connectr_metric(names), []), Bridges ). @@ -504,9 +483,6 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> emqx_connector_status => emqx_prometheus_cluster:status_to_number(Status) }. -connectr_specific_metric_names() -> - emqx_prometheus_cluster:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). - %%-------------------------------------------------------------------- %% Collect functions %%-------------------------------------------------------------------- @@ -514,18 +490,18 @@ connectr_specific_metric_names() -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `application/json` collect_data_integration_overview(Rules, Bridges) -> - RulesD = rules_data(Rules), - ConnectorsD = connectors_data(Bridges), + RulesD = rules_ov_data(Rules), + ConnectorsD = connectors_ov_data(Bridges), M1 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, - emqx_prometheus_cluster:metric_names(?RULES_WITH_TYPE) + rules_ov_metric(names) ), M2 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, - emqx_prometheus_cluster:metric_names(?CONNECTORS_WITH_TYPE) + connectors_ov_metric(names) ), M3 = maybe_collect_schema_registry(), From 2263df0242ac617ecdbdb5a2e05b56f145e54995 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 21 Jan 2024 23:11:30 +0800 Subject: [PATCH 43/89] fix(prom_push_gw): use format mode `node` for prometheus push gateway --- apps/emqx_prometheus/src/emqx_prometheus_cluster.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl index e48df0f8b..2a68c7b3b 100644 --- 
a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl @@ -41,6 +41,9 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). +raw_data(Module, undefined) -> + %% TODO: for push gateway, the format mode should be configurable + raw_data(Module, ?PROM_DATA_MODE__NODE); raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> AllNodesMetrics = aggre_cluster(Module), Cluster = Module:fetch_cluster_consistented_data(), From 2061d75b50471eded6ee932386836440220692de Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 21 Jan 2024 22:46:27 +0800 Subject: [PATCH 44/89] docs: prometheus api `mode` field description --- .../emqx_prometheus/src/emqx_prometheus_api.erl | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 47a5b0299..89bfa6e6a 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -126,6 +126,7 @@ security() -> false -> [] end. +%% erlfmt-ignore fields(mode) -> [ {mode, @@ -133,7 +134,21 @@ fields(mode) -> hoconsc:enum(?PROM_DATA_MODES), #{ default => node, - desc => <<"Metrics format mode.">>, + desc => <<" +Metrics format mode. + +`node`: +Return metrics from local node. And it is the default behaviour if `mode` not specified. + +`all_nodes_aggregated`: +Return metrics for all nodes. +And if possible, calculate the arithmetic sum or logical sum of the indicators of all nodes. + +`all_nodes_unaggregated`: +Return metrics from all nodes, and the metrics are not aggregated. +The node name will be included in the returned results to +indicate that certain metrics were returned on a certain node. 
+">>, in => query, required => false, example => node From c2f26e8e982c06b56f256ae0dce5fca9d1eb0113 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 17:18:42 +0800 Subject: [PATCH 45/89] docs: bump change log --- changes/ce/feat-12299.en.md | 15 +++++++++++++++ changes/ee/feat-12299.en.md | 17 +++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 changes/ce/feat-12299.en.md create mode 100644 changes/ee/feat-12299.en.md diff --git a/changes/ce/feat-12299.en.md b/changes/ce/feat-12299.en.md new file mode 100644 index 000000000..1721970e4 --- /dev/null +++ b/changes/ce/feat-12299.en.md @@ -0,0 +1,15 @@ +Expose more metrics to improve observability: + +Monitor API: + - Add `retained_msg_count` field to `/api/v5/monitor_current`. + - Add `retained_msg_count` and `node_uptime` fields to `/api/v5/monitor_current/nodes/{node}`. + +Prometheus API: + - Add `emqx_cert_expiry_at` to `/api/v5/prometheus/stats` to display TLS listener certificate expiration time. + - Add `/api/v5/prometheus/auth` endpoint to provide metrics such as execution count and running status for all authenticators and authorizers. + - Add `/api/v5/prometheus/data_integration` endpoint to provide metrics such as execution count and status for all rules, actions, and connectors. + +Limitations: + Prometheus push gateway only supports content in `/api/v5/prometheus/stats?mode=node` for now. + +For more API details and metric type information, please see the Swagger API docs. diff --git a/changes/ee/feat-12299.en.md b/changes/ee/feat-12299.en.md new file mode 100644 index 000000000..629928b90 --- /dev/null +++ b/changes/ee/feat-12299.en.md @@ -0,0 +1,17 @@ +# Expose more metrics to improve observability: + +Monitor API: + - Add `retained_msg_count` field to `/api/v5/monitor_current`. + - Add `license_quota` field to `/api/v5/monitor_current`. + - Add `retained_msg_count` and `node_uptime` fields to `/api/v5/monitor_current/nodes/{node}`.
+ - Add `retained_msg_count`, `license_quota` and `node_uptime` fields to `/api/v5/monitor_current/nodes/{node}`. + +Prometheus API: + - Add `emqx_cert_expiry_at` and `emqx_license_expiry_at` to `/api/v5/prometheus/stats` to display TLS listener certificate expiration time and license expiration time. + - Add `/api/v5/prometheus/auth` endpoint to provide metrics such as execution count and running status for all authenticators and authorizers. + - Add `/api/v5/prometheus/data_integration` endpoint to provide metrics such as execution count and status for all rules, actions, and connectors. + +Limitations: + Prometheus push gateway only supports the content in `/api/v5/prometheus/stats?mode=node` + +For more API details and metric type information, please see the Swagger API docs. From cd90b93550304ac0c0eb45ae91ef28d06962c972 Mon Sep 17 00:00:00 2001 From: firest Date: Mon, 22 Jan 2024 19:13:50 +0800 Subject: [PATCH 46/89] fix(sysk): fix probe testing bugs for syskeeper --- .../src/emqx_bridge_syskeeper_proxy_server.erl | 15 +++++++++++++-- .../src/emqx_connector_resource.erl | 5 ++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper_proxy_server.erl b/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper_proxy_server.erl index 187ca1c64..3fce41ec3 100644 --- a/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper_proxy_server.erl +++ b/apps/emqx_bridge_syskeeper/src/emqx_bridge_syskeeper_proxy_server.erl @@ -67,11 +67,17 @@ on_start( {tcp_options, [{mode, binary}, {reuseaddr, true}, {nodelay, true}]} ], MFArgs = {?MODULE, start_link, [maps:with([handshake_timeout], Config)]}, - ok = emqx_resource:allocate_resource(InstanceId, listen_on, ListenOn), + %% Since esockd only supports atom names and we don't want to introduce a new atom per each instance + %% when the port is same for two instance/connector, they will reference the same esockd listener + %% to prevent the failed one from deallocating
the listener which created by a earlier instance + %% we need record only when the listen is successed case esockd:open(?MODULE, ListenOn, Options, MFArgs) of {ok, _} -> + ok = emqx_resource:allocate_resource(InstanceId, listen_on, ListenOn), {ok, #{listen_on => ListenOn}}; + {error, {already_started, _}} -> + {error, eaddrinuse}; Error -> Error end. @@ -83,7 +89,12 @@ on_stop(InstanceId, _State) -> }), case emqx_resource:get_allocated_resources(InstanceId) of #{listen_on := ListenOn} -> - esockd:close(?MODULE, ListenOn); + case esockd:close(?MODULE, ListenOn) of + {error, not_found} -> + ok; + Result -> + Result + end; _ -> ok end. diff --git a/apps/emqx_connector/src/emqx_connector_resource.erl b/apps/emqx_connector/src/emqx_connector_resource.erl index f85109080..8611ba744 100644 --- a/apps/emqx_connector/src/emqx_connector_resource.erl +++ b/apps/emqx_connector/src/emqx_connector_resource.erl @@ -229,7 +229,10 @@ create_dry_run(Type, Conf0, Callback) -> TypeBin = bin(Type), TypeAtom = safe_atom(Type), %% We use a fixed name here to avoid creating an atom - TmpName = iolist_to_binary([?TEST_ID_PREFIX, TypeBin, ":", <<"probedryrun">>]), + %% to avoid potential race condition, the resource id should be unique + UID = integer_to_binary(erlang:unique_integer([monotonic, positive])), + TmpName = + iolist_to_binary([?TEST_ID_PREFIX, TypeBin, ":", <<"probedryrun">>, UID]), TmpPath = emqx_utils:safe_filename(TmpName), Conf1 = maps:without([<<"name">>], Conf0), RawConf = #{<<"connectors">> => #{TypeBin => #{<<"temp_name">> => Conf1}}}, From 3207f0ea808d3e9377bc5f08434911c0933ce504 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Fri, 19 Jan 2024 15:34:14 -0300 Subject: [PATCH 47/89] fix(machine_boot): inject connector/bridge apps as dependencies to `emqx_connector` app Fixes https://emqx.atlassian.net/browse/EMQX-11771 For the same reasons as we inject `emqx_bridge_*` applications as dependencies to `emqx_bridge` when starting the node. 
Already configured connectors are started when `emqx_connector` application starts, and may lead to crashes and noise in the logs. One example is to configure a mongodb bridge and restart the node. --- apps/emqx_bridge_es/src/emqx_bridge_es.app.src | 3 +-- apps/emqx_bridge_http/src/emqx_bridge_http.app.src | 2 +- apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.app.src | 4 +--- apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.app.src | 1 - apps/emqx_bridge_mysql/src/emqx_bridge_mysql.app.src | 1 - apps/emqx_bridge_redis/src/emqx_bridge_redis.app.src | 1 - apps/emqx_machine/src/emqx_machine_boot.erl | 7 +++++++ apps/emqx_management/test/emqx_mgmt_api_configs_SUITE.erl | 2 +- apps/emqx_mongodb/src/emqx_mongodb.app.src | 3 +-- apps/emqx_mysql/src/emqx_mysql.app.src | 3 +-- apps/emqx_postgresql/src/emqx_postgresql.app.src | 3 +-- apps/emqx_redis/src/emqx_redis.app.src | 3 +-- changes/ce/fix-12359.en.md | 1 + 13 files changed, 16 insertions(+), 18 deletions(-) create mode 100644 changes/ce/fix-12359.en.md diff --git a/apps/emqx_bridge_es/src/emqx_bridge_es.app.src b/apps/emqx_bridge_es/src/emqx_bridge_es.app.src index 9e98cd33e..34b07d305 100644 --- a/apps/emqx_bridge_es/src/emqx_bridge_es.app.src +++ b/apps/emqx_bridge_es/src/emqx_bridge_es.app.src @@ -10,8 +10,7 @@ {applications, [ kernel, stdlib, - emqx_resource, - emqx_connector + emqx_resource ]}, {env, []}, {licenses, ["Business Source License 1.1"]}, diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http.app.src b/apps/emqx_bridge_http/src/emqx_bridge_http.app.src index c1c961e5b..0c355691b 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http.app.src +++ b/apps/emqx_bridge_http/src/emqx_bridge_http.app.src @@ -2,7 +2,7 @@ {description, "EMQX HTTP Bridge and Connector Application"}, {vsn, "0.2.2"}, {registered, []}, - {applications, [kernel, stdlib, emqx_connector, emqx_resource, ehttpc]}, + {applications, [kernel, stdlib, emqx_resource, ehttpc]}, {env, [{emqx_action_info_modules, 
[emqx_bridge_http_action_info]}]}, {modules, []}, {links, []} diff --git a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.app.src b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.app.src index d7044b063..4fd96d5e7 100644 --- a/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.app.src +++ b/apps/emqx_bridge_iotdb/src/emqx_bridge_iotdb.app.src @@ -10,9 +10,7 @@ {applications, [ kernel, stdlib, - emqx_resource, - %% for module emqx_connector_http - emqx_connector + emqx_resource ]}, {env, []}, {licenses, ["Business Source License 1.1"]}, diff --git a/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.app.src b/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.app.src index 198c5f8e8..aaaae4cf4 100644 --- a/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.app.src +++ b/apps/emqx_bridge_mongodb/src/emqx_bridge_mongodb.app.src @@ -5,7 +5,6 @@ {applications, [ kernel, stdlib, - emqx_connector, emqx_resource, emqx_mongodb ]}, diff --git a/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.app.src b/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.app.src index 5c2651b89..be5f4b417 100644 --- a/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.app.src +++ b/apps/emqx_bridge_mysql/src/emqx_bridge_mysql.app.src @@ -5,7 +5,6 @@ {applications, [ kernel, stdlib, - emqx_connector, emqx_resource, emqx_mysql ]}, diff --git a/apps/emqx_bridge_redis/src/emqx_bridge_redis.app.src b/apps/emqx_bridge_redis/src/emqx_bridge_redis.app.src index a2e006443..8e737c4a3 100644 --- a/apps/emqx_bridge_redis/src/emqx_bridge_redis.app.src +++ b/apps/emqx_bridge_redis/src/emqx_bridge_redis.app.src @@ -5,7 +5,6 @@ {applications, [ kernel, stdlib, - emqx_connector, emqx_resource, emqx_redis ]}, diff --git a/apps/emqx_machine/src/emqx_machine_boot.erl b/apps/emqx_machine/src/emqx_machine_boot.erl index 08cf8c448..a87cc545a 100644 --- a/apps/emqx_machine/src/emqx_machine_boot.erl +++ b/apps/emqx_machine/src/emqx_machine_boot.erl @@ -178,6 +178,11 @@ app_deps(App, RebootApps) -> %% `emqx_bridge' is special in that it needs all the 
bridges apps to %% be started before it, so that, when it loads the bridges from %% configuration, the bridge app and its dependencies need to be up. +%% +%% `emqx_connector' also needs to start all connector dependencies for the same reason. +%% Since standalone apps like `emqx_mongodb' are already dependencies of `emqx_bridge_*' +%% apps, we may apply the same tactic for `emqx_connector' and inject individual bridges +%% as its dependencies. inject_bridge_deps(RebootAppDeps) -> BridgeApps = [ App @@ -188,6 +193,8 @@ inject_bridge_deps(RebootAppDeps) -> fun ({emqx_bridge, Deps0}) when is_list(Deps0) -> {emqx_bridge, Deps0 ++ BridgeApps}; + ({emqx_connector, Deps0}) when is_list(Deps0) -> + {emqx_connector, Deps0 ++ BridgeApps}; (App) -> App end, diff --git a/apps/emqx_management/test/emqx_mgmt_api_configs_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_configs_SUITE.erl index 6e520ba58..c555d3e16 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_configs_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_configs_SUITE.erl @@ -392,7 +392,7 @@ t_create_webhook_v1_bridges_api({'init', Config}) -> lists:foreach( fun(App) -> _ = application:stop(App), - {ok, [App]} = application:ensure_all_started(App) + {ok, _} = application:ensure_all_started(App) end, [emqx_connector, emqx_bridge] ), diff --git a/apps/emqx_mongodb/src/emqx_mongodb.app.src b/apps/emqx_mongodb/src/emqx_mongodb.app.src index 8279da934..e171d3dd2 100644 --- a/apps/emqx_mongodb/src/emqx_mongodb.app.src +++ b/apps/emqx_mongodb/src/emqx_mongodb.app.src @@ -1,12 +1,11 @@ {application, emqx_mongodb, [ {description, "EMQX MongoDB Connector"}, - {vsn, "0.1.4"}, + {vsn, "0.1.5"}, {registered, []}, {applications, [ kernel, stdlib, mongodb, - emqx_connector, emqx_resource ]}, {env, []}, diff --git a/apps/emqx_mysql/src/emqx_mysql.app.src b/apps/emqx_mysql/src/emqx_mysql.app.src index 9ae3234cc..bc6f6f6fb 100644 --- a/apps/emqx_mysql/src/emqx_mysql.app.src +++ b/apps/emqx_mysql/src/emqx_mysql.app.src @@ 
-1,12 +1,11 @@ {application, emqx_mysql, [ {description, "EMQX MySQL Database Connector"}, - {vsn, "0.1.6"}, + {vsn, "0.1.7"}, {registered, []}, {applications, [ kernel, stdlib, mysql, - emqx_connector, emqx_resource ]}, {env, []}, diff --git a/apps/emqx_postgresql/src/emqx_postgresql.app.src b/apps/emqx_postgresql/src/emqx_postgresql.app.src index 9c31b49c6..14877af68 100644 --- a/apps/emqx_postgresql/src/emqx_postgresql.app.src +++ b/apps/emqx_postgresql/src/emqx_postgresql.app.src @@ -1,12 +1,11 @@ {application, emqx_postgresql, [ {description, "EMQX PostgreSQL Database Connector"}, - {vsn, "0.1.1"}, + {vsn, "0.1.2"}, {registered, []}, {applications, [ kernel, stdlib, epgsql, - emqx_connector, emqx_resource ]}, {env, []}, diff --git a/apps/emqx_redis/src/emqx_redis.app.src b/apps/emqx_redis/src/emqx_redis.app.src index 1f8c5fbc3..660c490e6 100644 --- a/apps/emqx_redis/src/emqx_redis.app.src +++ b/apps/emqx_redis/src/emqx_redis.app.src @@ -1,13 +1,12 @@ {application, emqx_redis, [ {description, "EMQX Redis Database Connector"}, - {vsn, "0.1.4"}, + {vsn, "0.1.5"}, {registered, []}, {applications, [ kernel, stdlib, eredis, eredis_cluster, - emqx_connector, emqx_resource ]}, {env, []}, diff --git a/changes/ce/fix-12359.en.md b/changes/ce/fix-12359.en.md new file mode 100644 index 000000000..abc0771a0 --- /dev/null +++ b/changes/ce/fix-12359.en.md @@ -0,0 +1 @@ +Fixed an issue that could lead to error logs when restarting a node configured with some types of data bridges. Said bridges could also start in a failed state, requiring manual restart. 
From ada2785b5d31f033958b26a461803cd39997830f Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Tue, 23 Jan 2024 12:18:11 +0800 Subject: [PATCH 48/89] chore: es's base_url to server --- .../src/emqx_bridge_es_connector.erl | 70 ++++++++++++------- .../test/emqx_bridge_es_SUITE.erl | 7 +- .../src/emqx_bridge_influxdb_connector.erl | 3 +- rel/i18n/emqx_bridge_es.hocon | 5 -- rel/i18n/emqx_bridge_es_connector.hocon | 8 +++ 5 files changed, 59 insertions(+), 34 deletions(-) diff --git a/apps/emqx_bridge_es/src/emqx_bridge_es_connector.erl b/apps/emqx_bridge_es/src/emqx_bridge_es_connector.erl index 8b68af10f..256645c31 100644 --- a/apps/emqx_bridge_es/src/emqx_bridge_es_connector.erl +++ b/apps/emqx_bridge_es/src/emqx_bridge_es_connector.erl @@ -34,6 +34,7 @@ ]). -export([render_template/2]). +-export([convert_server/2]). %% emqx_connector_resource behaviour callbacks -export([connector_config/2]). @@ -92,7 +93,7 @@ connector_example_values() -> <<"username">> => <<"root">>, <<"password">> => <<"******">> }, - base_url => <<"http://127.0.0.1:9200/">>, + server => <<"127.0.0.1:9200">>, connect_timeout => <<"15s">>, pool_type => <<"random">>, pool_size => 8, @@ -116,14 +117,7 @@ fields(config) -> fields("connection_fields"); fields("connection_fields") -> [ - {base_url, - ?HOCON( - emqx_schema:url(), - #{ - required => true, - desc => ?DESC(emqx_bridge_es, "config_base_url") - } - )}, + {server, server()}, {authentication, ?HOCON( ?UNION([?R_REF(auth_basic)]), @@ -158,30 +152,36 @@ desc(auth_basic) -> "Basic Authentication"; desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> ["Configuration for Elastic Search using `", string:to_upper(Method), "` method."]; +desc("server") -> + ?DESC("server"); desc(_) -> undefined. +server() -> + Meta = #{ + required => true, + default => <<"127.0.0.1:9200">>, + desc => ?DESC("server"), + converter => fun ?MODULE:convert_server/2 + }, + emqx_schema:servers_sc(Meta, #{default_port => 9200}). 
+ +convert_server(<<"http://", Server/binary>>, HoconOpts) -> + convert_server(Server, HoconOpts); +convert_server(<<"https://", Server/binary>>, HoconOpts) -> + convert_server(Server, HoconOpts); +convert_server(Server0, HoconOpts) -> + Server = string:trim(Server0, trailing, "/"), + emqx_schema:convert_servers(Server, HoconOpts). + connector_config(Conf, #{name := Name, parse_confs := ParseConfs}) -> - #{ - base_url := BaseUrl, - authentication := - #{ - username := Username, - password := Password0 - } - } = Conf, - - Password = emqx_secret:unwrap(Password0), - Base64 = base64:encode(<>), - BasicToken = <<"Basic ", Base64/binary>>, - WebhookConfig = Conf#{ method => <<"post">>, - url => BaseUrl, + url => base_url(Conf), headers => [ {<<"Content-type">>, <<"application/json">>}, - {<<"Authorization">>, BasicToken} + {<<"Authorization">>, basic_token(Conf)} ] }, ParseConfs( @@ -190,6 +190,19 @@ connector_config(Conf, #{name := Name, parse_confs := ParseConfs}) -> WebhookConfig ). +basic_token(#{ + authentication := + #{ + username := Username, + password := Password0 + } +}) -> + Password = emqx_secret:unwrap(Password0), + Base64 = base64:encode(<>), + <<"Basic ", Base64/binary>>. + +base_url(#{ssl := #{enable := true}, server := Server}) -> "https://" ++ Server; +base_url(#{server := Server}) -> "http://" ++ Server. %%------------------------------------------------------------------------------------- %% `emqx_resource' API %%------------------------------------------------------------------------------------- @@ -316,6 +329,10 @@ on_get_channel_status(_InstanceId, ChannelId, #{channels := Channels}) -> {error, not_exists} end. 
+render_template([<<"update_without_doc_template">>], Msg) -> + emqx_utils_json:encode(#{<<"doc">> => Msg}); +render_template([<<"create_without_doc_template">>], Msg) -> + emqx_utils_json:encode(#{<<"doc">> => Msg, <<"doc_as_upsert">> => true}); render_template(Template, Msg) -> % Ignoring errors here, undefined bindings will be replaced with empty string. Opts = #{var_trans => fun to_string/2}, @@ -395,6 +412,11 @@ get_body_template(#{action := update, doc := Doc} = Template) -> false -> <<"{\"doc\":", Doc/binary, "}">>; true -> <<"{\"doc\":", Doc/binary, ",\"doc_as_upsert\": true}">> end; +get_body_template(#{action := update} = Template) -> + case maps:get(doc_as_upsert, Template, false) of + false -> <<"update_without_doc_template">>; + true -> <<"create_without_doc_template">> + end; get_body_template(#{doc := Doc}) -> Doc; get_body_template(_) -> diff --git a/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl b/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl index a9ff70957..ee7e8524c 100644 --- a/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl +++ b/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl @@ -169,11 +169,10 @@ action(ConnectorName) -> } }. 
-base_url(Config) -> +server(Config) -> Host = ?config(es_host, Config), Port = ?config(es_port, Config), iolist_to_binary([ - "https://", Host, ":", integer_to_binary(Port) @@ -185,7 +184,7 @@ connector_config(Config) -> connector_config(Overrides, Config) -> Defaults = #{ - <<"base_url">> => base_url(Config), + <<"server">> => server(Config), <<"enable">> => true, <<"authentication">> => #{ <<"password">> => <<"emqx123">>, @@ -314,7 +313,7 @@ t_bad_url(Config) -> ActionName = <<"test_action">>, ActionConfig = action(<<"test_connector">>), ConnectorConfig0 = connector_config(Config), - ConnectorConfig = ConnectorConfig0#{<<"base_url">> := <<"bad_host:9092">>}, + ConnectorConfig = ConnectorConfig0#{<<"server">> := <<"bad_host:9092">>}, ?assertMatch({ok, _}, create_connector(ConnectorName, ConnectorConfig)), ?assertMatch({ok, _}, create_action(ActionName, ActionConfig)), ?assertMatch( diff --git a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl index 478486e5b..809f47e49 100644 --- a/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl +++ b/apps/emqx_bridge_influxdb/src/emqx_bridge_influxdb_connector.erl @@ -865,7 +865,8 @@ convert_server(<<"http://", Server/binary>>, HoconOpts) -> convert_server(Server, HoconOpts); convert_server(<<"https://", Server/binary>>, HoconOpts) -> convert_server(Server, HoconOpts); -convert_server(Server, HoconOpts) -> +convert_server(Server0, HoconOpts) -> + Server = string:trim(Server0, trailing, "/"), emqx_schema:convert_servers(Server, HoconOpts). 
str(A) when is_atom(A) -> diff --git a/rel/i18n/emqx_bridge_es.hocon b/rel/i18n/emqx_bridge_es.hocon index 8ad11f05b..1cff9dbb9 100644 --- a/rel/i18n/emqx_bridge_es.hocon +++ b/rel/i18n/emqx_bridge_es.hocon @@ -35,11 +35,6 @@ config_auth_basic_password.desc: config_auth_basic_password.label: """HTTP Basic Auth Password""" -config_base_url.desc: -"""The base URL of the external ElasticSearch service's REST interface.""" -config_base_url.label: -"""ElasticSearch REST Service Base URL""" - config_target.desc: """Name of the data stream, index, or index alias to perform bulk actions on""" diff --git a/rel/i18n/emqx_bridge_es_connector.hocon b/rel/i18n/emqx_bridge_es_connector.hocon index ddd53e0fc..21406b840 100644 --- a/rel/i18n/emqx_bridge_es_connector.hocon +++ b/rel/i18n/emqx_bridge_es_connector.hocon @@ -1,5 +1,13 @@ emqx_bridge_es_connector { +server.desc: +"""The IPv4 or IPv6 address or the hostname to connect to. +A host entry has the following form: `Host[:Port]`. +The Elasticsearch default port 9200 is used if `[:Port]` is not specified.""" + +server.label: +"""Server Host""" + config_authentication.desc: """Authentication configuration""" From 51e8c955ffffea95959aafff91ae792209e70f8a Mon Sep 17 00:00:00 2001 From: firest Date: Tue, 23 Jan 2024 13:03:12 +0800 Subject: [PATCH 49/89] fix(connector): make the connector id unique when doing probe testing --- apps/emqx_connector/src/emqx_connector_resource.erl | 6 +++--- apps/emqx_resource/src/emqx_resource_manager.erl | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/apps/emqx_connector/src/emqx_connector_resource.erl b/apps/emqx_connector/src/emqx_connector_resource.erl index 8611ba744..74c167b32 100644 --- a/apps/emqx_connector/src/emqx_connector_resource.erl +++ b/apps/emqx_connector/src/emqx_connector_resource.erl @@ -107,7 +107,7 @@ parse_connector_id(ConnectorId) -> {atom(), atom() | binary()}. 
parse_connector_id(<<"connector:", ConnectorId/binary>>, Opts) -> parse_connector_id(ConnectorId, Opts); -parse_connector_id(<>, Opts) -> +parse_connector_id(<>, Opts) -> parse_connector_id(ConnectorId, Opts); parse_connector_id(ConnectorId, Opts) -> emqx_resource:parse_resource_id(ConnectorId, Opts). @@ -230,9 +230,9 @@ create_dry_run(Type, Conf0, Callback) -> TypeAtom = safe_atom(Type), %% We use a fixed name here to avoid creating an atom %% to avoid potential race condition, the resource id should be unique - UID = integer_to_binary(erlang:unique_integer([monotonic, positive])), + Prefix = emqx_resource_manager:make_test_id(), TmpName = - iolist_to_binary([?TEST_ID_PREFIX, TypeBin, ":", <<"probedryrun">>, UID]), + iolist_to_binary([Prefix, TypeBin, ":", <<"probedryrun">>]), TmpPath = emqx_utils:safe_filename(TmpName), Conf1 = maps:without([<<"name">>], Conf0), RawConf = #{<<"connectors">> => #{TypeBin => #{<<"temp_name">> => Conf1}}}, diff --git a/apps/emqx_resource/src/emqx_resource_manager.erl b/apps/emqx_resource/src/emqx_resource_manager.erl index 4fd566f26..b16520d3d 100644 --- a/apps/emqx_resource/src/emqx_resource_manager.erl +++ b/apps/emqx_resource/src/emqx_resource_manager.erl @@ -50,7 +50,8 @@ ]). -export([ - set_resource_status_connecting/1 + set_resource_status_connecting/1, + make_test_id/0 ]). 
% Server From 6a9cbb3d97f839c00e3ef49efd049b6ba2e2a0bf Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Tue, 23 Jan 2024 15:51:09 +0800 Subject: [PATCH 50/89] test: add es's update test --- .../test/emqx_bridge_es_SUITE.erl | 89 ++++++++++++++++--- 1 file changed, 76 insertions(+), 13 deletions(-) diff --git a/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl b/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl index ee7e8524c..530eb77b2 100644 --- a/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl +++ b/apps/emqx_bridge_es/test/emqx_bridge_es_SUITE.erl @@ -103,13 +103,13 @@ end_per_testcase(_TestCase, _Config) -> %% Helper fns %%------------------------------------------------------------------------------------- -check_send_message_with_action(Topic, ActionName, ConnectorName) -> +check_send_message_with_action(Topic, ActionName, ConnectorName, Expect) -> send_message(Topic), %% ###################################### %% Check if message is sent to es %% ###################################### timer:sleep(500), - check_action_metrics(ActionName, ConnectorName). + check_action_metrics(ActionName, ConnectorName, Expect). send_message(Topic) -> Now = emqx_utils_calendar:now_to_rfc3339(microsecond), @@ -123,7 +123,7 @@ send_message(Topic) -> ok = emqtt:publish(Client, Topic, Payload, [{qos, 0}]), ok. -check_action_metrics(ActionName, ConnectorName) -> +check_action_metrics(ActionName, ConnectorName, Expect) -> ActionId = emqx_bridge_v2:id(?TYPE, ActionName, ConnectorName), Metrics = #{ @@ -134,13 +134,7 @@ check_action_metrics(ActionName, ConnectorName) -> dropped => emqx_resource_metrics:dropped_get(ActionId) }, ?assertEqual( - #{ - match => 1, - success => 1, - dropped => 0, - failed => 0, - queuing => 0 - }, + Expect, Metrics, {ActionName, ConnectorName, ActionId} ). @@ -248,7 +242,7 @@ t_create_remove_list(Config) -> ok. 
%% Test sending a message to a bridge V2 -t_send_message(Config) -> +t_create_message(Config) -> ConnectorConfig = connector_config(Config), {ok, _} = emqx_connector:create(?TYPE, test_connector2, ConnectorConfig), ActionConfig = action(<<"test_connector2">>), @@ -261,7 +255,8 @@ t_send_message(Config) -> }, {ok, _} = emqx_rule_engine:create_rule(Rule), %% Use the action to send a message - check_send_message_with_action(<<"es/1">>, test_action_1, test_connector2), + Expect = #{match => 1, success => 1, dropped => 0, failed => 0, queuing => 0}, + check_send_message_with_action(<<"es/1">>, test_action_1, test_connector2, Expect), %% Create a few more bridges with the same connector and test them ActionNames1 = lists:foldl( @@ -278,7 +273,7 @@ t_send_message(Config) -> }, {ok, _} = emqx_rule_engine:create_rule(Rule1), Topic = <<"es/", Seq/binary>>, - check_send_message_with_action(Topic, ActionName, test_connector2), + check_send_message_with_action(Topic, ActionName, test_connector2, Expect), [ActionName | Acc] end, [], @@ -293,6 +288,74 @@ t_send_message(Config) -> ActionNames ), emqx_connector:remove(?TYPE, test_connector2), + lists:foreach( + fun(#{id := Id}) -> + emqx_rule_engine:delete_rule(Id) + end, + emqx_rule_engine:get_rules() + ), + ok. 
+ +t_update_message(Config) -> + ConnectorConfig = connector_config(Config), + {ok, _} = emqx_connector:create(?TYPE, update_connector, ConnectorConfig), + ActionConfig0 = action(<<"update_connector">>), + DocId = emqx_guid:to_hexstr(emqx_guid:gen()), + ActionConfig1 = ActionConfig0#{ + <<"parameters">> => #{ + <<"index">> => <<"${payload.index}">>, + <<"id">> => DocId, + <<"max_retries">> => 0, + <<"action">> => <<"update">>, + <<"doc">> => <<"${payload.doc}">> + } + }, + {ok, _} = emqx_bridge_v2:create(?TYPE, update_action, ActionConfig1), + Rule = #{ + id => <<"rule:t_es_1">>, + sql => <<"SELECT\n *\nFROM\n \"es/#\"">>, + actions => [<<"elasticsearch:update_action">>], + description => <<"sink doc to elasticsearch">> + }, + {ok, _} = emqx_rule_engine:create_rule(Rule), + %% failed to update a nonexistent doc + Expect0 = #{match => 1, success => 0, dropped => 0, failed => 1, queuing => 0}, + check_send_message_with_action(<<"es/1">>, update_action, update_connector, Expect0), + %% doc_as_upsert to insert a new doc + ActionConfig2 = ActionConfig1#{ + <<"parameters">> => #{ + <<"index">> => <<"${payload.index}">>, + <<"id">> => DocId, + <<"action">> => <<"update">>, + <<"doc">> => <<"${payload.doc}">>, + <<"doc_as_upsert">> => true, + <<"max_retries">> => 0 + } + }, + {ok, _} = emqx_bridge_v2:create(?TYPE, update_action, ActionConfig2), + Expect1 = #{match => 1, success => 1, dropped => 0, failed => 0, queuing => 0}, + check_send_message_with_action(<<"es/1">>, update_action, update_connector, Expect1), + %% update without doc, use msg as default + ActionConfig3 = ActionConfig1#{ + <<"parameters">> => #{ + <<"index">> => <<"${payload.index}">>, + <<"id">> => DocId, + <<"action">> => <<"update">>, + <<"max_retries">> => 0 + } + }, + {ok, _} = emqx_bridge_v2:create(?TYPE, update_action, ActionConfig3), + Expect2 = #{match => 1, success => 1, dropped => 0, failed => 0, queuing => 0}, + check_send_message_with_action(<<"es/1">>, update_action, update_connector, 
Expect2), + %% Clean + ok = emqx_bridge_v2:remove(?TYPE, update_action), + emqx_connector:remove(?TYPE, update_connector), + lists:foreach( + fun(#{id := Id}) -> + emqx_rule_engine:delete_rule(Id) + end, + emqx_rule_engine:get_rules() + ), ok. %% Test that we can get the status of the bridge V2 From 1be5b6aaf3ec44867ba96b61f558c66befce30a6 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 23 Jan 2024 09:39:37 -0300 Subject: [PATCH 51/89] fix(mqtt_action): use overridden `resource_opts` Fixes https://emqx.atlassian.net/browse/EMQX-11790 --- .../src/emqx_bridge_mqtt_pubsub_schema.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl index fdb14315c..e49aa3da3 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_pubsub_schema.erl @@ -48,13 +48,14 @@ fields(action) -> )}; fields("mqtt_publisher_action") -> emqx_bridge_v2_schema:make_producer_action_schema( - hoconsc:mk( - hoconsc:ref(?MODULE, action_parameters), + mk( + ref(?MODULE, action_parameters), #{ required => true, desc => ?DESC("action_parameters") } - ) + ), + #{resource_opts_ref => ref(?MODULE, action_resource_opts)} ); fields(action_parameters) -> [ @@ -86,7 +87,8 @@ fields("mqtt_subscriber_source") -> required => true, desc => ?DESC("source_parameters") } - ) + ), + #{resource_opts_ref => ref(?MODULE, source_resource_opts)} ); fields(ingress_parameters) -> [ From 67c9e9c8fc0f6044f119dce950e347e05a3a51d6 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 23 Jan 2024 10:40:02 -0300 Subject: [PATCH 52/89] fix(actions_api): reset metrics on all nodes Fixes https://emqx.atlassian.net/browse/EMQX-11796 --- apps/emqx_bridge/src/emqx_bridge_v2.erl | 4 +- .../test/emqx_bridge_v2_api_SUITE.erl | 51 +++++++++++++++---- 2 files changed, 44 insertions(+), 11 deletions(-) 
diff --git a/apps/emqx_bridge/src/emqx_bridge_v2.erl b/apps/emqx_bridge/src/emqx_bridge_v2.erl index 622dbf464..7ee384b84 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2.erl @@ -647,8 +647,8 @@ reset_metrics(ConfRootKey, Type, Name) -> reset_metrics_helper(_ConfRootKey, _Type, _Name, #{enable := false}) -> ok; reset_metrics_helper(ConfRootKey, BridgeV2Type, BridgeName, #{connector := ConnectorName}) -> - BridgeV2Id = id_with_root_name(ConfRootKey, BridgeV2Type, BridgeName, ConnectorName), - ok = emqx_metrics_worker:reset_metrics(?RES_METRICS, BridgeV2Id); + ResourceId = id_with_root_name(ConfRootKey, BridgeV2Type, BridgeName, ConnectorName), + emqx_resource:reset_metrics(ResourceId); reset_metrics_helper(_, _, _, _) -> {error, not_found}. diff --git a/apps/emqx_bridge/test/emqx_bridge_v2_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_v2_api_SUITE.erl index fc9c9573f..4f98baebf 100644 --- a/apps/emqx_bridge/test/emqx_bridge_v2_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_v2_api_SUITE.erl @@ -286,6 +286,10 @@ init_mocks() -> ok = meck:expect(?CONNECTOR_IMPL, on_get_channels, fun(ResId) -> emqx_bridge_v2:get_channels_for_connector(ResId) end), + meck:expect(?CONNECTOR_IMPL, on_query_async, fun(_ResId, _Req, ReplyFunAndArgs, _ConnState) -> + emqx_resource:apply_reply_fun(ReplyFunAndArgs, ok), + {ok, self()} + end), ok. clear_resources() -> @@ -378,6 +382,9 @@ enable_path(Enable, BridgeID) -> publish_message(Topic, Body, Config) -> Node = ?config(node, Config), + publish_message(Topic, Body, Node, Config). + +publish_message(Topic, Body, Node, _Config) -> erpc:call(Node, emqx, publish, [emqx_message:make(Topic, Body)]). update_config(Path, Value, Config) -> @@ -524,6 +531,17 @@ get_common_values(Kind, FnName) -> } end. +maybe_get_other_node(Config) -> + %% In the single node test group, this simply returns the lone node. Otherwise, it'll + %% return a node that's not the primary one that receives API calls. 
+ PrimaryNode = ?config(node, Config), + case proplists:get_value(cluster_nodes, Config, []) -- [PrimaryNode] of + [] -> + PrimaryNode; + [OtherNode | _] -> + OtherNode + end. + %%------------------------------------------------------------------------------ %% Testcases %%------------------------------------------------------------------------------ @@ -1385,7 +1403,8 @@ t_reset_metrics(Config) -> ActionID = emqx_bridge_resource:bridge_id(?ACTION_TYPE, ActionName), Body = <<"my msg">>, - _ = publish_message(?MQTT_LOCAL_TOPIC, Body, Config), + OtherNode = maybe_get_other_node(Config), + _ = publish_message(?MQTT_LOCAL_TOPIC, Body, OtherNode, Config), ?retry( _Sleep0 = 200, _Retries0 = 20, @@ -1400,16 +1419,30 @@ t_reset_metrics(Config) -> {ok, 204, <<>>} = request(put, uri([?ACTIONS_ROOT, ActionID, "metrics", "reset"]), Config), - ?retry( + Res = ?retry( _Sleep0 = 200, _Retries0 = 20, - ?assertMatch( - {ok, 200, #{ - <<"metrics">> := #{<<"matched">> := 0}, - <<"node_metrics">> := [#{<<"metrics">> := #{}} | _] - }}, - request_json(get, uri([?ACTIONS_ROOT, ActionID, "metrics"]), Config) - ) + begin + Res0 = request_json(get, uri([?ACTIONS_ROOT, ActionID, "metrics"]), Config), + ?assertMatch( + {ok, 200, #{ + <<"metrics">> := #{<<"matched">> := 0}, + <<"node_metrics">> := [#{<<"metrics">> := #{}} | _] + }}, + Res0 + ), + Res0 + end + ), + {ok, 200, #{<<"node_metrics">> := NodeMetrics}} = Res, + ?assert( + lists:all( + fun(#{<<"metrics">> := #{<<"matched">> := Matched}}) -> + Matched == 0 + end, + NodeMetrics + ), + #{node_metrics => NodeMetrics} ), ok. 
From 497ec043596f1dafad2fed5e53be137842d2c686 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 23 Jan 2024 09:50:01 +0800 Subject: [PATCH 53/89] fix(prom): missing callback attribute --- apps/emqx_prometheus/src/emqx_prometheus.erl | 4 +--- apps/emqx_prometheus/src/emqx_prometheus_cluster.erl | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index af35acc36..2ddfc4def 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -90,8 +90,6 @@ -define(HTTP_OPTIONS, [{autoredirect, true}, {timeout, 60000}]). --define(LOGICAL_SUM_METRIC_NAMES, []). - %%-------------------------------------------------------------------- %% APIs %%-------------------------------------------------------------------- @@ -280,7 +278,7 @@ aggre_or_zip_init_acc() -> }. logic_sum_metrics() -> - ?LOGICAL_SUM_METRIC_NAMES. + []. %%-------------------------------------------------------------------- %% Collector diff --git a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl index 2a68c7b3b..ae9b47c39 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl @@ -38,6 +38,8 @@ -callback aggre_or_zip_init_acc() -> map(). +-callback logic_sum_metrics() -> list(). + -define(MG(K, MAP), maps:get(K, MAP)). -define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). 
From 731efd8b49dd41c1f0b684ee5761503bd2ce5d43 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 23 Jan 2024 16:57:08 +0800 Subject: [PATCH 54/89] fix(prom): cluster aggre/unaggre labels --- apps/emqx_prometheus/src/emqx_prometheus.erl | 58 +++++++----- .../src/emqx_prometheus_auth.erl | 61 ++++++++----- .../src/emqx_prometheus_cluster.erl | 90 +++++++++---------- .../src/emqx_prometheus_data_integration.erl | 56 +++++++----- 4 files changed, 151 insertions(+), 114 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 2ddfc4def..7bbfd5bd5 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -24,7 +24,7 @@ -behaviour(emqx_prometheus_cluster). -export([ - fetch_data_from_local_node/0, + fetch_from_local_node/1, fetch_cluster_consistented_data/0, aggre_or_zip_init_acc/0, logic_sum_metrics/0 @@ -241,20 +241,20 @@ add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). 
%% behaviour -fetch_data_from_local_node() -> +fetch_from_local_node(Mode) -> {node(self()), #{ - stats_data => stats_data(), - vm_data => vm_data(), - cluster_data => cluster_data(), + stats_data => stats_data(Mode), + vm_data => vm_data(Mode), + cluster_data => cluster_data(Mode), %% Metrics - emqx_packet_data => emqx_metric_data(emqx_packet_metric_meta()), - emqx_message_data => emqx_metric_data(message_metric_meta()), - emqx_delivery_data => emqx_metric_data(delivery_metric_meta()), - emqx_client_data => emqx_metric_data(client_metric_meta()), - emqx_session_data => emqx_metric_data(session_metric_meta()), - emqx_olp_data => emqx_metric_data(olp_metric_meta()), - emqx_acl_data => emqx_metric_data(acl_metric_meta()), - emqx_authn_data => emqx_metric_data(authn_metric_meta()) + emqx_packet_data => emqx_metric_data(emqx_packet_metric_meta(), Mode), + emqx_message_data => emqx_metric_data(message_metric_meta(), Mode), + emqx_delivery_data => emqx_metric_data(delivery_metric_meta(), Mode), + emqx_client_data => emqx_metric_data(client_metric_meta(), Mode), + emqx_session_data => emqx_metric_data(session_metric_meta(), Mode), + emqx_olp_data => emqx_metric_data(olp_metric_meta(), Mode), + emqx_acl_data => emqx_metric_data(acl_metric_meta(), Mode), + emqx_authn_data => emqx_metric_data(authn_metric_meta(), Mode) }}. fetch_cluster_consistented_data() -> @@ -495,11 +495,11 @@ stats_metric_meta() -> {emqx_delayed_max, counter, 'delayed.max'} ]. -stats_data() -> +stats_data(Mode) -> Stats = emqx_stats:getstats(), lists:foldl( fun({Name, _Type, MetricKAtom}, AccIn) -> - AccIn#{Name => [{[], ?C(MetricKAtom, Stats)}]} + AccIn#{Name => [{with_node_label(Mode, []), ?C(MetricKAtom, Stats)}]} end, #{}, stats_metric_meta() @@ -519,11 +519,18 @@ vm_metric_meta() -> {emqx_vm_used_memory, gauge, 'used_memory'} ]. 
-vm_data() -> +vm_data(Mode) -> VmStats = emqx_mgmt:vm_stats(), lists:foldl( fun({Name, _Type, MetricKAtom}, AccIn) -> - AccIn#{Name => [{[], ?C(MetricKAtom, VmStats)}]} + Labels = + case Mode of + node -> + []; + _ -> + [{node, node(self())}] + end, + AccIn#{Name => [{Labels, ?C(MetricKAtom, VmStats)}]} end, #{}, vm_metric_meta() @@ -539,23 +546,23 @@ cluster_metric_meta() -> {emqx_cluster_nodes_stopped, gauge, undefined} ]. -cluster_data() -> +cluster_data(Mode) -> Running = emqx:cluster_nodes(running), Stopped = emqx:cluster_nodes(stopped), #{ - emqx_cluster_nodes_running => [{[], length(Running)}], - emqx_cluster_nodes_stopped => [{[], length(Stopped)}] + emqx_cluster_nodes_running => [{with_node_label(Mode, []), length(Running)}], + emqx_cluster_nodes_stopped => [{with_node_label(Mode, []), length(Stopped)}] }. %%======================================== %% Metrics %%======================================== -emqx_metric_data(MetricNameTypeKeyL) -> +emqx_metric_data(MetricNameTypeKeyL, Mode) -> Metrics = emqx_metrics:all(), lists:foldl( fun({Name, _Type, MetricKAtom}, AccIn) -> - AccIn#{Name => [{[], ?C(MetricKAtom, Metrics)}]} + AccIn#{Name => [{with_node_label(Mode, []), ?C(MetricKAtom, Metrics)}]} end, #{}, MetricNameTypeKeyL @@ -911,6 +918,13 @@ zip_json_prom_stats_metrics(Key, Points, AllResultedAcc) -> metrics_name(MetricsAll) -> [Name || {Name, _, _} <- MetricsAll]. +with_node_label(?PROM_DATA_MODE__NODE, Labels) -> + Labels; +with_node_label(?PROM_DATA_MODE__ALL_NODES_AGGREGATED, Labels) -> + Labels; +with_node_label(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED, Labels) -> + [{node, node(self())} | Labels]. 
+ %%-------------------------------------------------------------------- %% bpapi diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 0d0607518..e5574952c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -26,7 +26,7 @@ %% for bpapi -behaviour(emqx_prometheus_cluster). -export([ - fetch_data_from_local_node/0, + fetch_from_local_node/1, fetch_cluster_consistented_data/0, aggre_or_zip_init_acc/0, logic_sum_metrics/0 @@ -126,10 +126,10 @@ collect_metrics(Name, Metrics) -> collect_auth(Name, Metrics). %% behaviour -fetch_data_from_local_node() -> +fetch_from_local_node(Mode) -> {node(self()), #{ - authn_data => authn_data(), - authz_data => authz_data() + authn_data => authn_data(Mode), + authz_data => authz_data(Mode) }}. fetch_cluster_consistented_data() -> @@ -224,38 +224,41 @@ authn_metric_meta() -> authn_metric(names) -> emqx_prometheus_cluster:metric_names(authn_metric_meta()). --spec authn_data() -> #{Key => [Point]} when +-spec authn_data(atom()) -> #{Key => [Point]} when Key :: authn_metric_name(), Point :: {[Label], Metric}, Label :: IdLabel, IdLabel :: {id, AuthnName :: binary()}, Metric :: number(). -authn_data() -> +authn_data(Mode) -> Authns = emqx_config:get([authentication]), lists:foldl( fun(Key, AccIn) -> - AccIn#{Key => authn_backend_to_points(Key, Authns)} + AccIn#{Key => authn_backend_to_points(Mode, Key, Authns)} end, #{}, authn_metric(names) ). --spec authn_backend_to_points(Key, list(Authn)) -> list(Point) when +-spec authn_backend_to_points(atom(), Key, list(Authn)) -> list(Point) when Key :: authn_metric_name(), Authn :: map(), Point :: {[Label], Metric}, Label :: IdLabel, IdLabel :: {id, AuthnName :: binary()}, Metric :: number(). -authn_backend_to_points(Key, Authns) -> - do_authn_backend_to_points(Key, Authns, []). 
+authn_backend_to_points(Mode, Key, Authns) -> + do_authn_backend_to_points(Mode, Key, Authns, []). -do_authn_backend_to_points(_K, [], AccIn) -> +do_authn_backend_to_points(_Mode, _K, [], AccIn) -> lists:reverse(AccIn); -do_authn_backend_to_points(K, [Authn | Rest], AccIn) -> +do_authn_backend_to_points(Mode, K, [Authn | Rest], AccIn) -> Id = authenticator_id(Authn), - Point = {[{id, Id}], do_metric(K, Authn, lookup_authn_metrics_local(Id))}, - do_authn_backend_to_points(K, Rest, [Point | AccIn]). + Point = { + with_node_label(Mode, [{id, Id}]), + do_metric(K, Authn, lookup_authn_metrics_local(Id)) + }, + do_authn_backend_to_points(Mode, K, Rest, [Point | AccIn]). lookup_authn_metrics_local(Id) -> case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of @@ -317,38 +320,41 @@ authz_metric_meta() -> authz_metric(names) -> emqx_prometheus_cluster:metric_names(authz_metric_meta()). --spec authz_data() -> #{Key => [Point]} when +-spec authz_data(atom()) -> #{Key => [Point]} when Key :: authz_metric_name(), Point :: {[Label], Metric}, Label :: TypeLabel, TypeLabel :: {type, AuthZType :: binary()}, Metric :: number(). -authz_data() -> +authz_data(Mode) -> Authzs = emqx_config:get([authorization, sources]), lists:foldl( fun(Key, AccIn) -> - AccIn#{Key => authz_backend_to_points(Key, Authzs)} + AccIn#{Key => authz_backend_to_points(Mode, Key, Authzs)} end, #{}, authz_metric(names) ). --spec authz_backend_to_points(Key, list(Authz)) -> list(Point) when +-spec authz_backend_to_points(atom(), Key, list(Authz)) -> list(Point) when Key :: authz_metric_name(), Authz :: map(), Point :: {[Label], Metric}, Label :: TypeLabel, TypeLabel :: {type, AuthZType :: binary()}, Metric :: number(). -authz_backend_to_points(Key, Authzs) -> - do_authz_backend_to_points(Key, Authzs, []). +authz_backend_to_points(Mode, Key, Authzs) -> + do_authz_backend_to_points(Mode, Key, Authzs, []). 
-do_authz_backend_to_points(_K, [], AccIn) -> +do_authz_backend_to_points(_Mode, _K, [], AccIn) -> lists:reverse(AccIn); -do_authz_backend_to_points(K, [Authz | Rest], AccIn) -> +do_authz_backend_to_points(Mode, K, [Authz | Rest], AccIn) -> Type = maps:get(type, Authz), - Point = {[{type, Type}], do_metric(K, Authz, lookup_authz_metrics_local(Type))}, - do_authz_backend_to_points(K, Rest, [Point | AccIn]). + Point = { + with_node_label(Mode, [{type, Type}]), + do_metric(K, Authz, lookup_authz_metrics_local(Type)) + }, + do_authz_backend_to_points(Mode, K, Rest, [Point | AccIn]). lookup_authz_metrics_local(Type) -> case emqx_authz_api_sources:lookup_from_local_node(Type) of @@ -481,3 +487,10 @@ do_metric(emqx_authn_enable, #{enable := B}, _) -> emqx_prometheus_cluster:boolean_to_number(B); do_metric(K, _, Metrics) -> ?MG0(K, Metrics). + +with_node_label(?PROM_DATA_MODE__NODE, Labels) -> + Labels; +with_node_label(?PROM_DATA_MODE__ALL_NODES_AGGREGATED, Labels) -> + Labels; +with_node_label(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED, Labels) -> + [{node, node(self())} | Labels]. diff --git a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl index ae9b47c39..02209be22 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl @@ -23,7 +23,7 @@ collect_json_data/2, aggre_cluster/3, - with_node_name_label/2, + %% with_node_name_label/2, point_to_map_fun/1, @@ -34,7 +34,7 @@ -callback fetch_cluster_consistented_data() -> map(). --callback fetch_data_from_local_node() -> {node(), map()}. +-callback fetch_from_local_node(atom()) -> {node(), map()}. -callback aggre_or_zip_init_acc() -> map(). 
@@ -46,23 +46,23 @@ raw_data(Module, undefined) -> %% TODO: for push gateway, the format mode should be configurable raw_data(Module, ?PROM_DATA_MODE__NODE); -raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> - AllNodesMetrics = aggre_cluster(Module), +raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_AGGREGATED = Mode) -> + AllNodesMetrics = aggre_cluster(Module, Mode), Cluster = Module:fetch_cluster_consistented_data(), maps:merge(AllNodesMetrics, Cluster); -raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> - AllNodesMetrics = with_node_name_label(Module), +raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED = Mode) -> + AllNodesMetrics = zip_cluster_data(Module, Mode), Cluster = Module:fetch_cluster_consistented_data(), maps:merge(AllNodesMetrics, Cluster); -raw_data(Module, ?PROM_DATA_MODE__NODE) -> - {_Node, LocalNodeMetrics} = Module:fetch_data_from_local_node(), +raw_data(Module, ?PROM_DATA_MODE__NODE = Mode) -> + {_Node, LocalNodeMetrics} = Module:fetch_from_local_node(Mode), Cluster = Module:fetch_cluster_consistented_data(), maps:merge(LocalNodeMetrics, Cluster). -metrics_data_from_all_nodes(Module) -> +fetch_data_from_all_nodes(Module, Mode) -> Nodes = mria:running_nodes(), _ResL = emqx_prometheus_proto_v2:raw_prom_data( - Nodes, Module, fetch_data_from_local_node, [] + Nodes, Module, fetch_from_local_node, [Mode] ). collect_json_data(Data, Func) when is_function(Func, 3) -> @@ -76,17 +76,13 @@ collect_json_data(Data, Func) when is_function(Func, 3) -> collect_json_data(_, _) -> error(badarg). -aggre_cluster(Module) -> +aggre_cluster(Module, Mode) -> do_aggre_cluster( Module:logic_sum_metrics(), - metrics_data_from_all_nodes(Module), + fetch_data_from_all_nodes(Module, Mode), Module:aggre_or_zip_init_acc() ). -with_node_name_label(Module) -> - ResL = metrics_data_from_all_nodes(Module), - do_with_node_name_label(ResL, Module:aggre_or_zip_init_acc()). - aggre_cluster(LogicSumKs, ResL, Init) -> do_aggre_cluster(LogicSumKs, ResL, Init). 
@@ -121,61 +117,65 @@ aggre_metric(LogicSumKs, NodeMetrics, AccIn0) -> do_aggre_metric(K, LogicSumKs, NodeMetrics, AccL) -> lists:foldl( - fun({Labels, Metric}, AccIn) -> - NMetric = - case lists:member(K, LogicSumKs) of - true -> - logic_sum(Metric, ?PG0(Labels, AccIn)); - false -> - Metric + ?PG0(Labels, AccIn) - end, - [{Labels, NMetric} | AccIn] + fun(Point = {_Labels, _Metric}, AccIn) -> + sum(K, LogicSumKs, Point, AccIn) end, AccL, NodeMetrics ). -with_node_name_label(ResL, Init) -> - do_with_node_name_label(ResL, Init). +sum(K, LogicSumKs, {Labels, Metric} = Point, MetricAccL) -> + case lists:keytake(Labels, 1, MetricAccL) of + {value, {Labels, MetricAcc}, NMetricAccL} -> + NPoint = {Labels, do_sum(K, LogicSumKs, Metric, MetricAcc)}, + [NPoint | NMetricAccL]; + false -> + [Point | MetricAccL] + end. -do_with_node_name_label([], AccIn) -> +do_sum(K, LogicSumKs, Metric, MetricAcc) -> + case lists:member(K, LogicSumKs) of + true -> + logic_sum(Metric, MetricAcc); + false -> + Metric + MetricAcc + end. + +zip_cluster_data(Module, Mode) -> + zip_cluster( + fetch_data_from_all_nodes(Module, Mode), + Module:aggre_or_zip_init_acc() + ). + +zip_cluster([], AccIn) -> AccIn; -do_with_node_name_label([{ok, {NodeName, NodeMetric}} | Rest], AccIn) -> - do_with_node_name_label( +zip_cluster([{ok, {_NodeName, NodeMetric}} | Rest], AccIn) -> + zip_cluster( Rest, maps:fold( fun(K, V, AccIn0) -> AccIn0#{ - K => zip_with_node_name(NodeName, V, ?MG(K, AccIn0)) + K => do_zip_cluster(V, ?MG(K, AccIn0)) } end, AccIn, NodeMetric ) ); -do_with_node_name_label([{_, _} | Rest], AccIn) -> - do_with_node_name_label(Rest, AccIn). +zip_cluster([{_, _} | Rest], AccIn) -> + zip_cluster(Rest, AccIn). 
-zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> +do_zip_cluster(NodeMetrics, AccIn0) -> lists:foldl( fun(K, AccIn) -> - NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccMetricL = ?MG(K, AccIn), + NAccL = ?MG(K, NodeMetrics) ++ AccMetricL, AccIn#{K => NAccL} end, AccIn0, maps:keys(NodeMetrics) ). -do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NLabels = [{node, NodeName} | Labels], - [{NLabels, Metric} | AccIn] - end, - AccL, - NodeMetrics - ). - point_to_map_fun(Key) -> fun({Lables, Metric}, AccIn2) -> LablesKVMap = maps:from_list(Lables), diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 008a029a8..15fbe8106 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -31,7 +31,7 @@ %% for bpapi -behaviour(emqx_prometheus_cluster). -export([ - fetch_data_from_local_node/0, + fetch_from_local_node/1, fetch_cluster_consistented_data/0, aggre_or_zip_init_acc/0, logic_sum_metrics/0 @@ -69,13 +69,13 @@ %% Callback for emqx_prometheus_cluster %%-------------------------------------------------------------------- -fetch_data_from_local_node() -> +fetch_from_local_node(Mode) -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), {node(self()), #{ - rule_metric_data => rule_metric_data(Rules), - action_metric_data => action_metric_data(Bridges), - connector_metric_data => connector_metric_data(Bridges) + rule_metric_data => rule_metric_data(Mode, Rules), + action_metric_data => action_metric_data(Mode, Bridges), + connector_metric_data => connector_metric_data(Mode, Bridges) }}. fetch_cluster_consistented_data() -> @@ -325,26 +325,26 @@ rule_metric_meta() -> rule_metric(names) -> emqx_prometheus_cluster:metric_names(rule_metric_meta()). 
-rule_metric_data(Rules) -> +rule_metric_data(Mode, Rules) -> lists:foldl( fun(#{id := Id} = Rule, AccIn) -> - merge_acc_with_rules(Id, get_metric(Rule), AccIn) + merge_acc_with_rules(Mode, Id, get_metric(Rule), AccIn) end, maps:from_keys(rule_metric(names), []), Rules ). -merge_acc_with_rules(Id, RuleMetrics, PointsAcc) -> +merge_acc_with_rules(Mode, Id, RuleMetrics, PointsAcc) -> maps:fold( fun(K, V, AccIn) -> - AccIn#{K => [rule_point(Id, V) | ?MG(K, AccIn)]} + AccIn#{K => [rule_point(Mode, Id, V) | ?MG(K, AccIn)]} end, PointsAcc, RuleMetrics ). -rule_point(Id, V) -> - {[{id, Id}], V}. +rule_point(Mode, Id, V) -> + {with_node_label(Mode, [{id, Id}]), V}. get_metric(#{id := Id, enable := Bool} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of @@ -393,27 +393,27 @@ action_metric_meta() -> action_metric(names) -> emqx_prometheus_cluster:metric_names(action_metric_meta()). -action_metric_data(Bridges) -> +action_metric_data(Mode, Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = _Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), - merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) + merge_acc_with_bridges(Mode, Id, get_bridge_metric(Type, Name), AccIn) end, maps:from_keys(action_metric(names), []), Bridges ). -merge_acc_with_bridges(Id, BridgeMetrics, PointsAcc) -> +merge_acc_with_bridges(Mode, Id, BridgeMetrics, PointsAcc) -> maps:fold( fun(K, V, AccIn) -> - AccIn#{K => [action_point(Id, V) | ?MG(K, AccIn)]} + AccIn#{K => [action_point(Mode, Id, V) | ?MG(K, AccIn)]} end, PointsAcc, BridgeMetrics ). -action_point(Id, V) -> - {[{id, Id}], V}. +action_point(Mode, Id, V) -> + {with_node_label(Mode, [{id, Id}]), V}. get_bridge_metric(Type, Name) -> case emqx_bridge:get_metrics(Type, Name) of @@ -453,27 +453,27 @@ connector_metric_meta() -> connectr_metric(names) -> emqx_prometheus_cluster:metric_names(connector_metric_meta()). 
-connector_metric_data(Bridges) -> +connector_metric_data(Mode, Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), - merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) + merge_acc_with_connectors(Mode, Id, get_connector_status(Bridge), AccIn) end, maps:from_keys(connectr_metric(names), []), Bridges ). -merge_acc_with_connectors(Id, ConnectorMetrics, PointsAcc) -> +merge_acc_with_connectors(Mode, Id, ConnectorMetrics, PointsAcc) -> maps:fold( fun(K, V, AccIn) -> - AccIn#{K => [connector_point(Id, V) | ?MG(K, AccIn)]} + AccIn#{K => [connector_point(Mode, Id, V) | ?MG(K, AccIn)]} end, PointsAcc, ConnectorMetrics ). -connector_point(Id, V) -> - {[{id, Id}], V}. +connector_point(Mode, Id, V) -> + {with_node_label(Mode, [{id, Id}]), V}. get_connector_status(#{resource_data := ResourceData} = _Bridge) -> Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), @@ -532,3 +532,13 @@ zip_json_data_integration_metrics(Key, Points, [] = _AccIn) -> zip_json_data_integration_metrics(Key, Points, AllResultedAcc) -> ThisKeyResult = lists:foldl(emqx_prometheus_cluster:point_to_map_fun(Key), [], Points), lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Helper funcs + +with_node_label(?PROM_DATA_MODE__NODE, Labels) -> + Labels; +with_node_label(?PROM_DATA_MODE__ALL_NODES_AGGREGATED, Labels) -> + Labels; +with_node_label(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED, Labels) -> + [{node, node(self())} | Labels]. 
From 4a1d1b6aebd54612fbe84c7bd7952303085ecc6d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 23 Jan 2024 17:42:15 +0800 Subject: [PATCH 55/89] fix: topics and retained metrics are cluster consistented --- apps/emqx_prometheus/src/emqx_prometheus.erl | 50 +++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 7bbfd5bd5..81eacc9d0 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -192,6 +192,11 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), %% TODO: license expiry epoch and cert expiry epoch should be cached ok = add_collect_family(Callback, stats_metric_meta(), ?MG(stats_data, RawData)), + ok = add_collect_family( + Callback, + stats_metric_cluster_consistened_meta(), + ?MG(stats_data_cluster_consistented, RawData) + ), ok = add_collect_family(Callback, vm_metric_meta(), ?MG(vm_data, RawData)), ok = add_collect_family(Callback, cluster_metric_meta(), ?MG(cluster_data, RawData)), @@ -214,8 +219,8 @@ collect_mf(_Registry, _Callback) -> collect(<<"json">>) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), (maybe_license_collect_json_data(RawData))#{ - stats => collect_json_data(?MG(stats_data, RawData)), - metrics => collect_json_data(?MG(vm_data, RawData)), + stats => collect_stats_json_data(RawData), + metrics => collect_vm_json_data(?MG(vm_data, RawData)), packets => collect_json_data(?MG(emqx_packet_data, RawData)), messages => collect_json_data(?MG(emqx_message_data, RawData)), delivery => collect_json_data(?MG(emqx_delivery_data, RawData)), @@ -259,6 +264,7 @@ fetch_from_local_node(Mode) -> fetch_cluster_consistented_data() -> (maybe_license_fetch_data())#{ + stats_data_cluster_consistented => stats_data_cluster_consistented(), cert_data => cert_data() }. 
@@ -477,8 +483,6 @@ stats_metric_meta() -> {emqx_channels_count, counter, 'channels.count'}, {emqx_channels_max, counter, 'channels.max'}, %% pub/sub stats - {emqx_topics_count, counter, 'topics.count'}, - {emqx_topics_max, counter, 'topics.max'}, {emqx_suboptions_count, counter, 'suboptions.count'}, {emqx_suboptions_max, counter, 'suboptions.max'}, {emqx_subscribers_count, counter, 'subscribers.count'}, @@ -487,14 +491,21 @@ stats_metric_meta() -> {emqx_subscriptions_max, counter, 'subscriptions.max'}, {emqx_subscriptions_shared_count, counter, 'subscriptions.shared.count'}, {emqx_subscriptions_shared_max, counter, 'subscriptions.shared.max'}, - %% retained - {emqx_retained_count, counter, 'retained.count'}, - {emqx_retained_max, counter, 'retained.max'}, %% delayed {emqx_delayed_count, counter, 'delayed.count'}, {emqx_delayed_max, counter, 'delayed.max'} ]. +stats_metric_cluster_consistened_meta() -> + [ + %% topics + {emqx_topics_max, counter, 'topics.max'}, + {emqx_topics_count, counter, 'topics.count'}, + %% retained + {emqx_retained_count, counter, 'retained.count'}, + {emqx_retained_max, counter, 'retained.max'} + ]. + stats_data(Mode) -> Stats = emqx_stats:getstats(), lists:foldl( @@ -505,6 +516,16 @@ stats_data(Mode) -> stats_metric_meta() ). +stats_data_cluster_consistented() -> + Stats = emqx_stats:getstats(), + lists:foldl( + fun({Name, _Type, MetricKAtom}, AccIn) -> + AccIn#{Name => [{[], ?C(MetricKAtom, Stats)}]} + end, + #{}, + stats_metric_cluster_consistened_meta() + ). + %%======================================== %% Erlang VM %%======================================== @@ -875,10 +896,25 @@ date_to_expiry_epoch(DateTime) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `application/json` +collect_stats_json_data(RawData) -> + StatsData = ?MG(stats_data, RawData), + StatsClData = ?MG(stats_data_cluster_consistented, RawData), + D = maps:merge(StatsData, StatsClData), + collect_json_data(D). 
+ %% always return json array collect_cert_json_data(Data) -> collect_json_data_(Data). +collect_vm_json_data(Data) -> + DataListPerNode = collect_json_data_(Data), + case {?GET_PROM_DATA_MODE(), DataListPerNode} of + {?PROM_DATA_MODE__NODE, [NData | _]} -> + NData; + {_, _} -> + DataListPerNode + end. + collect_json_data(Data0) -> DataListPerNode = collect_json_data_(Data0), case {?GET_PROM_DATA_MODE(), DataListPerNode} of From 6e3ce4d6442b2d8723c73ac36fec6ab23a0b94d3 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 23 Jan 2024 18:35:48 +0800 Subject: [PATCH 56/89] fix(prom_stats): emqx_stats metric type should be gauge --- apps/emqx_prometheus/src/emqx_prometheus.erl | 44 ++++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 81eacc9d0..ffa113a23 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -473,37 +473,37 @@ emqx_collect(K = emqx_cert_expiry_at, D) -> gauge_metrics(?MG(K, D)). 
stats_metric_meta() -> [ %% connections - {emqx_connections_count, counter, 'connections.count'}, - {emqx_connections_max, counter, 'connections.max'}, - {emqx_live_connections_count, counter, 'live_connections.count'}, - {emqx_live_connections_max, counter, 'live_connections.max'}, + {emqx_connections_count, gauge, 'connections.count'}, + {emqx_connections_max, gauge, 'connections.max'}, + {emqx_live_connections_count, gauge, 'live_connections.count'}, + {emqx_live_connections_max, gauge, 'live_connections.max'}, %% sessions - {emqx_sessions_count, counter, 'sessions.count'}, - {emqx_sessions_max, counter, 'sessions.max'}, - {emqx_channels_count, counter, 'channels.count'}, - {emqx_channels_max, counter, 'channels.max'}, + {emqx_sessions_count, gauge, 'sessions.count'}, + {emqx_sessions_max, gauge, 'sessions.max'}, + {emqx_channels_count, gauge, 'channels.count'}, + {emqx_channels_max, gauge, 'channels.max'}, %% pub/sub stats - {emqx_suboptions_count, counter, 'suboptions.count'}, - {emqx_suboptions_max, counter, 'suboptions.max'}, - {emqx_subscribers_count, counter, 'subscribers.count'}, - {emqx_subscribers_max, counter, 'subscribers.max'}, - {emqx_subscriptions_count, counter, 'subscriptions.count'}, - {emqx_subscriptions_max, counter, 'subscriptions.max'}, - {emqx_subscriptions_shared_count, counter, 'subscriptions.shared.count'}, - {emqx_subscriptions_shared_max, counter, 'subscriptions.shared.max'}, + {emqx_suboptions_count, gauge, 'suboptions.count'}, + {emqx_suboptions_max, gauge, 'suboptions.max'}, + {emqx_subscribers_count, gauge, 'subscribers.count'}, + {emqx_subscribers_max, gauge, 'subscribers.max'}, + {emqx_subscriptions_count, gauge, 'subscriptions.count'}, + {emqx_subscriptions_max, gauge, 'subscriptions.max'}, + {emqx_subscriptions_shared_count, gauge, 'subscriptions.shared.count'}, + {emqx_subscriptions_shared_max, gauge, 'subscriptions.shared.max'}, %% delayed - {emqx_delayed_count, counter, 'delayed.count'}, - {emqx_delayed_max, counter, 
'delayed.max'} + {emqx_delayed_count, gauge, 'delayed.count'}, + {emqx_delayed_max, gauge, 'delayed.max'} ]. stats_metric_cluster_consistened_meta() -> [ %% topics - {emqx_topics_max, counter, 'topics.max'}, - {emqx_topics_count, counter, 'topics.count'}, + {emqx_topics_max, gauge, 'topics.max'}, + {emqx_topics_count, gauge, 'topics.count'}, %% retained - {emqx_retained_count, counter, 'retained.count'}, - {emqx_retained_max, counter, 'retained.max'} + {emqx_retained_count, gauge, 'retained.count'}, + {emqx_retained_max, gauge, 'retained.max'} ]. stats_data(Mode) -> From b763e39b328770ffa3e5b2c84ac4267c29fd79b1 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jan 2024 11:04:42 -0300 Subject: [PATCH 57/89] docs: improve descriptions Co-authored-by: ieQu1 <99872536+ieQu1@users.noreply.github.com> --- changes/ce/fix-12359.en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes/ce/fix-12359.en.md b/changes/ce/fix-12359.en.md index abc0771a0..a4b2a725c 100644 --- a/changes/ce/fix-12359.en.md +++ b/changes/ce/fix-12359.en.md @@ -1 +1 @@ -Fixed an issue that could lead to error logs when restarting a node configured with some types of data bridges. Said bridges could also start in a failed state, requiring manual restart. +Fixed an issue that could lead to error messages when restarting a node configured with some types of data bridges. Said bridges could also start in a failed state, requiring manual restart. 
From 5e02709de01d8ede4d9143cc51736a48ba1a154f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 24 Jan 2024 15:33:38 +0800 Subject: [PATCH 58/89] fix(prom_auth): authz_enable key from enable status - https://emqx.atlassian.net/browse/EMQX-11800 --- apps/emqx_prometheus/src/emqx_prometheus_auth.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index e5574952c..b7118c344 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -485,6 +485,8 @@ mnesia_size(Tab) -> do_metric(emqx_authn_enable, #{enable := B}, _) -> emqx_prometheus_cluster:boolean_to_number(B); +do_metric(emqx_authz_enable, #{enable := B}, _) -> + emqx_prometheus_cluster:boolean_to_number(B); do_metric(K, _, Metrics) -> ?MG0(K, Metrics). From ace1303de2bb3e204ac953c3ec94498a20531ffd Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 24 Jan 2024 15:36:47 +0800 Subject: [PATCH 59/89] fix(prom_auth): authz metric name: `allow` and `deny` - https://emqx.atlassian.net/browse/EMQX-11799 --- .../emqx_prometheus/src/emqx_prometheus_auth.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index b7118c344..f9086cfb8 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -64,8 +64,8 @@ | emqx_authz_status | emqx_authz_nomatch | emqx_authz_total - | emqx_authz_success - | emqx_authz_failed. + | emqx_authz_allow + | emqx_authz_deny. %% Please don't remove this attribute, prometheus uses it to %% automatically register collectors. 
@@ -186,9 +186,9 @@ collect_auth(K = emqx_authz_nomatch, Data) -> counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_total, Data) -> counter_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_success, Data) -> +collect_auth(K = emqx_authz_allow, Data) -> counter_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_failed, Data) -> +collect_auth(K = emqx_authz_deny, Data) -> counter_metrics(?MG(K, Data)); %%==================== %% Authz rules count @@ -313,8 +313,8 @@ authz_metric_meta() -> {emqx_authz_status, gauge}, {emqx_authz_nomatch, counter}, {emqx_authz_total, counter}, - {emqx_authz_success, counter}, - {emqx_authz_failed, counter} + {emqx_authz_allow, counter}, + {emqx_authz_deny, counter} ]. authz_metric(names) -> @@ -363,8 +363,8 @@ lookup_authz_metrics_local(Type) -> emqx_authz_status => emqx_prometheus_cluster:status_to_number(Status), emqx_authz_nomatch => ?MG0(nomatch, Counters), emqx_authz_total => ?MG0(total, Counters), - emqx_authz_success => ?MG0(success, Counters), - emqx_authz_failed => ?MG0(failed, Counters) + emqx_authz_allow => ?MG0(allow, Counters), + emqx_authz_deny => ?MG0(deny, Counters) }; {error, _Reason} -> maps:from_keys(authz_metric(names) -- [emqx_authz_enable], 0) From 61dd362db0de82b2e168bb619abcf318e83f210f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 24 Jan 2024 16:48:15 +0800 Subject: [PATCH 60/89] fix(prom_di): api crash in `connecting` - https://emqx.atlassian.net/browse/EMQX-11801 --- apps/emqx_prometheus/rebar.config | 1 + apps/emqx_prometheus/src/emqx_prometheus.app.src | 2 +- apps/emqx_prometheus/src/emqx_prometheus_cluster.erl | 11 ++++++----- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/emqx_prometheus/rebar.config b/apps/emqx_prometheus/rebar.config index 649437765..578881d71 100644 --- a/apps/emqx_prometheus/rebar.config +++ b/apps/emqx_prometheus/rebar.config @@ -4,6 +4,7 @@ {emqx, {path, "../emqx"}}, {emqx_utils, {path, "../emqx_utils"}}, {emqx_auth, {path, "../emqx_auth"}}, + 
{emqx_resource, {path, "../emqx_resource"}}, {prometheus, {git, "https://github.com/emqx/prometheus.erl", {tag, "v4.10.0.2"}}} ]}. diff --git a/apps/emqx_prometheus/src/emqx_prometheus.app.src b/apps/emqx_prometheus/src/emqx_prometheus.app.src index 75c608087..9e9952d6c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.app.src +++ b/apps/emqx_prometheus/src/emqx_prometheus.app.src @@ -5,7 +5,7 @@ {vsn, "5.0.19"}, {modules, []}, {registered, [emqx_prometheus_sup]}, - {applications, [kernel, stdlib, prometheus, emqx, emqx_auth, emqx_management]}, + {applications, [kernel, stdlib, prometheus, emqx, emqx_auth, emqx_resource, emqx_management]}, {mod, {emqx_prometheus_app, []}}, {env, []}, {licenses, ["Apache-2.0"]}, diff --git a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl index 02209be22..c4f1dc3b8 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl @@ -16,6 +16,7 @@ -module(emqx_prometheus_cluster). -include("emqx_prometheus.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). -export([ raw_data/2, @@ -194,11 +195,11 @@ logic_sum(_, _) -> boolean_to_number(true) -> 1; boolean_to_number(false) -> 0. -status_to_number(connected) -> 1; -%% for auth -status_to_number(stopped) -> 0; -%% for data_integration -status_to_number(disconnected) -> 0. +status_to_number(?status_connected) -> 1; +status_to_number(?status_connecting) -> 0; +status_to_number(?status_disconnected) -> 0; +status_to_number(?rm_status_stopped) -> 0; +status_to_number(_) -> 0. metric_names(MetricWithType) when is_list(MetricWithType) -> [Name || {Name, _Type} <- MetricWithType]. 
From a752119d05ade2eccfc619d8bbb43a732c5b9d61 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 24 Jan 2024 23:40:34 +0800 Subject: [PATCH 61/89] fix(prom_di): use bridge_v2 metrics api --- apps/emqx_bridge/src/emqx_bridge_v2_api.erl | 3 + .../src/emqx_prometheus_data_integration.erl | 69 +++++++++---------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/apps/emqx_bridge/src/emqx_bridge_v2_api.erl b/apps/emqx_bridge/src/emqx_bridge_v2_api.erl index 95471ae5b..6dcd24355 100644 --- a/apps/emqx_bridge/src/emqx_bridge_v2_api.erl +++ b/apps/emqx_bridge/src/emqx_bridge_v2_api.erl @@ -1177,6 +1177,9 @@ format_resource( ) ). +%% FIXME: +%% missing metrics: +%% 'retried.success' and 'retried.failed' format_metrics(#{ counters := #{ 'dropped' := Dropped, diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 15fbe8106..f3bc98f50 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -69,21 +69,24 @@ %% Callback for emqx_prometheus_cluster %%-------------------------------------------------------------------- +-define(ROOT_KEY_ACTIONS, actions). + fetch_from_local_node(Mode) -> Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), + Bridges = emqx_bridge_v2:list(?ROOT_KEY_ACTIONS), + Connectors = emqx_connector:list(), {node(self()), #{ rule_metric_data => rule_metric_data(Mode, Rules), action_metric_data => action_metric_data(Mode, Bridges), - connector_metric_data => connector_metric_data(Mode, Bridges) + connector_metric_data => connector_metric_data(Mode, Connectors) }}. 
fetch_cluster_consistented_data() -> Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), + Connectors = emqx_connector:list(), (maybe_collect_schema_registry())#{ rules_ov_data => rules_ov_data(Rules), - connectors_ov_data => connectors_ov_data(Bridges) + connectors_ov_data => connectors_ov_data(Connectors) }. aggre_or_zip_init_acc() -> @@ -293,10 +296,10 @@ connectors_ov_metric_meta() -> connectors_ov_metric(names) -> emqx_prometheus_cluster:metric_names(connectors_ov_metric_meta()). -connectors_ov_data(Brdiges) -> +connectors_ov_data(Connectors) -> #{ %% Both Bridge V1 and V2 - emqx_connectors_count => erlang:length(Brdiges) + emqx_connectors_count => erlang:length(Connectors) }. %%======================================== @@ -416,29 +419,25 @@ action_point(Mode, Id, V) -> {with_node_label(Mode, [{id, Id}]), V}. get_bridge_metric(Type, Name) -> - case emqx_bridge:get_metrics(Type, Name) of - #{counters := Counters, gauges := Gauges} -> - #{ - emqx_action_matched => ?MG0(matched, Counters), - emqx_action_dropped => ?MG0(dropped, Counters), - emqx_action_success => ?MG0(success, Counters), - emqx_action_failed => ?MG0(failed, Counters), - emqx_action_inflight => ?MG0(inflight, Gauges), - emqx_action_received => ?MG0(received, Counters), - emqx_action_late_reply => ?MG0(late_reply, Counters), - emqx_action_retried => ?MG0(retried, Counters), - emqx_action_retried_success => ?MG0('retried.success', Counters), - emqx_action_retried_failed => ?MG0('retried.failed', Counters), - emqx_action_dropped_resource_stopped => ?MG0('dropped.resource_stopped', Counters), - emqx_action_dropped_resource_not_found => ?MG0( - 'dropped.resource_not_found', Counters - ), - emqx_action_dropped_queue_full => ?MG0('dropped.queue_full', Counters), - emqx_action_dropped_other => ?MG0('dropped.other', Counters), - emqx_action_dropped_expired => ?MG0('dropped.expired', Counters), - emqx_action_queuing => ?MG0(queuing, Gauges) - } - end. 
+ #{counters := Counters, gauges := Gauges} = emqx_bridge_v2:get_metrics(Type, Name), + #{ + emqx_action_matched => ?MG0(matched, Counters), + emqx_action_dropped => ?MG0(dropped, Counters), + emqx_action_success => ?MG0(success, Counters), + emqx_action_failed => ?MG0(failed, Counters), + emqx_action_inflight => ?MG0(inflight, Gauges), + emqx_action_received => ?MG0(received, Counters), + emqx_action_late_reply => ?MG0(late_reply, Counters), + emqx_action_retried => ?MG0(retried, Counters), + emqx_action_retried_success => ?MG0('retried.success', Counters), + emqx_action_retried_failed => ?MG0('retried.failed', Counters), + emqx_action_dropped_resource_stopped => ?MG0('dropped.resource_stopped', Counters), + emqx_action_dropped_resource_not_found => ?MG0('dropped.resource_not_found', Counters), + emqx_action_dropped_queue_full => ?MG0('dropped.queue_full', Counters), + emqx_action_dropped_other => ?MG0('dropped.other', Counters), + emqx_action_dropped_expired => ?MG0('dropped.expired', Counters), + emqx_action_queuing => ?MG0(queuing, Gauges) + }. %%==================== %% Connector Metric @@ -453,14 +452,14 @@ connector_metric_meta() -> connectr_metric(names) -> emqx_prometheus_cluster:metric_names(connector_metric_meta()). -connector_metric_data(Mode, Bridges) -> +connector_metric_data(Mode, Connectors) -> lists:foldl( - fun(#{type := Type, name := Name} = Bridge, AccIn) -> - Id = emqx_bridge_resource:bridge_id(Type, Name), - merge_acc_with_connectors(Mode, Id, get_connector_status(Bridge), AccIn) + fun(#{type := Type, name := Name} = Connector, AccIn) -> + Id = emqx_connector_resource:connector_id(Type, Name), + merge_acc_with_connectors(Mode, Id, get_connector_status(Connector), AccIn) end, maps:from_keys(connectr_metric(names), []), - Bridges + Connectors ). 
merge_acc_with_connectors(Mode, Id, ConnectorMetrics, PointsAcc) -> @@ -475,7 +474,7 @@ merge_acc_with_connectors(Mode, Id, ConnectorMetrics, PointsAcc) -> connector_point(Mode, Id, V) -> {with_node_label(Mode, [{id, Id}]), V}. -get_connector_status(#{resource_data := ResourceData} = _Bridge) -> +get_connector_status(#{resource_data := ResourceData} = _Connector) -> Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), Status = ?MG(status, ResourceData), #{ From fd62b59acd5abd4ea3147749359b725740643576 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 24 Jan 2024 23:41:27 +0800 Subject: [PATCH 62/89] chore: bump dashboard vsn --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 48ca7ebcb..0ccd48410 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ endif # Dashboard version # from https://github.com/emqx/emqx-dashboard5 export EMQX_DASHBOARD_VERSION ?= v1.6.1 -export EMQX_EE_DASHBOARD_VERSION ?= e1.5.0-beta.3 +export EMQX_EE_DASHBOARD_VERSION ?= e1.5.0-beta.8 PROFILE ?= emqx REL_PROFILES := emqx emqx-enterprise From 52a7b45d9706336043be921f47f94b88ad30eceb Mon Sep 17 00:00:00 2001 From: JianBo He Date: Thu, 25 Jan 2024 14:47:59 +0800 Subject: [PATCH 63/89] fix: upgrade ekka to 0.18.3 to ensure the leader correct --- apps/emqx/rebar.config | 2 +- .../emqx_management/src/emqx_mgmt_cluster.erl | 8 +++- .../test/emqx_mgmt_api_cluster_SUITE.erl | 38 ++++++++++++++++++- mix.exs | 2 +- rebar.config | 2 +- 5 files changed, 46 insertions(+), 6 deletions(-) diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 8b173d493..bfd981854 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -28,7 +28,7 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.18.1"}}}, + {ekka, {git, 
"https://github.com/emqx/ekka", {tag, "0.18.3"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.40.4"}}}, {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.3"}}}, diff --git a/apps/emqx_management/src/emqx_mgmt_cluster.erl b/apps/emqx_management/src/emqx_mgmt_cluster.erl index 828567776..be9685f1b 100644 --- a/apps/emqx_management/src/emqx_mgmt_cluster.erl +++ b/apps/emqx_management/src/emqx_mgmt_cluster.erl @@ -69,7 +69,8 @@ handle_call({invite_async, Node, JoinTo}, _From, State) -> undefined -> Caller = self(), Task = spawn_link_invite_worker(Node, JoinTo, Caller), - {reply, ok, State#{Node => Task}}; + State1 = remove_finished_task(Node, State), + {reply, ok, State1#{Node => Task}}; WorkerPid -> {reply, {error, {already_started, WorkerPid}}, State} end; @@ -157,6 +158,11 @@ find_node_name_via_worker_pid(WorkerPid, {Key, Task, I}) -> find_node_name_via_worker_pid(WorkerPid, maps:next(I)) end. +remove_finished_task(Node, State = #{history := History}) -> + State#{history => maps:remove(Node, History)}; +remove_finished_task(_Node, State) -> + State. + state_to_invitation_status(State) -> History = maps:get(history, State, #{}), {Succ, Failed} = lists:foldl( diff --git a/apps/emqx_management/test/emqx_mgmt_api_cluster_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_cluster_SUITE.erl index b2658f8fa..d1438b3ee 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_cluster_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_cluster_SUITE.erl @@ -190,7 +190,7 @@ t_cluster_invite_async(Config) -> lists:sort(Core1Resp) ), - %% force leave the core2 and replicant + %% force leave the core2 {204} = rpc:call( Core1, emqx_mgmt_api_cluster, @@ -260,7 +260,41 @@ t_cluster_invite_async(Config) -> } ], lists:sort(Core1Resp3) - ). 
+ ), + + %% force leave the core2 + {204} = rpc:call( + Core1, + emqx_mgmt_api_cluster, + force_leave, + [delete, #{bindings => #{node => atom_to_binary(Core2)}}] + ), + %% invite core2 again + ?assertMatch( + {200}, + Invite(Core2) + ), + + %% assert: core2 is in_progress status + {200, InvitationStatus1} = rpc:call(Core1, emqx_mgmt_api_cluster, get_invitation_status, [ + get, #{} + ]), + ?assertMatch( + #{succeed := [], in_progress := [#{node := Core2}], failed := []}, + InvitationStatus1 + ), + + %% waiting the async invitation_succeed + ?assertMatch({succeed, _}, waiting_the_async_invitation_succeed(Core1, Core2)), + + {200, InvitationStatus2} = rpc:call(Core1, emqx_mgmt_api_cluster, get_invitation_status, [ + get, #{} + ]), + ?assertMatch( + #{succeed := [#{node := Core2}], in_progress := [], failed := []}, + InvitationStatus2 + ), + ok. cluster(Config) -> NodeSpec = #{apps => ?APPS}, diff --git a/mix.exs b/mix.exs index 7389cb0f9..bb79c3204 100644 --- a/mix.exs +++ b/mix.exs @@ -55,7 +55,7 @@ defmodule EMQXUmbrella.MixProject do {:cowboy, github: "emqx/cowboy", tag: "2.9.2", override: true}, {:esockd, github: "emqx/esockd", tag: "5.11.1", override: true}, {:rocksdb, github: "emqx/erlang-rocksdb", tag: "1.8.0-emqx-2", override: true}, - {:ekka, github: "emqx/ekka", tag: "0.18.1", override: true}, + {:ekka, github: "emqx/ekka", tag: "0.18.3", override: true}, {:gen_rpc, github: "emqx/gen_rpc", tag: "3.3.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.12", override: true}, {:minirest, github: "emqx/minirest", tag: "1.3.15", override: true}, diff --git a/rebar.config b/rebar.config index a1f17c321..7a2e7ff8c 100644 --- a/rebar.config +++ b/rebar.config @@ -83,7 +83,7 @@ {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.11.1"}}}, {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-2"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, 
"0.18.1"}}}, + {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.18.3"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "3.3.1"}}}, {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.12"}}}, {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.15"}}}, From f52cc93d9d5499c9a1affa59e9c5e270ed8c7a6f Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Tue, 16 Jan 2024 17:09:24 +0200 Subject: [PATCH 64/89] perf(emqx_cm): use a dedicated pool for channel cleanup This is to isolate channels cleanup from other async tasks (like routes cleanup), as channels cleanup can be quite slow under high network latency conditions. Fixes: EMQX-11743 --- apps/emqx/include/emqx_cm.hrl | 2 + apps/emqx/src/emqx_cm.erl | 6 ++- apps/emqx/src/emqx_cm_sup.erl | 4 ++ apps/emqx/src/emqx_pool.erl | 64 +++++++++++++++++++++++--------- apps/emqx/test/emqx_cm_SUITE.erl | 10 +++-- changes/ce/perf-12336.en.md | 2 + 6 files changed, 66 insertions(+), 22 deletions(-) create mode 100644 changes/ce/perf-12336.en.md diff --git a/apps/emqx/include/emqx_cm.hrl b/apps/emqx/include/emqx_cm.hrl index ae70f131f..6478a6162 100644 --- a/apps/emqx/include/emqx_cm.hrl +++ b/apps/emqx/include/emqx_cm.hrl @@ -30,4 +30,6 @@ -define(T_GET_INFO, 5_000). -define(T_TAKEOVER, 15_000). +-define(CM_POOL, emqx_cm_pool). + -endif. 
diff --git a/apps/emqx/src/emqx_cm.erl b/apps/emqx/src/emqx_cm.erl index 660ac3cfe..2e6714e7f 100644 --- a/apps/emqx/src/emqx_cm.erl +++ b/apps/emqx/src/emqx_cm.erl @@ -670,7 +670,11 @@ handle_info({'DOWN', _MRef, process, Pid, _Reason}, State = #{chan_pmon := PMon} ChanPids = [Pid | emqx_utils:drain_down(BatchSize)], {Items, PMon1} = emqx_pmon:erase_all(ChanPids, PMon), lists:foreach(fun mark_channel_disconnected/1, ChanPids), - ok = emqx_pool:async_submit(fun lists:foreach/2, [fun ?MODULE:clean_down/1, Items]), + ok = emqx_pool:async_submit_to_pool( + ?CM_POOL, + fun lists:foreach/2, + [fun ?MODULE:clean_down/1, Items] + ), {noreply, State#{chan_pmon := PMon1}}; handle_info(Info, State) -> ?SLOG(error, #{msg => "unexpected_info", info => Info}), diff --git a/apps/emqx/src/emqx_cm_sup.erl b/apps/emqx/src/emqx_cm_sup.erl index e7420b4da..622921f1d 100644 --- a/apps/emqx/src/emqx_cm_sup.erl +++ b/apps/emqx/src/emqx_cm_sup.erl @@ -25,6 +25,8 @@ %% for test -export([restart_flapping/0]). +-include("emqx_cm.hrl"). 
+ %%-------------------------------------------------------------------- %% API %%-------------------------------------------------------------------- @@ -45,6 +47,7 @@ init([]) -> Banned = child_spec(emqx_banned, 1000, worker), Flapping = child_spec(emqx_flapping, 1000, worker), Locker = child_spec(emqx_cm_locker, 5000, worker), + CmPool = emqx_pool_sup:spec(emqx_cm_pool_sup, [?CM_POOL, random, {emqx_pool, start_link, []}]), Registry = child_spec(emqx_cm_registry, 5000, worker), Manager = child_spec(emqx_cm, 5000, worker), DSSessionGCSup = child_spec(emqx_persistent_session_ds_sup, infinity, supervisor), @@ -53,6 +56,7 @@ init([]) -> Banned, Flapping, Locker, + CmPool, Registry, Manager, DSSessionGCSup diff --git a/apps/emqx/src/emqx_pool.erl b/apps/emqx/src/emqx_pool.erl index 1cb5f429c..39c585133 100644 --- a/apps/emqx/src/emqx_pool.erl +++ b/apps/emqx/src/emqx_pool.erl @@ -28,11 +28,15 @@ submit/1, submit/2, async_submit/1, - async_submit/2 + async_submit/2, + submit_to_pool/2, + submit_to_pool/3, + async_submit_to_pool/2, + async_submit_to_pool/3 ]). -ifdef(TEST). --export([worker/0, flush_async_tasks/0]). +-export([worker/0, flush_async_tasks/0, flush_async_tasks/1]). -endif. %% gen_server callbacks @@ -57,7 +61,7 @@ -spec start_link(atom(), pos_integer()) -> startlink_ret(). start_link(Pool, Id) -> gen_server:start_link( - {local, emqx_utils:proc_name(?MODULE, Id)}, + {local, emqx_utils:proc_name(Pool, Id)}, ?MODULE, [Pool, Id], [{hibernate_after, 1000}] @@ -66,32 +70,48 @@ start_link(Pool, Id) -> %% @doc Submit work to the pool. -spec submit(task()) -> any(). submit(Task) -> - call({submit, Task}). + submit_to_pool(?POOL, Task). -spec submit(fun(), list(any())) -> any(). submit(Fun, Args) -> - call({submit, {Fun, Args}}). - -%% @private -call(Req) -> - gen_server:call(worker(), Req, infinity). + submit_to_pool(?POOL, Fun, Args). %% @doc Submit work to the pool asynchronously. -spec async_submit(task()) -> ok. 
async_submit(Task) -> - cast({async_submit, Task}). + async_submit_to_pool(?POOL, Task). -spec async_submit(fun(), list(any())) -> ok. async_submit(Fun, Args) -> - cast({async_submit, {Fun, Args}}). + async_submit_to_pool(?POOL, Fun, Args). + +-spec submit_to_pool(any(), task()) -> any(). +submit_to_pool(Pool, Task) -> + call(Pool, {submit, Task}). + +-spec submit_to_pool(any(), fun(), list(any())) -> any(). +submit_to_pool(Pool, Fun, Args) -> + call(Pool, {submit, {Fun, Args}}). + +-spec async_submit_to_pool(any(), task()) -> ok. +async_submit_to_pool(Pool, Task) -> + cast(Pool, {async_submit, Task}). + +-spec async_submit_to_pool(any(), fun(), list(any())) -> ok. +async_submit_to_pool(Pool, Fun, Args) -> + cast(Pool, {async_submit, {Fun, Args}}). %% @private -cast(Msg) -> - gen_server:cast(worker(), Msg). +call(Pool, Req) -> + gen_server:call(worker(Pool), Req, infinity). %% @private -worker() -> - gproc_pool:pick_worker(?POOL). +cast(Pool, Msg) -> + gen_server:cast(worker(Pool), Msg). + +%% @private +worker(Pool) -> + gproc_pool:pick_worker(Pool). %%-------------------------------------------------------------------- %% gen_server callbacks @@ -146,15 +166,25 @@ run(Fun) when is_function(Fun) -> Fun(). -ifdef(TEST). + +worker() -> + worker(?POOL). + +flush_async_tasks() -> + flush_async_tasks(?POOL). + %% This help function creates a large enough number of async tasks %% to force flush the pool workers. %% The number of tasks should be large enough to ensure all workers have %% the chance to work on at least one of the tasks. -flush_async_tasks() -> +flush_async_tasks(Pool) -> Ref = make_ref(), Self = self(), L = lists:seq(1, 997), - lists:foreach(fun(I) -> emqx_pool:async_submit(fun() -> Self ! {done, Ref, I} end, []) end, L), + lists:foreach( + fun(I) -> emqx_pool:async_submit_to_pool(Pool, fun() -> Self ! 
{done, Ref, I} end, []) end, + L + ), lists:foreach( fun(I) -> receive diff --git a/apps/emqx/test/emqx_cm_SUITE.erl b/apps/emqx/test/emqx_cm_SUITE.erl index 4ecea9a4b..e175b4349 100644 --- a/apps/emqx/test/emqx_cm_SUITE.erl +++ b/apps/emqx/test/emqx_cm_SUITE.erl @@ -221,7 +221,7 @@ t_open_session_race_condition(_) -> end, %% sync ignored = gen_server:call(?CM, ignore, infinity), - ok = emqx_pool:flush_async_tasks(), + ok = emqx_pool:flush_async_tasks(?CM_POOL), ?assertEqual([], emqx_cm:lookup_channels(ClientId)). t_kick_session_discard_normal(_) -> @@ -343,7 +343,7 @@ test_stepdown_session(Action, Reason) -> end, % sync ignored = gen_server:call(?CM, ignore, infinity), - ok = flush_emqx_pool(), + ok = flush_emqx_cm_pool(), ?assertEqual([], emqx_cm:lookup_channels(ClientId)). %% Channel deregistration is delegated to emqx_pool as a sync tasks. @@ -353,10 +353,12 @@ test_stepdown_session(Action, Reason) -> %% to sync with the pool workers. %% The number of tasks should be large enough to ensure all workers have %% the chance to work on at least one of the tasks. -flush_emqx_pool() -> +flush_emqx_cm_pool() -> Self = self(), L = lists:seq(1, 1000), - lists:foreach(fun(I) -> emqx_pool:async_submit(fun() -> Self ! {done, I} end, []) end, L), + lists:foreach( + fun(I) -> emqx_pool:async_submit_to_pool(?CM_POOL, fun() -> Self ! {done, I} end, []) end, L + ), lists:foreach( fun(I) -> receive diff --git a/changes/ce/perf-12336.en.md b/changes/ce/perf-12336.en.md new file mode 100644 index 000000000..5c385e6b6 --- /dev/null +++ b/changes/ce/perf-12336.en.md @@ -0,0 +1,2 @@ +Isolate channels cleanup from other async tasks (like routes cleanup) by using a dedicated pool, +as this task can be quite slow under high network latency conditions. 
From a8c6280a5e8e0f2325e3d11d0676e7955ed9fadf Mon Sep 17 00:00:00 2001 From: Serge Tupchii Date: Tue, 16 Jan 2024 19:42:37 +0200 Subject: [PATCH 65/89] test(emqx_cm_SUITE): use one helper function: `emqx_pool:flush_async_tasks/1` --- apps/emqx/test/emqx_cm_SUITE.erl | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/apps/emqx/test/emqx_cm_SUITE.erl b/apps/emqx/test/emqx_cm_SUITE.erl index e175b4349..aba4bc744 100644 --- a/apps/emqx/test/emqx_cm_SUITE.erl +++ b/apps/emqx/test/emqx_cm_SUITE.erl @@ -343,31 +343,9 @@ test_stepdown_session(Action, Reason) -> end, % sync ignored = gen_server:call(?CM, ignore, infinity), - ok = flush_emqx_cm_pool(), + ok = emqx_pool:flush_async_tasks(?CM_POOL), ?assertEqual([], emqx_cm:lookup_channels(ClientId)). -%% Channel deregistration is delegated to emqx_pool as a sync tasks. -%% The emqx_pool is pool of workers, and there is no way to know -%% which worker was picked for the last deregistration task. -%% This help function creates a large enough number of async tasks -%% to sync with the pool workers. -%% The number of tasks should be large enough to ensure all workers have -%% the chance to work on at least one of the tasks. -flush_emqx_cm_pool() -> - Self = self(), - L = lists:seq(1, 1000), - lists:foreach( - fun(I) -> emqx_pool:async_submit_to_pool(?CM_POOL, fun() -> Self ! {done, I} end, []) end, L - ), - lists:foreach( - fun(I) -> - receive - {done, I} -> ok - end - end, - L - ). 
- t_discard_session_race(_) -> ClientId = rand_client_id(), ?check_trace( From adf22f1f10f409bdd27ecb7a8c174d65a4e7053f Mon Sep 17 00:00:00 2001 From: Ilya Averyanov Date: Wed, 17 Jan 2024 22:44:51 +0300 Subject: [PATCH 66/89] fix(mqtt_bridge): render valid messages from incomplete rule data --- .../src/emqx_bridge_mqtt_msg.erl | 23 +++++++++---- .../test/emqx_bridge_mqtt_SUITE.erl | 34 +++++++++++++++++++ changes/ce/fix-12347.en.md | 4 +++ 3 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 changes/ce/fix-12347.en.md diff --git a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_msg.erl b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_msg.erl index 48cae70d7..e09866429 100644 --- a/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_msg.erl +++ b/apps/emqx_bridge_mqtt/src/emqx_bridge_mqtt_msg.erl @@ -16,6 +16,8 @@ -module(emqx_bridge_mqtt_msg). +-include_lib("emqx/include/emqx_mqtt.hrl"). + -export([parse/1]). -export([render/2]). @@ -66,8 +68,8 @@ render( #{ topic => render_string(TopicToken, Msg), payload => render_payload(Vars, Msg), - qos => render_simple_var(QoSToken, Msg), - retain => render_simple_var(RetainToken, Msg) + qos => render_simple_var(QoSToken, Msg, ?QOS_0), + retain => render_simple_var(RetainToken, Msg, false) }. render_payload(From, MapMsg) -> @@ -80,16 +82,23 @@ do_render_payload(Tks, Msg) -> %% Replace a string contains vars to another string in which the placeholders are replace by the %% corresponding values. For example, given "a: ${var}", if the var=1, the result string will be: -%% "a: 1". +%% "a: 1". Undefined vars will be replaced by empty strings. render_string(Tokens, Data) when is_list(Tokens) -> - emqx_placeholder:proc_tmpl(Tokens, Data, #{return => full_binary}); + emqx_placeholder:proc_tmpl(Tokens, Data, #{ + return => full_binary, var_trans => fun undefined_as_empty/1 + }); render_string(Val, _Data) -> Val. +undefined_as_empty(undefined) -> + <<>>; +undefined_as_empty(Val) -> + emqx_utils_conv:bin(Val). 
+ %% Replace a simple var to its value. For example, given "${var}", if the var=1, then the result %% value will be an integer 1. -render_simple_var(Tokens, Data) when is_list(Tokens) -> +render_simple_var(Tokens, Data, Default) when is_list(Tokens) -> [Var] = emqx_placeholder:proc_tmpl(Tokens, Data, #{return => rawlist}), - Var; -render_simple_var(Val, _Data) -> + emqx_maybe:define(Var, Default); +render_simple_var(Val, _Data, _Default) -> Val. diff --git a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl index 807fba3c9..c6850ab8e 100644 --- a/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl +++ b/apps/emqx_bridge_mqtt/test/emqx_bridge_mqtt_SUITE.erl @@ -836,6 +836,40 @@ t_egress_mqtt_bridge_with_rules(_) -> {ok, 204, <<>>} = request(delete, uri(["rules", RuleId]), []), {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []). +t_egress_mqtt_bridge_with_dummy_rule(_) -> + BridgeIDEgress = create_bridge( + ?SERVER_CONF#{ + <<"name">> => ?BRIDGE_NAME_EGRESS, + <<"egress">> => ?EGRESS_CONF + } + ), + + {ok, 201, Rule} = request( + post, + uri(["rules"]), + #{ + <<"name">> => <<"A_rule_send_empty_messages_to_a_sink_mqtt_bridge">>, + <<"enable">> => true, + <<"actions">> => [BridgeIDEgress], + %% select something useless from what a message cannot be composed + <<"sql">> => <<"SELECT x from \"t/1\"">> + } + ), + #{<<"id">> := RuleId} = emqx_utils_json:decode(Rule), + + %% PUBLISH a message to the rule. + Payload = <<"hi">>, + RuleTopic = <<"t/1">>, + RemoteTopic = <>, + emqx:subscribe(RemoteTopic), + timer:sleep(100), + emqx:publish(emqx_message:make(RuleTopic, Payload)), + %% we should receive a message on the "remote" broker, with specified topic + assert_mqtt_msg_received(RemoteTopic, <<>>), + + {ok, 204, <<>>} = request(delete, uri(["rules", RuleId]), []), + {ok, 204, <<>>} = request(delete, uri(["bridges", BridgeIDEgress]), []). 
+ t_mqtt_conn_bridge_egress_reconnect(_) -> %% then we add a mqtt connector, using POST BridgeIDEgress = create_bridge( diff --git a/changes/ce/fix-12347.en.md b/changes/ce/fix-12347.en.md new file mode 100644 index 000000000..b10738192 --- /dev/null +++ b/changes/ce/fix-12347.en.md @@ -0,0 +1,4 @@ +Always render valid messages for egress MQTT data bridge from the data fetched by Rule SQL, even if the data is incomplete and placeholders used in the bridge configuration are missing. +Previously, some messages were rendered as invalid and were discarded by the MQTT egress data bridge. + +Render undefined variables as empty strings in `payload` and `topic` templates of the MQTT egress data bridge. Previously, undefined variables were rendered as `undefined` strings. From 50db0558413c4c41d7ab32aadc2ce0cd2dd6b8dc Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 16 Jan 2024 17:31:28 -0300 Subject: [PATCH 67/89] ci(dev): add `.git/` to `.gitignore` For some reason, some tools like [ripgrep](https://github.com/BurntSushi/ripgrep) will search `.git` when using the `-.`/`--hidden` flag, even when not using `--no-ignore-vcs`. This leads to several unwanted results. 
--- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0a76c3807..7068c1c7d 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,4 @@ apps/emqx_conf/etc/emqx.conf.all.rendered* rebar-git-cache.tar # build docker image locally .docker_image_tag +.git/ From f100825ff4fb50f9af9c2fba900c4db90aa215be Mon Sep 17 00:00:00 2001 From: Ivan Dyachkov Date: Wed, 17 Jan 2024 15:18:03 +0100 Subject: [PATCH 68/89] ci: bump actions versions --- .github/actions/package-macos/action.yaml | 2 +- .github/workflows/_pr_entrypoint.yaml | 2 +- .github/workflows/_push-entrypoint.yaml | 2 +- .github/workflows/build_docker_for_test.yaml | 2 +- .github/workflows/build_packages.yaml | 2 +- .github/workflows/build_packages_cron.yaml | 4 ++-- .github/workflows/build_slim_packages.yaml | 6 +++--- .github/workflows/check_deps_integrity.yaml | 2 +- .github/workflows/performance_test.yaml | 18 +++++++++--------- .github/workflows/run_conf_tests.yaml | 2 +- .github/workflows/run_emqx_app_tests.yaml | 2 +- .github/workflows/run_jmeter_tests.yaml | 14 +++++++------- .github/workflows/run_relup_tests.yaml | 4 ++-- .github/workflows/run_test_cases.yaml | 10 +++++----- .github/workflows/scorecard.yaml | 2 +- .github/workflows/static_checks.yaml | 2 +- 16 files changed, 38 insertions(+), 38 deletions(-) diff --git a/.github/actions/package-macos/action.yaml b/.github/actions/package-macos/action.yaml index bae335cf0..64d179b46 100644 --- a/.github/actions/package-macos/action.yaml +++ b/.github/actions/package-macos/action.yaml @@ -51,7 +51,7 @@ runs: echo "SELF_HOSTED=false" >> $GITHUB_OUTPUT ;; esac - - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 + - uses: actions/cache@e12d46a63a90f2fae62d114769bbf2a179198b5c # v3.3.3 id: cache if: steps.prepare.outputs.SELF_HOSTED != 'true' with: diff --git a/.github/workflows/_pr_entrypoint.yaml b/.github/workflows/_pr_entrypoint.yaml index 86e676ebe..1f7e8e466 100644 --- 
a/.github/workflows/_pr_entrypoint.yaml +++ b/.github/workflows/_pr_entrypoint.yaml @@ -144,7 +144,7 @@ jobs: echo "PROFILE=${PROFILE}" | tee -a .env echo "PKG_VSN=$(./pkg-vsn.sh ${PROFILE})" | tee -a .env zip -ryq -x@.github/workflows/.zipignore $PROFILE.zip . - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: ${{ matrix.profile }} path: ${{ matrix.profile }}.zip diff --git a/.github/workflows/_push-entrypoint.yaml b/.github/workflows/_push-entrypoint.yaml index 1e0dd941b..a6d0e178e 100644 --- a/.github/workflows/_push-entrypoint.yaml +++ b/.github/workflows/_push-entrypoint.yaml @@ -152,7 +152,7 @@ jobs: echo "PROFILE=${PROFILE}" | tee -a .env echo "PKG_VSN=$(./pkg-vsn.sh ${PROFILE})" | tee -a .env zip -ryq -x@.github/workflows/.zipignore $PROFILE.zip . - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: ${{ matrix.profile }} path: ${{ matrix.profile }}.zip diff --git a/.github/workflows/build_docker_for_test.yaml b/.github/workflows/build_docker_for_test.yaml index ccff642f9..25adea083 100644 --- a/.github/workflows/build_docker_for_test.yaml +++ b/.github/workflows/build_docker_for_test.yaml @@ -57,7 +57,7 @@ jobs: - name: export docker image run: | docker save $EMQX_IMAGE_TAG | gzip > $EMQX_NAME-docker-$PKG_VSN.tar.gz - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: "${{ env.EMQX_NAME }}-docker" path: "${{ env.EMQX_NAME }}-docker-${{ env.PKG_VSN }}.tar.gz" diff --git a/.github/workflows/build_packages.yaml b/.github/workflows/build_packages.yaml index 3382bbeed..abde2672e 100644 --- a/.github/workflows/build_packages.yaml +++ b/.github/workflows/build_packages.yaml @@ -95,7 +95,7 
@@ jobs: apple_developer_identity: ${{ secrets.APPLE_DEVELOPER_IDENTITY }} apple_developer_id_bundle: ${{ secrets.APPLE_DEVELOPER_ID_BUNDLE }} apple_developer_id_bundle_password: ${{ secrets.APPLE_DEVELOPER_ID_BUNDLE_PASSWORD }} - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: success() with: name: ${{ matrix.profile }}-${{ matrix.otp }}-${{ matrix.os }} diff --git a/.github/workflows/build_packages_cron.yaml b/.github/workflows/build_packages_cron.yaml index 56d5c37f2..5e90be8c4 100644 --- a/.github/workflows/build_packages_cron.yaml +++ b/.github/workflows/build_packages_cron.yaml @@ -66,7 +66,7 @@ jobs: set -eu ./scripts/pkg-tests.sh "${PROFILE}-tgz" ./scripts/pkg-tests.sh "${PROFILE}-pkg" - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: success() with: name: ${{ matrix.profile[0] }}-${{ matrix.os }} @@ -111,7 +111,7 @@ jobs: apple_developer_identity: ${{ secrets.APPLE_DEVELOPER_IDENTITY }} apple_developer_id_bundle: ${{ secrets.APPLE_DEVELOPER_ID_BUNDLE }} apple_developer_id_bundle_password: ${{ secrets.APPLE_DEVELOPER_ID_BUNDLE_PASSWORD }} - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: success() with: name: ${{ matrix.profile }}-${{ matrix.os }} diff --git a/.github/workflows/build_slim_packages.yaml b/.github/workflows/build_slim_packages.yaml index 4b9ca76b9..45dee2b3d 100644 --- a/.github/workflows/build_slim_packages.yaml +++ b/.github/workflows/build_slim_packages.yaml @@ -88,13 +88,13 @@ jobs: run: | make ${EMQX_NAME}-elixir-pkg ./scripts/pkg-tests.sh ${EMQX_NAME}-elixir-pkg - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: 
actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: "${{ matrix.profile[0] }}-${{ matrix.profile[1] }}-${{ matrix.profile[2] }}-${{ matrix.profile[3] }}-${{ matrix.profile[4] }}" path: _packages/${{ matrix.profile[0] }}/* retention-days: 7 compression-level: 0 - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: "${{ matrix.profile[0] }}-schema-dump-${{ matrix.profile[1] }}-${{ matrix.profile[2] }}-${{ matrix.profile[3] }}-${{ matrix.profile[4] }}" path: | @@ -128,7 +128,7 @@ jobs: apple_developer_identity: ${{ secrets.APPLE_DEVELOPER_IDENTITY }} apple_developer_id_bundle: ${{ secrets.APPLE_DEVELOPER_ID_BUNDLE }} apple_developer_id_bundle_password: ${{ secrets.APPLE_DEVELOPER_ID_BUNDLE_PASSWORD }} - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: ${{ matrix.os }} path: _packages/**/* diff --git a/.github/workflows/check_deps_integrity.yaml b/.github/workflows/check_deps_integrity.yaml index 30d788500..cfe6cfbae 100644 --- a/.github/workflows/check_deps_integrity.yaml +++ b/.github/workflows/check_deps_integrity.yaml @@ -36,7 +36,7 @@ jobs: MIX_ENV: emqx-enterprise PROFILE: emqx-enterprise - name: Upload produced lock files - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: produced_lock_files diff --git a/.github/workflows/performance_test.yaml b/.github/workflows/performance_test.yaml index ede8abf07..629e8fcdb 100644 --- a/.github/workflows/performance_test.yaml +++ b/.github/workflows/performance_test.yaml @@ -52,7 +52,7 @@ jobs: id: package_file run: | echo "PACKAGE_FILE=$(find _packages/emqx -name 'emqx-*.deb' | head -n 1 | xargs 
basename)" >> $GITHUB_OUTPUT - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: emqx-ubuntu20.04 path: _packages/emqx/${{ steps.package_file.outputs.PACKAGE_FILE }} @@ -113,13 +113,13 @@ jobs: working-directory: ./tf-emqx-performance-test run: | terraform destroy -auto-approve - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: success() with: name: metrics path: | "./tf-emqx-performance-test/*.tar.gz" - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: terraform @@ -184,13 +184,13 @@ jobs: working-directory: ./tf-emqx-performance-test run: | terraform destroy -auto-approve - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: success() with: name: metrics path: | "./tf-emqx-performance-test/*.tar.gz" - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: terraform @@ -257,13 +257,13 @@ jobs: working-directory: ./tf-emqx-performance-test run: | terraform destroy -auto-approve - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: success() with: name: metrics path: | "./tf-emqx-performance-test/*.tar.gz" - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: terraform @@ -330,13 
+330,13 @@ jobs: working-directory: ./tf-emqx-performance-test run: | terraform destroy -auto-approve - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: success() with: name: metrics path: | "./tf-emqx-performance-test/*.tar.gz" - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: terraform diff --git a/.github/workflows/run_conf_tests.yaml b/.github/workflows/run_conf_tests.yaml index cac63910b..913f4e5a4 100644 --- a/.github/workflows/run_conf_tests.yaml +++ b/.github/workflows/run_conf_tests.yaml @@ -40,7 +40,7 @@ jobs: if: failure() run: | cat _build/${{ matrix.profile }}/rel/emqx/logs/erlang.log.* - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: conftest-logs-${{ matrix.profile }} diff --git a/.github/workflows/run_emqx_app_tests.yaml b/.github/workflows/run_emqx_app_tests.yaml index f7c645aeb..e6326b96c 100644 --- a/.github/workflows/run_emqx_app_tests.yaml +++ b/.github/workflows/run_emqx_app_tests.yaml @@ -58,7 +58,7 @@ jobs: ./rebar3 eunit -v --name 'eunit@127.0.0.1' ./rebar3 as standalone_test ct --name 'test@127.0.0.1' -v --readable=true ./rebar3 proper -d test/props - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: logs-emqx-app-tests diff --git a/.github/workflows/run_jmeter_tests.yaml b/.github/workflows/run_jmeter_tests.yaml index 86cbf220f..14ee999ef 100644 --- a/.github/workflows/run_jmeter_tests.yaml +++ b/.github/workflows/run_jmeter_tests.yaml @@ -16,7 +16,7 @@ jobs: steps: - name: Cache Jmeter id: 
cache-jmeter - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 + uses: actions/cache@e12d46a63a90f2fae62d114769bbf2a179198b5c # v3.3.3 with: path: /tmp/apache-jmeter.tgz key: apache-jmeter-5.4.3.tgz @@ -35,7 +35,7 @@ jobs: else wget --no-verbose --no-check-certificate -O /tmp/apache-jmeter.tgz $ARCHIVE_URL fi - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: apache-jmeter.tgz path: /tmp/apache-jmeter.tgz @@ -86,7 +86,7 @@ jobs: echo "check logs failed" exit 1 fi - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: always() with: name: jmeter_logs-advanced_feat-${{ matrix.scripts_type }} @@ -153,7 +153,7 @@ jobs: if: failure() run: | docker compose -f .ci/docker-compose-file/docker-compose-emqx-cluster.yaml logs --no-color > ./jmeter_logs/emqx.log - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: always() with: name: jmeter_logs-pgsql_authn_authz-${{ matrix.scripts_type }}_${{ matrix.pgsql_tag }} @@ -213,7 +213,7 @@ jobs: echo "check logs failed" exit 1 fi - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: always() with: name: jmeter_logs-mysql_authn_authz-${{ matrix.scripts_type }}_${{ matrix.mysql_tag }} @@ -265,7 +265,7 @@ jobs: echo "check logs failed" exit 1 fi - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: always() with: name: jmeter_logs-JWT_authn-${{ matrix.scripts_type }} @@ -309,7 +309,7 @@ jobs: echo "check logs failed" exit 1 
fi - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: always() with: name: jmeter_logs-built_in_database_authn_authz-${{ matrix.scripts_type }} diff --git a/.github/workflows/run_relup_tests.yaml b/.github/workflows/run_relup_tests.yaml index db8cef69d..b5016d71c 100644 --- a/.github/workflows/run_relup_tests.yaml +++ b/.github/workflows/run_relup_tests.yaml @@ -45,7 +45,7 @@ jobs: run: | export PROFILE='emqx-enterprise' make emqx-enterprise-tgz - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 name: Upload built emqx and test scenario with: name: relup_tests_emqx_built @@ -111,7 +111,7 @@ jobs: docker logs node2.emqx.io | tee lux_logs/emqx2.log exit 1 fi - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 name: Save debug data if: failure() with: diff --git a/.github/workflows/run_test_cases.yaml b/.github/workflows/run_test_cases.yaml index 8841c845b..ca478a381 100644 --- a/.github/workflows/run_test_cases.yaml +++ b/.github/workflows/run_test_cases.yaml @@ -64,7 +64,7 @@ jobs: CT_COVER_EXPORT_PREFIX: ${{ matrix.profile }}-${{ matrix.otp }} run: make proper - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: coverdata-${{ matrix.profile }}-${{ matrix.otp }} path: _build/test/cover @@ -108,7 +108,7 @@ jobs: ENABLE_COVER_COMPILE: 1 CT_COVER_EXPORT_PREFIX: ${{ matrix.profile }}-${{ matrix.otp }}-sg${{ matrix.suitegroup }} run: ./scripts/ct/run.sh --ci --app ${{ matrix.app }} - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: 
actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: coverdata-${{ matrix.profile }}-${{ matrix.prefix }}-${{ matrix.otp }}-sg${{ matrix.suitegroup }} path: _build/test/cover @@ -116,7 +116,7 @@ jobs: - name: compress logs if: failure() run: tar -czf logs.tar.gz _build/test/logs - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: logs-${{ matrix.profile }}-${{ matrix.prefix }}-${{ matrix.otp }}-sg${{ matrix.suitegroup }} @@ -155,7 +155,7 @@ jobs: CT_COVER_EXPORT_PREFIX: ${{ matrix.profile }}-${{ matrix.otp }}-sg${{ matrix.suitegroup }} run: | make "${{ matrix.app }}-ct" - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: coverdata-${{ matrix.profile }}-${{ matrix.prefix }}-${{ matrix.otp }}-sg${{ matrix.suitegroup }} path: _build/test/cover @@ -164,7 +164,7 @@ jobs: - name: compress logs if: failure() run: tar -czf logs.tar.gz _build/test/logs - - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + - uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 if: failure() with: name: logs-${{ matrix.profile }}-${{ matrix.prefix }}-${{ matrix.otp }}-sg${{ matrix.suitegroup }} diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index f43892d01..aabe4e5b0 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -40,7 +40,7 @@ jobs: publish_results: true - name: "Upload artifact" - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595 # v4.1.0 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/static_checks.yaml 
b/.github/workflows/static_checks.yaml index a092210c8..96d3e31e9 100644 --- a/.github/workflows/static_checks.yaml +++ b/.github/workflows/static_checks.yaml @@ -37,7 +37,7 @@ jobs: run: | unzip -o -q ${{ matrix.profile }}.zip git config --global --add safe.directory "$GITHUB_WORKSPACE" - - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 + - uses: actions/cache@e12d46a63a90f2fae62d114769bbf2a179198b5c # v3.3.3 with: path: "emqx_dialyzer_${{ matrix.otp }}_plt" key: rebar3-dialyzer-plt-${{ matrix.profile }}-${{ matrix.otp }}-${{ hashFiles('rebar.*', 'apps/*/rebar.*') }} From 51d32bd620542537146f49f07d96602098ec1425 Mon Sep 17 00:00:00 2001 From: JianBo He Date: Thu, 18 Jan 2024 17:29:23 +0800 Subject: [PATCH 69/89] chore: improve http connector logs format --- .../src/emqx_bridge_http_connector.erl | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl index a148a4d16..fd522726f 100644 --- a/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl +++ b/apps/emqx_bridge_http/src/emqx_bridge_http_connector.erl @@ -252,6 +252,7 @@ start_pool(PoolName, PoolOpts) -> {error, {already_started, _}} -> ?SLOG(warning, #{ msg => "emqx_connector_on_start_already_started", + connector => PoolName, pool_name => PoolName }), ok; @@ -510,8 +511,8 @@ resolve_pool_worker(#{pool_name := PoolName} = State, Key) -> on_get_channels(ResId) -> emqx_bridge_v2:get_channels_for_connector(ResId). 
-on_get_status(_InstId, #{pool_name := PoolName, connect_timeout := Timeout} = State) -> - case do_get_status(PoolName, Timeout) of +on_get_status(InstId, #{pool_name := InstId, connect_timeout := Timeout} = State) -> + case do_get_status(InstId, Timeout) of ok -> connected; {error, still_connecting} -> @@ -527,12 +528,7 @@ do_get_status(PoolName, Timeout) -> case ehttpc:health_check(Worker, Timeout) of ok -> ok; - {error, Reason} = Error -> - ?SLOG(error, #{ - msg => "http_connector_get_status_failed", - reason => redact(Reason), - worker => Worker - }), + {error, _} = Error -> Error end end, @@ -543,14 +539,20 @@ do_get_status(PoolName, Timeout) -> case [E || {error, _} = E <- Results] of [] -> ok; - Errors -> - hd(Errors) + [{error, Reason} | _] -> + ?SLOG(info, #{ + msg => "health_check_failed", + reason => redact(Reason), + connector => PoolName + }), + {error, Reason} end catch exit:timeout -> - ?SLOG(error, #{ - msg => "http_connector_pmap_failed", - reason => timeout + ?SLOG(info, #{ + msg => "health_check_failed", + reason => timeout, + connector => PoolName }), {error, timeout} end. From fb39e1eacc88df8c6f0b2660932b694f3614c132 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 16 Jan 2024 17:10:07 -0300 Subject: [PATCH 70/89] feat(ds): allow customizing the data directory The storage expectations for the RocksDB DB may be different from our usual data directory. Also, it may consume a lot more storage than other data. This allows customizing the data directory for the builtin DS storage backend. Note: if the cluster was already initialized using a directory path, changing that config will have no effect. This path is currently persisted in mnesia and used when reopening the DB. 
--- apps/emqx/src/emqx_persistent_message.erl | 8 ++++- apps/emqx/src/emqx_schema.erl | 30 +++++++++++++++++++ apps/emqx_conf/src/emqx_conf_schema.erl | 18 ++--------- .../src/emqx_ds_storage_layer.erl | 20 +++++++++---- 4 files changed, 53 insertions(+), 23 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_message.erl b/apps/emqx/src/emqx_persistent_message.erl index 295ddd3dc..d725c9b2c 100644 --- a/apps/emqx/src/emqx_persistent_message.erl +++ b/apps/emqx/src/emqx_persistent_message.erl @@ -61,10 +61,16 @@ force_ds() -> emqx_config:get([session_persistence, force_persistence]). storage_backend(#{ - builtin := #{enable := true, n_shards := NShards, replication_factor := ReplicationFactor} + builtin := #{ + enable := true, + data_dir := DataDir, + n_shards := NShards, + replication_factor := ReplicationFactor + } }) -> #{ backend => builtin, + data_dir => DataDir, storage => {emqx_ds_storage_bitfield_lts, #{}}, n_shards => NShards, replication_factor => ReplicationFactor diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index ae22db14f..7cd67089d 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -94,6 +94,7 @@ non_empty_string/1, validations/0, naive_env_interpolation/1, + ensure_unicode_path/2, validate_server_ssl_opts/1, validate_tcp_keepalive/1, parse_tcp_keepalive/1 @@ -1882,6 +1883,18 @@ fields("session_storage_backend_builtin") -> default => true } )}, + {"data_dir", + sc( + string(), + #{ + desc => ?DESC(session_builtin_data_dir), + default => <<"${EMQX_DATA_DIR}">>, + importance => ?IMPORTANCE_LOW, + converter => fun(Path, Opts) -> + naive_env_interpolation(ensure_unicode_path(Path, Opts)) + end + } + )}, {"n_shards", sc( pos_integer(), @@ -3836,3 +3849,20 @@ tags_schema() -> importance => ?IMPORTANCE_LOW } ). 
+ +ensure_unicode_path(undefined, _) -> + undefined; +ensure_unicode_path(Path, #{make_serializable := true}) -> + %% format back to serializable string + unicode:characters_to_binary(Path, utf8); +ensure_unicode_path(Path, Opts) when is_binary(Path) -> + case unicode:characters_to_list(Path, utf8) of + {R, _, _} when R =:= error orelse R =:= incomplete -> + throw({"bad_file_path_string", Path}); + PathStr -> + ensure_unicode_path(PathStr, Opts) + end; +ensure_unicode_path(Path, _) when is_list(Path) -> + Path; +ensure_unicode_path(Path, _) -> + throw({"not_string", Path}). diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index abb2e14e3..04f19b95f 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1432,22 +1432,8 @@ convert_rotation(#{} = Rotation, _Opts) -> maps:get(<<"count">>, Rotation, 10); convert_rotation(Count, _Opts) when is_integer(Count) -> Count; convert_rotation(Count, _Opts) -> throw({"bad_rotation", Count}). -ensure_unicode_path(undefined, _) -> - undefined; -ensure_unicode_path(Path, #{make_serializable := true}) -> - %% format back to serializable string - unicode:characters_to_binary(Path, utf8); -ensure_unicode_path(Path, Opts) when is_binary(Path) -> - case unicode:characters_to_list(Path, utf8) of - {R, _, _} when R =:= error orelse R =:= incomplete -> - throw({"bad_file_path_string", Path}); - PathStr -> - ensure_unicode_path(PathStr, Opts) - end; -ensure_unicode_path(Path, _) when is_list(Path) -> - Path; -ensure_unicode_path(Path, _) -> - throw({"not_string", Path}). +ensure_unicode_path(Path, Opts) -> + emqx_schema:ensure_unicode_path(Path, Opts). log_level() -> hoconsc:enum([debug, info, notice, warning, error, critical, alert, emergency, all]). 
diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index ab64005b6..d44235924 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -34,7 +34,7 @@ -export([start_link/2, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). %% internal exports: --export([db_dir/1]). +-export([db_dir/2]). -export_type([ gen_id/0, @@ -168,7 +168,13 @@ open_shard(Shard, Options) -> -spec drop_shard(shard_id()) -> ok. drop_shard(Shard) -> catch emqx_ds_storage_layer_sup:stop_shard(Shard), - ok = rocksdb:destroy(db_dir(Shard), []). + case persistent_term:get({?MODULE, Shard, data_dir}, undefined) of + undefined -> + ok; + BaseDir -> + ok = rocksdb:destroy(db_dir(BaseDir, Shard), []), + persistent_term:erase({?MODULE, Shard, base_dir}) + end. -spec store_batch(shard_id(), [emqx_types:message()], emqx_ds:message_store_opts()) -> emqx_ds:store_batch_result(). @@ -424,7 +430,8 @@ rocksdb_open(Shard, Options) -> {create_missing_column_families, true} | maps:get(db_options, Options, []) ], - DBDir = db_dir(Shard), + DataDir = maps:get(data_dir, Options, emqx:data_dir()), + DBDir = db_dir(DataDir, Shard), _ = filelib:ensure_dir(DBDir), ExistingCFs = case rocksdb:list_column_families(DBDir, DBOptions) of @@ -440,15 +447,16 @@ rocksdb_open(Shard, Options) -> ], case rocksdb:open(DBDir, DBOptions, ColumnFamilies) of {ok, DBHandle, [_CFDefault | CFRefs]} -> + persistent_term:put({?MODULE, Shard, data_dir}, DataDir), {CFNames, _} = lists:unzip(ExistingCFs), {ok, DBHandle, lists:zip(CFNames, CFRefs)}; Error -> Error end. --spec db_dir(shard_id()) -> file:filename(). -db_dir({DB, ShardId}) -> - filename:join([emqx:data_dir(), atom_to_list(DB), binary_to_list(ShardId)]). +-spec db_dir(file:filename(), shard_id()) -> file:filename(). 
+db_dir(BaseDir, {DB, ShardId}) -> + filename:join([BaseDir, atom_to_list(DB), binary_to_list(ShardId)]). -spec update_last_until(Schema, emqx_ds:time()) -> Schema when Schema :: shard_schema() | shard(). update_last_until(Schema, Until) -> From 4a0fd756ae8c43edff0ccfa60ca0a6c8bf34b28b Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Mon, 15 Jan 2024 16:52:59 -0300 Subject: [PATCH 71/89] feat(ds): add `list_generations` and `drop_generation` APIs --- apps/emqx/priv/bpapi.versions | 1 + apps/emqx_durable_storage/src/emqx_ds.erl | 57 +++- .../src/emqx_ds_replication_layer.erl | 45 ++- .../src/emqx_ds_storage_bitfield_lts.erl | 16 + .../src/emqx_ds_storage_layer.erl | 293 ++++++++++++++---- .../src/emqx_ds_storage_reference.erl | 8 +- .../src/proto/emqx_ds_proto_v3.erl | 147 +++++++++ .../test/emqx_ds_SUITE.erl | 250 ++++++++++++++- 8 files changed, 755 insertions(+), 62 deletions(-) create mode 100644 apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl diff --git a/apps/emqx/priv/bpapi.versions b/apps/emqx/priv/bpapi.versions index 859d7fbe0..987c19535 100644 --- a/apps/emqx/priv/bpapi.versions +++ b/apps/emqx/priv/bpapi.versions @@ -22,6 +22,7 @@ {emqx_delayed,3}. {emqx_ds,1}. {emqx_ds,2}. +{emqx_ds,3}. {emqx_eviction_agent,1}. {emqx_eviction_agent,2}. {emqx_exhook,1}. diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index d679f7097..434169520 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -22,7 +22,14 @@ -module(emqx_ds). %% Management API: --export([open_db/2, update_db_config/2, add_generation/1, drop_db/1]). +-export([ + open_db/2, + update_db_config/2, + add_generation/1, + list_generations_with_lifetimes/1, + drop_generation/2, + drop_db/1 +]). %% Message storage API: -export([store_batch/2, store_batch/3]). 
@@ -52,7 +59,10 @@ get_iterator_result/1, ds_specific_stream/0, - ds_specific_iterator/0 + ds_specific_iterator/0, + ds_specific_generation_rank/0, + generation_rank/0, + generation_info/0 ]). %%================================================================================ @@ -80,6 +90,8 @@ -type ds_specific_stream() :: term(). +-type ds_specific_generation_rank() :: term(). + -type message_key() :: binary(). -type store_batch_result() :: ok | {error, _}. @@ -114,6 +126,17 @@ -type get_iterator_result(Iterator) :: {ok, Iterator} | undefined. +%% An opaque term identifying a generation. Each implementation will possibly add +%% information to this term to match its inner structure (e.g.: by embedding the shard id, +%% in the case of `emqx_ds_replication_layer'). +-opaque generation_rank() :: ds_specific_generation_rank(). + +-type generation_info() :: #{ + created_at := time(), + since := time(), + until := time() | undefined +}. + -define(persistent_term(DB), {emqx_ds_db_backend, DB}). -define(module(DB), (persistent_term:get(?persistent_term(DB)))). @@ -128,6 +151,11 @@ -callback update_db_config(db(), create_db_opts()) -> ok | {error, _}. +-callback list_generations_with_lifetimes(db()) -> + #{generation_rank() => generation_info()}. + +-callback drop_generation(db(), generation_rank()) -> ok | {error, _}. + -callback drop_db(db()) -> ok | {error, _}. -callback store_batch(db(), [emqx_types:message()], message_store_opts()) -> store_batch_result(). @@ -142,6 +170,11 @@ -callback next(db(), Iterator, pos_integer()) -> next_result(Iterator). +-optional_callbacks([ + list_generations_with_lifetimes/1, + drop_generation/2 +]). + %%================================================================================ %% API funcions %%================================================================================ @@ -166,6 +199,26 @@ add_generation(DB) -> update_db_config(DB, Opts) -> ?module(DB):update_db_config(DB, Opts). 
+-spec list_generations_with_lifetimes(db()) -> #{generation_rank() => generation_info()}. +list_generations_with_lifetimes(DB) -> + Mod = ?module(DB), + case erlang:function_exported(Mod, list_generations_with_lifetimes, 1) of + true -> + Mod:list_generations_with_lifetimes(DB); + false -> + #{} + end. + +-spec drop_generation(db(), generation_rank()) -> ok | {error, _}. +drop_generation(DB, GenId) -> + Mod = ?module(DB), + case erlang:function_exported(Mod, drop_generation, 2) of + true -> + Mod:drop_generation(DB, GenId); + false -> + {error, not_implemented} + end. + %% @doc TODO: currently if one or a few shards are down, they won't be %% deleted. diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl index 68d9459ee..387587570 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl @@ -25,6 +25,8 @@ open_db/2, add_generation/1, update_db_config/2, + list_generations_with_lifetimes/1, + drop_generation/2, drop_db/1, store_batch/3, get_streams/3, @@ -41,7 +43,9 @@ do_make_iterator_v1/5, do_update_iterator_v2/4, do_next_v1/4, - do_add_generation_v2/1 + do_add_generation_v2/1, + do_list_generations_with_lifetimes_v3/2, + do_drop_generation_v3/3 ]). -export_type([shard_id/0, builtin_db_opts/0, stream/0, iterator/0, message_id/0, batch/0]). @@ -104,6 +108,8 @@ ?batch_messages := [emqx_types:message()] }. +-type generation_rank() :: {shard_id(), term()}. + %%================================================================================ %% API functions %%================================================================================ @@ -135,6 +141,32 @@ add_generation(DB) -> update_db_config(DB, CreateOpts) -> emqx_ds_replication_layer_meta:update_db_config(DB, CreateOpts). +-spec list_generations_with_lifetimes(emqx_ds:db()) -> + #{generation_rank() => emqx_ds:generation_info()}. 
+list_generations_with_lifetimes(DB) -> + Shards = list_shards(DB), + lists:foldl( + fun(Shard, GensAcc) -> + Node = node_of_shard(DB, Shard), + maps:fold( + fun(GenId, Data, AccInner) -> + AccInner#{{Shard, GenId} => Data} + end, + GensAcc, + emqx_ds_proto_v3:list_generations_with_lifetimes(Node, DB, Shard) + ) + end, + #{}, + Shards + ). + +-spec drop_generation(emqx_ds:db(), generation_rank()) -> ok | {error, _}. +drop_generation(DB, {Shard, GenId}) -> + %% TODO: drop generation in all nodes in the replica set, not only in the leader, + %% after we have proper replication in place. + Node = node_of_shard(DB, Shard), + emqx_ds_proto_v3:drop_generation(Node, DB, Shard, GenId). + -spec drop_db(emqx_ds:db()) -> ok | {error, _}. drop_db(DB) -> Nodes = list_nodes(), @@ -301,7 +333,6 @@ do_next_v1(DB, Shard, Iter, BatchSize) -> -spec do_add_generation_v2(emqx_ds:db()) -> ok | {error, _}. do_add_generation_v2(DB) -> MyShards = emqx_ds_replication_layer_meta:my_owned_shards(DB), - lists:foreach( fun(ShardId) -> emqx_ds_storage_layer:add_generation({DB, ShardId}) @@ -309,6 +340,16 @@ do_add_generation_v2(DB) -> MyShards ). +-spec do_list_generations_with_lifetimes_v3(emqx_ds:db(), shard_id()) -> + #{emqx_ds:ds_specific_generation_rank() => emqx_ds:generation_info()}. +do_list_generations_with_lifetimes_v3(DB, ShardId) -> + emqx_ds_storage_layer:list_generations_with_lifetimes({DB, ShardId}). + +-spec do_drop_generation_v3(emqx_ds:db(), shard_id(), emqx_ds_storage_layer:gen_id()) -> + ok | {error, _}. +do_drop_generation_v3(DB, ShardId, GenId) -> + emqx_ds_storage_layer:drop_generation({DB, ShardId}, GenId). 
+ %%================================================================================ %% Internal functions %%================================================================================ diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl index 4c59a5f62..27d41e6c6 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl @@ -27,6 +27,7 @@ -export([ create/4, open/5, + drop/5, store_batch/4, get_streams/4, make_iterator/5, @@ -199,6 +200,21 @@ open(_Shard, DBHandle, GenId, CFRefs, Schema) -> ts_offset = TSOffsetBits }. +-spec drop( + emqx_ds_storage_layer:shard_id(), + rocksdb:db_handle(), + emqx_ds_storage_layer:gen_id(), + emqx_ds_storage_layer:cf_refs(), + s() +) -> + ok. +drop(_Shard, DBHandle, GenId, CFRefs, #s{}) -> + {_, DataCF} = lists:keyfind(data_cf(GenId), 1, CFRefs), + {_, TrieCF} = lists:keyfind(trie_cf(GenId), 1, CFRefs), + ok = rocksdb:drop_column_family(DBHandle, DataCF), + ok = rocksdb:drop_column_family(DBHandle, TrieCF), + ok. + -spec store_batch( emqx_ds_storage_layer:shard_id(), s(), [emqx_types:message()], emqx_ds:message_store_opts() ) -> diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index d44235924..0dcb8ce52 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -27,7 +27,9 @@ update_iterator/3, next/3, update_config/2, - add_generation/1 + add_generation/1, + list_generations_with_lifetimes/1, + drop_generation/2 ]). %% gen_server @@ -44,7 +46,8 @@ iterator/0, shard_id/0, options/0, - prototype/0 + prototype/0, + post_creation_context/0 ]). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). @@ -95,11 +98,18 @@ %%%% Generation: +-define(GEN_KEY(GEN_ID), {generation, GEN_ID}). 
+ -type generation(Data) :: #{ %% Module that handles data for the generation: module := module(), %% Module-specific data defined at generation creation time: data := Data, + %% Column families used by this generation + cf_refs := cf_refs(), + %% Time at which this was created. Might differ from `since', in particular for the + %% first generation. + created_at := emqx_ds:time(), %% When should this generation become active? %% This generation should only contain messages timestamped no earlier than that. %% The very first generation will have `since` equal 0. @@ -121,7 +131,7 @@ %% This data is used to create new generation: prototype := prototype(), %% Generations: - {generation, gen_id()} => GenData + ?GEN_KEY(gen_id()) => GenData }. %% Shard schema (persistent): @@ -132,6 +142,18 @@ -type options() :: map(). +-type post_creation_context() :: + #{ + shard_id := emqx_ds_storage_layer:shard_id(), + db := rocksdb:db_handle(), + new_gen_id := emqx_ds_storage_layer:gen_id(), + old_gen_id := emqx_ds_storage_layer:gen_id(), + new_cf_refs := cf_refs(), + old_cf_refs := cf_refs(), + new_gen_runtime_data := _NewData, + old_gen_runtime_data := _OldData + }. + %%================================================================================ %% Generation callbacks %%================================================================================ @@ -145,6 +167,9 @@ -callback open(shard_id(), rocksdb:db_handle(), gen_id(), cf_refs(), _Schema) -> _Data. +-callback drop(shard_id(), rocksdb:db_handle(), gen_id(), cf_refs(), _RuntimeData) -> + ok | {error, _Reason}. + -callback store_batch(shard_id(), _Data, [emqx_types:message()], emqx_ds:message_store_opts()) -> emqx_ds:store_batch_result(). @@ -157,10 +182,17 @@ -callback next(shard_id(), _Data, Iter, pos_integer()) -> {ok, Iter, [emqx_types:message()]} | {error, _}. +-callback post_creation_actions(post_creation_context()) -> _Data. + +-optional_callbacks([post_creation_actions/1]). 
+ %%================================================================================ %% API for the replication layer %%================================================================================ +-record(call_list_generations_with_lifetimes, {}). +-record(call_drop_generation, {gen_id :: gen_id()}). + -spec open_shard(shard_id(), options()) -> ok. open_shard(Shard, Options) -> emqx_ds_storage_layer_sup:ensure_shard(Shard, Options). @@ -188,18 +220,25 @@ store_batch(Shard, Messages, Options) -> [{integer(), stream()}]. get_streams(Shard, TopicFilter, StartTime) -> Gens = generations_since(Shard, StartTime), + ?tp(get_streams_all_gens, #{gens => Gens}), lists:flatmap( fun(GenId) -> - #{module := Mod, data := GenData} = generation_get(Shard, GenId), - Streams = Mod:get_streams(Shard, GenData, TopicFilter, StartTime), - [ - {GenId, #{ - ?tag => ?STREAM, - ?generation => GenId, - ?enc => Stream - }} - || Stream <- Streams - ] + ?tp(get_streams_get_gen, #{gen_id => GenId}), + case generation_get_safe(Shard, GenId) of + {ok, #{module := Mod, data := GenData}} -> + Streams = Mod:get_streams(Shard, GenData, TopicFilter, StartTime), + [ + {GenId, #{ + ?tag => ?STREAM, + ?generation => GenId, + ?enc => Stream + }} + || Stream <- Streams + ]; + {error, not_found} -> + %% race condition: generation was dropped before getting its streams? + [] + end end, Gens ). 
@@ -209,16 +248,20 @@ get_streams(Shard, TopicFilter, StartTime) -> make_iterator( Shard, #{?tag := ?STREAM, ?generation := GenId, ?enc := Stream}, TopicFilter, StartTime ) -> - #{module := Mod, data := GenData} = generation_get(Shard, GenId), - case Mod:make_iterator(Shard, GenData, Stream, TopicFilter, StartTime) of - {ok, Iter} -> - {ok, #{ - ?tag => ?IT, - ?generation => GenId, - ?enc => Iter - }}; - {error, _} = Err -> - Err + case generation_get_safe(Shard, GenId) of + {ok, #{module := Mod, data := GenData}} -> + case Mod:make_iterator(Shard, GenData, Stream, TopicFilter, StartTime) of + {ok, Iter} -> + {ok, #{ + ?tag => ?IT, + ?generation => GenId, + ?enc => Iter + }}; + {error, _} = Err -> + Err + end; + {error, not_found} -> + {error, end_of_stream} end. -spec update_iterator( @@ -230,33 +273,42 @@ update_iterator( #{?tag := ?IT, ?generation := GenId, ?enc := OldIter}, DSKey ) -> - #{module := Mod, data := GenData} = generation_get(Shard, GenId), - case Mod:update_iterator(Shard, GenData, OldIter, DSKey) of - {ok, Iter} -> - {ok, #{ - ?tag => ?IT, - ?generation => GenId, - ?enc => Iter - }}; - {error, _} = Err -> - Err + case generation_get_safe(Shard, GenId) of + {ok, #{module := Mod, data := GenData}} -> + case Mod:update_iterator(Shard, GenData, OldIter, DSKey) of + {ok, Iter} -> + {ok, #{ + ?tag => ?IT, + ?generation => GenId, + ?enc => Iter + }}; + {error, _} = Err -> + Err + end; + {error, not_found} -> + {error, end_of_stream} end. -spec next(shard_id(), iterator(), pos_integer()) -> emqx_ds:next_result(iterator()). next(Shard, Iter = #{?tag := ?IT, ?generation := GenId, ?enc := GenIter0}, BatchSize) -> - #{module := Mod, data := GenData} = generation_get(Shard, GenId), - Current = generation_current(Shard), - case Mod:next(Shard, GenData, GenIter0, BatchSize) of - {ok, _GenIter, []} when GenId < Current -> - %% This is a past generation. Storage layer won't write - %% any more messages here. 
The iterator reached the end: - %% the stream has been fully replayed. - {ok, end_of_stream}; - {ok, GenIter, Batch} -> - {ok, Iter#{?enc := GenIter}, Batch}; - Error = {error, _} -> - Error + case generation_get_safe(Shard, GenId) of + {ok, #{module := Mod, data := GenData}} -> + Current = generation_current(Shard), + case Mod:next(Shard, GenData, GenIter0, BatchSize) of + {ok, _GenIter, []} when GenId < Current -> + %% This is a past generation. Storage layer won't write + %% any more messages here. The iterator reached the end: + %% the stream has been fully replayed. + {ok, end_of_stream}; + {ok, GenIter, Batch} -> + {ok, Iter#{?enc := GenIter}, Batch}; + Error = {error, _} -> + Error + end; + {error, not_found} -> + %% generation was possibly dropped by GC + {ok, end_of_stream} end. -spec update_config(shard_id(), emqx_ds:create_db_opts()) -> ok. @@ -267,6 +319,21 @@ update_config(ShardId, Options) -> add_generation(ShardId) -> gen_server:call(?REF(ShardId), add_generation, infinity). +-spec list_generations_with_lifetimes(shard_id()) -> + #{ + gen_id() => #{ + created_at := emqx_ds:time(), + since := emqx_ds:time(), + until := undefined | emqx_ds:time() + } + }. +list_generations_with_lifetimes(ShardId) -> + gen_server:call(?REF(ShardId), #call_list_generations_with_lifetimes{}, infinity). + +-spec drop_generation(shard_id(), gen_id()) -> ok. +drop_generation(ShardId, GenId) -> + gen_server:call(?REF(ShardId), #call_drop_generation{gen_id = GenId}, infinity). 
+ %%================================================================================ %% gen_server for the shard %%================================================================================ @@ -328,6 +395,13 @@ handle_call(add_generation, _From, S0) -> S = add_generation(S0, Since), commit_metadata(S), {reply, ok, S}; +handle_call(#call_list_generations_with_lifetimes{}, _From, S) -> + Generations = handle_list_generations_with_lifetimes(S), + {reply, Generations, S}; +handle_call(#call_drop_generation{gen_id = GenId}, _From, S0) -> + {Reply, S} = handle_drop_generation(S0, GenId), + commit_metadata(S), + {reply, Reply, S}; handle_call(#call_create_generation{since = Since}, _From, S0) -> S = add_generation(S0, Since), commit_metadata(S), @@ -359,7 +433,7 @@ open_shard(ShardId, DB, CFRefs, ShardSchema) -> %% Transform generation schemas to generation runtime data: maps:map( fun - ({generation, GenId}, GenSchema) -> + (?GEN_KEY(GenId), GenSchema) -> open_generation(ShardId, DB, CFRefs, GenId, GenSchema); (_K, Val) -> Val @@ -372,10 +446,40 @@ add_generation(S0, Since) -> #s{shard_id = ShardId, db = DB, schema = Schema0, shard = Shard0, cf_refs = CFRefs0} = S0, Schema1 = update_last_until(Schema0, Since), Shard1 = update_last_until(Shard0, Since), + + #{current_generation := OldGenId, prototype := {CurrentMod, _ModConf}} = Schema0, + OldKey = ?GEN_KEY(OldGenId), + #{OldKey := OldGenSchema} = Schema0, + #{cf_refs := OldCFRefs} = OldGenSchema, + #{OldKey := #{module := OldMod, data := OldGenData}} = Shard0, + {GenId, Schema, NewCFRefs} = new_generation(ShardId, DB, Schema1, Since), + CFRefs = NewCFRefs ++ CFRefs0, - Key = {generation, GenId}, - Generation = open_generation(ShardId, DB, CFRefs, GenId, maps:get(Key, Schema)), + Key = ?GEN_KEY(GenId), + Generation0 = + #{data := NewGenData0} = + open_generation(ShardId, DB, CFRefs, GenId, maps:get(Key, Schema)), + + %% When the new generation's module is the same as the last one, we might want to + %% perform 
actions like inheriting some of the previous (meta)data. + NewGenData = + run_post_creation_actions( + #{ + shard_id => ShardId, + db => DB, + new_gen_id => GenId, + old_gen_id => OldGenId, + new_cf_refs => NewCFRefs, + old_cf_refs => OldCFRefs, + new_gen_runtime_data => NewGenData0, + old_gen_runtime_data => OldGenData, + new_module => CurrentMod, + old_module => OldMod + } + ), + Generation = Generation0#{data := NewGenData}, + Shard = Shard1#{current_generation := GenId, Key => Generation}, S0#s{ cf_refs = CFRefs, @@ -383,6 +487,54 @@ add_generation(S0, Since) -> shard = Shard }. +-spec handle_list_generations_with_lifetimes(server_state()) -> #{gen_id() => map()}. +handle_list_generations_with_lifetimes(#s{schema = ShardSchema}) -> + maps:fold( + fun + (?GEN_KEY(GenId), GenSchema, Acc) -> + Acc#{GenId => export_generation(GenSchema)}; + (_Key, _Value, Acc) -> + Acc + end, + #{}, + ShardSchema + ). + +-spec export_generation(generation_schema()) -> map(). +export_generation(GenSchema) -> + maps:with([created_at, since, until], GenSchema). + +-spec handle_drop_generation(server_state(), gen_id()) -> + {ok | {error, current_generation}, server_state()}. 
+handle_drop_generation(#s{schema = #{current_generation := GenId}} = S0, GenId) -> + {{error, current_generation}, S0}; +handle_drop_generation(#s{schema = Schema} = S0, GenId) when + not is_map_key(?GEN_KEY(GenId), Schema) +-> + {{error, not_found}, S0}; +handle_drop_generation(S0, GenId) -> + #s{ + shard_id = ShardId, + db = DB, + schema = #{?GEN_KEY(GenId) := GenSchema} = OldSchema, + shard = OldShard, + cf_refs = OldCFRefs + } = S0, + #{module := Mod, cf_refs := GenCFRefs} = GenSchema, + #{?GEN_KEY(GenId) := #{data := RuntimeData}} = OldShard, + case Mod:drop(ShardId, DB, GenId, GenCFRefs, RuntimeData) of + ok -> + CFRefs = OldCFRefs -- GenCFRefs, + Shard = maps:remove(?GEN_KEY(GenId), OldShard), + Schema = maps:remove(?GEN_KEY(GenId), OldSchema), + S = S0#s{ + cf_refs = CFRefs, + shard = Shard, + schema = Schema + }, + {ok, S} + end. + -spec open_generation(shard_id(), rocksdb:db_handle(), cf_refs(), gen_id(), generation_schema()) -> generation(). open_generation(ShardId, DB, CFRefs, GenId, GenSchema) -> @@ -409,10 +561,17 @@ new_generation(ShardId, DB, Schema0, Since) -> #{current_generation := PrevGenId, prototype := {Mod, ModConf}} = Schema0, GenId = PrevGenId + 1, {GenData, NewCFRefs} = Mod:create(ShardId, DB, GenId, ModConf), - GenSchema = #{module => Mod, data => GenData, since => Since, until => undefined}, + GenSchema = #{ + module => Mod, + data => GenData, + cf_refs => NewCFRefs, + created_at => emqx_message:timestamp_now(), + since => Since, + until => undefined + }, Schema = Schema0#{ current_generation => GenId, - {generation, GenId} => GenSchema + ?GEN_KEY(GenId) => GenSchema }, {GenId, Schema, NewCFRefs}. @@ -461,9 +620,26 @@ db_dir(BaseDir, {DB, ShardId}) -> -spec update_last_until(Schema, emqx_ds:time()) -> Schema when Schema :: shard_schema() | shard(). 
update_last_until(Schema, Until) -> #{current_generation := GenId} = Schema, - GenData0 = maps:get({generation, GenId}, Schema), + GenData0 = maps:get(?GEN_KEY(GenId), Schema), GenData = GenData0#{until := Until}, - Schema#{{generation, GenId} := GenData}. + Schema#{?GEN_KEY(GenId) := GenData}. + +run_post_creation_actions( + #{ + new_module := Mod, + old_module := Mod, + new_gen_runtime_data := NewGenData + } = Context +) -> + case erlang:function_exported(Mod, post_creation_actions, 1) of + true -> + Mod:post_creation_actions(Context); + false -> + NewGenData + end; +run_post_creation_actions(#{new_gen_runtime_data := NewGenData}) -> + %% Different implementation modules + NewGenData. %%-------------------------------------------------------------------------------- %% Schema access @@ -476,15 +652,24 @@ generation_current(Shard) -> -spec generation_get(shard_id(), gen_id()) -> generation(). generation_get(Shard, GenId) -> - #{{generation, GenId} := GenData} = get_schema_runtime(Shard), + {ok, GenData} = generation_get_safe(Shard, GenId), GenData. +-spec generation_get_safe(shard_id(), gen_id()) -> {ok, generation()} | {error, not_found}. +generation_get_safe(Shard, GenId) -> + case get_schema_runtime(Shard) of + #{?GEN_KEY(GenId) := GenData} -> + {ok, GenData}; + #{} -> + {error, not_found} + end. + -spec generations_since(shard_id(), emqx_ds:time()) -> [gen_id()]. 
generations_since(Shard, Since) -> Schema = get_schema_runtime(Shard), maps:fold( fun - ({generation, GenId}, #{until := Until}, Acc) when Until >= Since -> + (?GEN_KEY(GenId), #{until := Until}, Acc) when Until >= Since -> [GenId | Acc]; (_K, _V, Acc) -> Acc diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl index da7ac79f6..c958e56dc 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_reference.erl @@ -30,6 +30,7 @@ -export([ create/4, open/5, + drop/5, store_batch/4, get_streams/4, make_iterator/5, @@ -85,6 +86,10 @@ open(_Shard, DBHandle, GenId, CFRefs, #schema{}) -> {_, CF} = lists:keyfind(data_cf(GenId), 1, CFRefs), #s{db = DBHandle, cf = CF}. +drop(_ShardId, DBHandle, _GenId, _CFRefs, #s{cf = CFHandle}) -> + ok = rocksdb:drop_column_family(DBHandle, CFHandle), + ok. + store_batch(_ShardId, #s{db = DB, cf = CF}, Messages, _Options) -> lists:foreach( fun(Msg) -> @@ -142,7 +147,8 @@ do_next(TopicFilter, StartTime, IT, Action, NLeft, Key0, Acc) -> case rocksdb:iterator_move(IT, Action) of {ok, Key, Blob} -> Msg = #message{topic = Topic, timestamp = TS} = binary_to_term(Blob), - case emqx_topic:match(Topic, TopicFilter) andalso TS >= StartTime of + TopicWords = emqx_topic:words(Topic), + case emqx_topic:match(TopicWords, TopicFilter) andalso TS >= StartTime of true -> do_next(TopicFilter, StartTime, IT, next, NLeft - 1, Key, [{Key, Msg} | Acc]); false -> diff --git a/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl b/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl new file mode 100644 index 000000000..74a174c4c --- /dev/null +++ b/apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl @@ -0,0 +1,147 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds_proto_v3). + +-behavior(emqx_bpapi). + +-include_lib("emqx_utils/include/bpapi.hrl"). +%% API: +-export([ + drop_db/2, + store_batch/5, + get_streams/5, + make_iterator/6, + next/5, + update_iterator/5, + add_generation/2, + + %% introduced in v3 + list_generations_with_lifetimes/3, + drop_generation/4 +]). + +%% behavior callbacks: +-export([introduced_in/0]). + +%%================================================================================ +%% API functions +%%================================================================================ + +-spec drop_db([node()], emqx_ds:db()) -> + [{ok, ok} | {error, _}]. +drop_db(Node, DB) -> + erpc:multicall(Node, emqx_ds_replication_layer, do_drop_db_v1, [DB]). + +-spec get_streams( + node(), + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds:topic_filter(), + emqx_ds:time() +) -> + [{integer(), emqx_ds_storage_layer:stream()}]. +get_streams(Node, DB, Shard, TopicFilter, Time) -> + erpc:call(Node, emqx_ds_replication_layer, do_get_streams_v1, [DB, Shard, TopicFilter, Time]). + +-spec make_iterator( + node(), + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds_storage_layer:stream(), + emqx_ds:topic_filter(), + emqx_ds:time() +) -> + {ok, emqx_ds_storage_layer:iterator()} | {error, _}.
+make_iterator(Node, DB, Shard, Stream, TopicFilter, StartTime) -> + erpc:call(Node, emqx_ds_replication_layer, do_make_iterator_v1, [ + DB, Shard, Stream, TopicFilter, StartTime + ]). + +-spec next( + node(), + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds_storage_layer:iterator(), + pos_integer() +) -> + {ok, emqx_ds_storage_layer:iterator(), [{emqx_ds:message_key(), [emqx_types:message()]}]} + | {ok, end_of_stream} + | {error, _}. +next(Node, DB, Shard, Iter, BatchSize) -> + emqx_rpc:call(Shard, Node, emqx_ds_replication_layer, do_next_v1, [DB, Shard, Iter, BatchSize]). + +-spec store_batch( + node(), + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds_replication_layer:batch(), + emqx_ds:message_store_opts() +) -> + emqx_ds:store_batch_result(). +store_batch(Node, DB, Shard, Batch, Options) -> + emqx_rpc:call(Shard, Node, emqx_ds_replication_layer, do_store_batch_v1, [ + DB, Shard, Batch, Options + ]). + +-spec update_iterator( + node(), + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds_storage_layer:iterator(), + emqx_ds:message_key() +) -> + {ok, emqx_ds_storage_layer:iterator()} | {error, _}. +update_iterator(Node, DB, Shard, OldIter, DSKey) -> + erpc:call(Node, emqx_ds_replication_layer, do_update_iterator_v2, [ + DB, Shard, OldIter, DSKey + ]). + +-spec add_generation([node()], emqx_ds:db()) -> + [{ok, ok} | {error, _}]. +add_generation(Node, DB) -> + erpc:multicall(Node, emqx_ds_replication_layer, do_add_generation_v2, [DB]). + +%%-------------------------------------------------------------------------------- +%% Introduced in V3 +%%-------------------------------------------------------------------------------- + +-spec list_generations_with_lifetimes( + node(), + emqx_ds:db(), + emqx_ds_replication_layer:shard_id() +) -> + #{ + emqx_ds:ds_specific_generation_rank() => emqx_ds:generation_info() + }. 
+list_generations_with_lifetimes(Node, DB, Shard) -> + erpc:call(Node, emqx_ds_replication_layer, do_list_generations_with_lifetimes_v3, [DB, Shard]). + +-spec drop_generation( + node(), + emqx_ds:db(), + emqx_ds_replication_layer:shard_id(), + emqx_ds_storage_layer:gen_id() +) -> + ok | {error, _}. +drop_generation(Node, DB, Shard, GenId) -> + erpc:call(Node, emqx_ds_replication_layer, do_drop_generation_v3, [DB, Shard, GenId]). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +introduced_in() -> + "5.6.0". diff --git a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl index cb9d81580..d7dccccf5 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_SUITE.erl @@ -155,7 +155,7 @@ t_05_update_iterator(_Config) -> ?assertEqual(Msgs, AllMsgs, #{from_key => Iter1, final_iter => FinalIter}), ok. -t_05_update_config(_Config) -> +t_06_update_config(_Config) -> DB = ?FUNCTION_NAME, ?assertMatch(ok, emqx_ds:open_db(DB, opts())), TopicFilter = ['#'], @@ -199,7 +199,7 @@ t_05_update_config(_Config) -> end, lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). -t_06_add_generation(_Config) -> +t_07_add_generation(_Config) -> DB = ?FUNCTION_NAME, ?assertMatch(ok, emqx_ds:open_db(DB, opts())), TopicFilter = ['#'], @@ -243,6 +243,250 @@ t_06_add_generation(_Config) -> end, lists:foldl(Checker, [], lists:zip(StartTimes, MsgsList)). +%% Verifies the basic usage of `list_generations_with_lifetimes' and `drop_generation'... +%% 1) Cannot drop current generation. +%% 2) All existing generations are returned by `list_generations_with_lifetimes'. +%% 3) Dropping a generation removes it from the list. +%% 4) Dropped generations stay dropped even after restarting the application.
+t_08_smoke_list_drop_generation(_Config) -> + DB = ?FUNCTION_NAME, + ?check_trace( + begin + ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + %% Exactly one generation at first. + Generations0 = emqx_ds:list_generations_with_lifetimes(DB), + ?assertMatch( + [{_GenId, #{since := _, until := _}}], + maps:to_list(Generations0), + #{gens => Generations0} + ), + [{GenId0, _}] = maps:to_list(Generations0), + %% Cannot delete current generation + ?assertEqual({error, current_generation}, emqx_ds:drop_generation(DB, GenId0)), + + %% New gen + ok = emqx_ds:add_generation(DB), + Generations1 = emqx_ds:list_generations_with_lifetimes(DB), + ?assertMatch( + [ + {GenId0, #{since := _, until := _}}, + {_GenId1, #{since := _, until := _}} + ], + lists:sort(maps:to_list(Generations1)), + #{gens => Generations1} + ), + [GenId0, GenId1] = lists:sort(maps:keys(Generations1)), + + %% Drop the older one + ?assertEqual(ok, emqx_ds:drop_generation(DB, GenId0)), + Generations2 = emqx_ds:list_generations_with_lifetimes(DB), + ?assertMatch( + [{GenId1, #{since := _, until := _}}], + lists:sort(maps:to_list(Generations2)), + #{gens => Generations2} + ), + + %% Unknown gen_id, as it was already dropped + ?assertEqual({error, not_found}, emqx_ds:drop_generation(DB, GenId0)), + + %% Should persist surviving generation list + ok = application:stop(emqx_durable_storage), + {ok, _} = application:ensure_all_started(emqx_durable_storage), + ok = emqx_ds:open_db(DB, opts()), + + Generations3 = emqx_ds:list_generations_with_lifetimes(DB), + ?assertMatch( + [{GenId1, #{since := _, until := _}}], + lists:sort(maps:to_list(Generations3)), + #{gens => Generations3} + ), + + ok + end, + [] + ), + ok. + +t_drop_generation_with_never_used_iterator(_Config) -> + %% This test checks how the iterator behaves when: + %% 1) it's created at generation 1 and not consumed from. + %% 2) generation 2 is created and 1 dropped. + %% 3) iteration begins. 
+ %% In this case, the iterator won't see any messages and the stream will end. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + Now = emqx_message:timestamp_now(), + Msgs1 = [ + message(<<"foo/bar">>, <<"3">>, Now + 100), + message(<<"foo/baz">>, <<"4">>, Now + 101) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), + + ?assertMatch({ok, end_of_stream, []}, iterate(DB, Iter0, 1)), + + %% New iterator for the new stream will only see the later messages. + [{_, Stream1}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + ?assertNotEqual(Stream0, Stream1), + {ok, Iter1} = emqx_ds:make_iterator(DB, Stream1, TopicFilter, StartTime), + + {ok, Iter, Batch} = iterate(DB, Iter1, 1), + ?assertNotEqual(end_of_stream, Iter), + ?assertEqual(Msgs1, [Msg || {_Key, Msg} <- Batch]), + + ok. + +t_drop_generation_with_used_once_iterator(_Config) -> + %% This test checks how the iterator behaves when: + %% 1) it's created at generation 1 and consumes at least 1 message. + %% 2) generation 2 is created and 1 dropped. + %% 3) iteration continues. + %% In this case, the iterator should see no more messages and the stream will end. 
+ + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = + [Msg0 | _] = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + {ok, Iter1, Batch1} = emqx_ds:next(DB, Iter0, 1), + ?assertNotEqual(end_of_stream, Iter1), + ?assertEqual([Msg0], [Msg || {_Key, Msg} <- Batch1]), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + Now = emqx_message:timestamp_now(), + Msgs1 = [ + message(<<"foo/bar">>, <<"3">>, Now + 100), + message(<<"foo/baz">>, <<"4">>, Now + 101) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs1)), + + ?assertMatch({ok, end_of_stream, []}, iterate(DB, Iter1, 1)), + + ok. + +t_drop_generation_update_iterator(_Config) -> + %% This checks the behavior of `emqx_ds:update_iterator' after the generation + %% underlying the iterator has been dropped. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + {ok, Iter0} = emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime), + {ok, Iter1, _Batch1} = emqx_ds:next(DB, Iter0, 1), + {ok, _Iter2, [{Key2, _Msg}]} = emqx_ds:next(DB, Iter1, 1), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + ?assertEqual({error, end_of_stream}, emqx_ds:update_iterator(DB, Iter1, Key2)), + + ok. 
+ +t_make_iterator_stale_stream(_Config) -> + %% This checks the behavior of `emqx_ds:make_iterator' after the generation underlying + %% the stream has been dropped. + + DB = ?FUNCTION_NAME, + ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs0)), + + [{_, Stream0}] = emqx_ds:get_streams(DB, TopicFilter, StartTime), + + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:drop_generation(DB, GenId0), + + ?assertEqual( + {error, end_of_stream}, + emqx_ds:make_iterator(DB, Stream0, TopicFilter, StartTime) + ), + + ok. + +t_get_streams_concurrently_with_drop_generation(_Config) -> + %% This checks that we can get all streams while a generation is dropped + %% mid-iteration. + + DB = ?FUNCTION_NAME, + ?check_trace( + #{timetrap => 5_000}, + begin + ?assertMatch(ok, emqx_ds:open_db(DB, opts())), + + [GenId0] = maps:keys(emqx_ds:list_generations_with_lifetimes(DB)), + ok = emqx_ds:add_generation(DB), + ok = emqx_ds:add_generation(DB), + + %% All streams + TopicFilter = emqx_topic:words(<<"foo/+">>), + StartTime = 0, + ?assertMatch([_, _, _], emqx_ds:get_streams(DB, TopicFilter, StartTime)), + + ?force_ordering( + #{?snk_kind := dropped_gen}, + #{?snk_kind := get_streams_get_gen} + ), + + spawn_link(fun() -> + {ok, _} = ?block_until(#{?snk_kind := get_streams_all_gens}), + ok = emqx_ds:drop_generation(DB, GenId0), + ?tp(dropped_gen, #{}) + end), + + ?assertMatch([_, _], emqx_ds:get_streams(DB, TopicFilter, StartTime)), + + ok + end, + [] + ), + + ok. + update_data_set() -> [ [ @@ -295,7 +539,7 @@ iterate(DB, It0, BatchSize, Acc) -> {ok, It, Msgs} -> iterate(DB, It, BatchSize, Acc ++ Msgs); {ok, end_of_stream} -> - {ok, It0, Acc}; + {ok, end_of_stream, Acc}; Ret -> Ret end. 
From 7c0d37fdb978486b3bcfd07856011a26682a94a1 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 18 Jan 2024 14:28:13 -0300 Subject: [PATCH 72/89] feat(lts): inherit previous generation's lts when possible --- apps/emqx_durable_storage/src/emqx_ds_lts.erl | 11 ++++++- .../src/emqx_ds_storage_bitfield_lts.erl | 30 ++++++++++++++++- .../emqx_ds_storage_bitfield_lts_SUITE.erl | 33 +++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_lts.erl index bcf95852d..9d87cf571 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_lts.erl @@ -18,7 +18,12 @@ %% API: -export([ - trie_create/1, trie_create/0, trie_restore/2, topic_key/3, match_topics/2, lookup_topic_key/2 + trie_create/1, trie_create/0, + trie_restore/2, + trie_restore_existing/2, + topic_key/3, + match_topics/2, + lookup_topic_key/2 ]). %% Debug: @@ -115,6 +120,10 @@ trie_create() -> -spec trie_restore(options(), [{_Key, _Val}]) -> trie(). trie_restore(Options, Dump) -> Trie = trie_create(Options), + trie_restore_existing(Trie, Dump). + +-spec trie_restore_existing(trie(), [{_Key, _Val}]) -> trie(). +trie_restore_existing(Trie, Dump) -> lists:foreach( fun({{StateFrom, Token}, StateTo}) -> trie_insert(Trie, StateFrom, Token, StateTo) diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl index 27d41e6c6..2a3086a57 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl @@ -32,7 +32,8 @@ get_streams/4, make_iterator/5, update_iterator/4, - next/4 + next/4, + post_creation_actions/1 ]). %% internal exports: @@ -200,6 +201,22 @@ open(_Shard, DBHandle, GenId, CFRefs, Schema) -> ts_offset = TSOffsetBits }. 
+-spec post_creation_actions(emqx_ds_storage_layer:post_creation_context()) -> + s(). +post_creation_actions( + #{ + db := DBHandle, + old_gen_id := OldGenId, + old_cf_refs := OldCFRefs, + new_gen_runtime_data := NewGenData0 + } +) -> + {_, OldTrieCF} = lists:keyfind(trie_cf(OldGenId), 1, OldCFRefs), + #s{trie = NewTrie0} = NewGenData0, + NewTrie = copy_previous_trie(DBHandle, NewTrie0, OldTrieCF), + ?tp(bitfield_lts_inherited_trie, #{}), + NewGenData0#s{trie = NewTrie}. + -spec drop( emqx_ds_storage_layer:shard_id(), rocksdb:db_handle(), @@ -516,6 +533,17 @@ restore_trie(TopicIndexBytes, DB, CF) -> rocksdb:iterator_close(IT) end. +-spec copy_previous_trie(rocksdb:db_handle(), emqx_ds_lts:trie(), rocksdb:cf_handle()) -> + emqx_ds_lts:trie(). +copy_previous_trie(DBHandle, NewTrie, OldCF) -> + {ok, IT} = rocksdb:iterator(DBHandle, OldCF, []), + try + OldDump = read_persisted_trie(IT, rocksdb:iterator_move(IT, first)), + emqx_ds_lts:trie_restore_existing(NewTrie, OldDump) + after + rocksdb:iterator_close(IT) + end. + read_persisted_trie(IT, {ok, KeyB, ValB}) -> [ {binary_to_term(KeyB), binary_to_term(ValB)} diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl index 03ff1a6cb..5d32143a7 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_bitfield_lts_SUITE.erl @@ -131,6 +131,39 @@ t_get_streams(_Config) -> ?assert(lists:member(A, AllStreams)), ok. +t_new_generation_inherit_trie(_Config) -> + %% This test checks that we inherit the previous generation's LTS when creating a new + %% generation. 
+ ?check_trace( + begin + %% Create a bunch of topics to be learned in the first generation + Timestamps = lists:seq(1, 10_000, 100), + Batch = [ + begin + B = integer_to_binary(I), + make_message( + TS, + <<"wildcard/", B/binary, "/suffix/", Suffix/binary>>, + integer_to_binary(TS) + ) + end + || I <- lists:seq(1, 200), + TS <- Timestamps, + Suffix <- [<<"foo">>, <<"bar">>] + ], + ok = emqx_ds_storage_layer:store_batch(?SHARD, Batch, []), + %% Now we create a new generation with the same LTS module. It should inherit the + %% learned trie. + ok = emqx_ds_storage_layer:add_generation(?SHARD), + ok + end, + fun(Trace) -> + ?assertMatch([_], ?of_kind(bitfield_lts_inherited_trie, Trace)), + ok + end + ), + ok. + t_replay(_Config) -> %% Create concrete topics: Topics = [<<"foo/bar">>, <<"foo/bar/baz">>], From d323fc7c2702fc2cc355affdcec2efcf1cbf2da4 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Thu, 18 Jan 2024 14:28:37 -0300 Subject: [PATCH 73/89] feat(ps): add message gc --- .../emqx_persistent_message_ds_gc_worker.erl | 157 ++++++++++++++++++ .../src/emqx_persistent_session_ds_sup.erl | 5 +- apps/emqx/src/emqx_schema.erl | 8 + .../test/emqx_persistent_messages_SUITE.erl | 87 +++++++++- changes/ce/feat-12338.en.md | 1 + rel/i18n/emqx_schema.hocon | 3 + 6 files changed, 257 insertions(+), 4 deletions(-) create mode 100644 apps/emqx/src/emqx_persistent_message_ds_gc_worker.erl create mode 100644 changes/ce/feat-12338.en.md diff --git a/apps/emqx/src/emqx_persistent_message_ds_gc_worker.erl b/apps/emqx/src/emqx_persistent_message_ds_gc_worker.erl new file mode 100644 index 000000000..b960eae9e --- /dev/null +++ b/apps/emqx/src/emqx_persistent_message_ds_gc_worker.erl @@ -0,0 +1,157 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_persistent_message_ds_gc_worker). + +-behaviour(gen_server). + +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("stdlib/include/qlc.hrl"). +-include_lib("stdlib/include/ms_transform.hrl"). + +-include("emqx_persistent_session_ds.hrl"). + +%% API +-export([ + start_link/0, + gc/0 +]). + +%% `gen_server' API +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2 +]). + +%% call/cast/info records +-record(gc, {}). + +%%-------------------------------------------------------------------------------- +%% API +%%-------------------------------------------------------------------------------- + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +%% For testing or manual ops +gc() -> + gen_server:call(?MODULE, #gc{}, infinity). + +%%-------------------------------------------------------------------------------- +%% `gen_server' API +%%-------------------------------------------------------------------------------- + +init(_Opts) -> + ensure_gc_timer(), + State = #{}, + {ok, State}. + +handle_call(#gc{}, _From, State) -> + maybe_gc(), + {reply, ok, State}; +handle_call(_Call, _From, State) -> + {reply, error, State}. + +handle_cast(_Cast, State) -> + {noreply, State}. 
+ +handle_info(#gc{}, State) -> + try_gc(), + ensure_gc_timer(), + {noreply, State}; +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------------------- +%% Internal fns +%%-------------------------------------------------------------------------------- + +ensure_gc_timer() -> + Timeout = emqx_config:get([session_persistence, message_retention_period]), + _ = erlang:send_after(Timeout, self(), #gc{}), + ok. + +try_gc() -> + %% Only cores should run GC. + CoreNodes = mria_membership:running_core_nodelist(), + Res = global:trans( + {?MODULE, self()}, + fun maybe_gc/0, + CoreNodes, + %% Note: we set retries to 1 here because, in rare occasions, GC might start at the + %% same time in more than one node, and each one will abort the other. By allowing + %% one retry, at least one node will (hopefully) get to enter the transaction and + %% the other will abort. If GC runs too fast, both nodes might run in sequence. + %% But, in that case, GC is clearly not too costly, and that shouldn't be a problem, + %% resource-wise. + _Retries = 1 + ), + case Res of + aborted -> + ?tp(ds_message_gc_lock_taken, #{}), + ok; + ok -> + ok + end. + +now_ms() -> + erlang:system_time(millisecond). + +maybe_gc() -> + AllGens = emqx_ds:list_generations_with_lifetimes(?PERSISTENT_MESSAGE_DB), + NowMS = now_ms(), + RetentionPeriod = emqx_config:get([session_persistence, message_retention_period]), + TimeThreshold = NowMS - RetentionPeriod, + maybe_create_new_generation(AllGens, TimeThreshold), + ?tp_span( + ps_message_gc, + #{}, + begin + ExpiredGens = + maps:filter( + fun(_GenId, #{until := Until}) -> + is_number(Until) andalso Until =< TimeThreshold + end, + AllGens + ), + ExpiredGenIds = maps:keys(ExpiredGens), + lists:foreach( + fun(GenId) -> + ok = emqx_ds:drop_generation(?PERSISTENT_MESSAGE_DB, GenId), + ?tp(message_gc_generation_dropped, #{gen_id => GenId}) + end, + ExpiredGenIds + ) + end + ). 
+ +maybe_create_new_generation(AllGens, TimeThreshold) -> + NeedNewGen = + lists:all( + fun({_GenId, #{created_at := CreatedAt}}) -> + CreatedAt =< TimeThreshold + end, + maps:to_list(AllGens) + ), + case NeedNewGen of + false -> + ?tp(ps_message_gc_too_early, #{}), + ok; + true -> + ok = emqx_ds:add_generation(?PERSISTENT_MESSAGE_DB), + ?tp(ps_message_gc_added_gen, #{}) + end. diff --git a/apps/emqx/src/emqx_persistent_session_ds_sup.erl b/apps/emqx/src/emqx_persistent_session_ds_sup.erl index 5bd620e8b..11e05be82 100644 --- a/apps/emqx/src/emqx_persistent_session_ds_sup.erl +++ b/apps/emqx/src/emqx_persistent_session_ds_sup.erl @@ -48,13 +48,14 @@ init(Opts) -> do_init(_Opts) -> SupFlags = #{ - strategy => rest_for_one, + strategy => one_for_one, intensity => 10, period => 2, auto_shutdown => never }, CoreChildren = [ - worker(gc_worker, emqx_persistent_session_ds_gc_worker, []) + worker(session_gc_worker, emqx_persistent_session_ds_gc_worker, []), + worker(message_gc_worker, emqx_persistent_message_ds_gc_worker, []) ], Children = case mria_rlog:role() of diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 7cd67089d..56d575bd9 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -1855,6 +1855,14 @@ fields("session_persistence") -> desc => ?DESC(session_ds_session_gc_batch_size) } )}, + {"message_retention_period", + sc( + timeout_duration(), + #{ + default => <<"1d">>, + desc => ?DESC(session_ds_message_retention_period) + } + )}, {"force_persistence", sc( boolean(), diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index f25f38098..c46d726f4 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -19,6 +19,7 @@ -include_lib("stdlib/include/assert.hrl"). -include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). 
+-include_lib("emqx/include/emqx.hrl"). -include_lib("emqx/include/emqx_mqtt.hrl"). -compile(export_all). @@ -45,10 +46,20 @@ init_per_testcase(t_session_subscription_iterators = TestCase, Config) -> Cluster = cluster(), Nodes = emqx_cth_cluster:start(Cluster, #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)}), [{nodes, Nodes} | Config]; +init_per_testcase(t_message_gc = TestCase, Config) -> + Opts = #{ + extra_emqx_conf => + "\n session_persistence.message_retention_period = 1s" + "\n session_persistence.storage.builtin.n_shards = 3" + }, + common_init_per_testcase(TestCase, [{n_shards, 3} | Config], Opts); init_per_testcase(TestCase, Config) -> + common_init_per_testcase(TestCase, Config, _Opts = #{}). + +common_init_per_testcase(TestCase, Config, Opts) -> ok = emqx_ds:drop_db(?PERSISTENT_MESSAGE_DB), Apps = emqx_cth_suite:start( - app_specs(), + app_specs(Opts), #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)} ), [{apps, Apps} | Config]. @@ -379,6 +390,66 @@ t_publish_empty_topic_levels(_Config) -> emqtt:stop(Pub) end. +t_message_gc_too_young(_Config) -> + %% Check that GC doesn't attempt to create a new generation if there are fresh enough + %% generations around. The stability of this test relies on the default value for + %% message retention being long enough. Currently, the default is 1 day. + ?check_trace( + ok = emqx_persistent_message_ds_gc_worker:gc(), + fun(Trace) -> + ?assertMatch([_], ?of_kind(ps_message_gc_too_early, Trace)), + ok + end + ), + ok. + +t_message_gc(Config) -> + %% Check that, after GC runs, a new generation is created, retaining messages, and + %% older messages are no longer accessible.
+ NShards = ?config(n_shards, Config), + ?check_trace( + #{timetrap => 10_000}, + begin + %% ensure some messages are in the first generation + ?force_ordering( + #{?snk_kind := inserted_batch}, + #{?snk_kind := ps_message_gc_added_gen} + ), + Msgs0 = [ + message(<<"foo/bar">>, <<"1">>, 0), + message(<<"foo/baz">>, <<"2">>, 1) + ], + ok = emqx_ds:store_batch(?PERSISTENT_MESSAGE_DB, Msgs0), + ?tp(inserted_batch, #{}), + {ok, _} = ?block_until(#{?snk_kind := ps_message_gc_added_gen}), + + Now = emqx_message:timestamp_now(), + Msgs1 = [ + message(<<"foo/bar">>, <<"3">>, Now + 100), + message(<<"foo/baz">>, <<"4">>, Now + 101) + ], + ok = emqx_ds:store_batch(?PERSISTENT_MESSAGE_DB, Msgs1), + + {ok, _} = snabbkaffe:block_until( + ?match_n_events(NShards, #{?snk_kind := message_gc_generation_dropped}), + infinity + ), + + TopicFilter = emqx_topic:words(<<"#">>), + StartTime = 0, + Msgs = consume(TopicFilter, StartTime), + %% only "1" and "2" should have been GC'ed + ?assertEqual( + sets:from_list([<<"3">>, <<"4">>], [{version, 2}]), + sets:from_list([emqx_message:payload(Msg) || Msg <- Msgs], [{version, 2}]) + ), + + ok + end, + [] + ), + ok. + %% connect(ClientId, CleanStart, EI) -> @@ -438,9 +509,13 @@ publish(Node, Message) -> erpc:call(Node, emqx, publish, [Message]). app_specs() -> + app_specs(_Opts = #{}). + +app_specs(Opts) -> + ExtraEMQXConf = maps:get(extra_emqx_conf, Opts, ""), [ emqx_durable_storage, - {emqx, "session_persistence {enable = true}"} + {emqx, "session_persistence {enable = true}" ++ ExtraEMQXConf} ]. cluster() -> @@ -459,3 +534,11 @@ clear_db() -> mria:stop(), ok = mnesia:delete_schema([node()]), ok. + +message(Topic, Payload, PublishedAt) -> + #message{ + topic = Topic, + payload = Payload, + timestamp = PublishedAt, + id = emqx_guid:gen() + }. 
diff --git a/changes/ce/feat-12338.en.md b/changes/ce/feat-12338.en.md new file mode 100644 index 000000000..8b8edcb76 --- /dev/null +++ b/changes/ce/feat-12338.en.md @@ -0,0 +1 @@ +Added time-based message garbage collection to the RocksDB-based persistent session backend. diff --git a/rel/i18n/emqx_schema.hocon b/rel/i18n/emqx_schema.hocon index af4251328..be16c765e 100644 --- a/rel/i18n/emqx_schema.hocon +++ b/rel/i18n/emqx_schema.hocon @@ -1608,5 +1608,8 @@ The session will query the DB for the new messages when the value of `FreeSpace` `FreeSpace` is calculated as `ReceiveMaximum` for the session - number of inflight messages.""" +session_ds_message_retention_period.desc: +"""The minimum amount of time that messages should be retained for. After messages have been in storage for at least this period of time, they'll be dropped.""" + } From 878c9ee8b14fb0facfeb52275ad55a457f455a4e Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Mon, 22 Jan 2024 13:10:44 -0300 Subject: [PATCH 74/89] fix(ds): do not count persistent session-only routed messages as dropped Fixes https://emqx.atlassian.net/browse/EMQX-11539 --- apps/emqx/src/emqx_broker.erl | 12 ++++--- .../test/emqx_persistent_messages_SUITE.erl | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx_broker.erl b/apps/emqx/src/emqx_broker.erl index ac9116cbd..6dc893043 100644 --- a/apps/emqx/src/emqx_broker.erl +++ b/apps/emqx/src/emqx_broker.erl @@ -249,7 +249,7 @@ publish(Msg) when is_record(Msg, message) -> []; Msg1 = #message{topic = Topic} -> PersistRes = persist_publish(Msg1), - PersistRes ++ route(aggre(emqx_router:match_routes(Topic)), delivery(Msg1)) + route(aggre(emqx_router:match_routes(Topic)), delivery(Msg1), PersistRes) end. persist_publish(Msg) -> @@ -289,18 +289,20 @@ delivery(Msg) -> #delivery{sender = self(), message = Msg}. 
%% Route %%-------------------------------------------------------------------- --spec route([emqx_types:route_entry()], emqx_types:delivery()) -> +-spec route([emqx_types:route_entry()], emqx_types:delivery(), nil() | [persisted]) -> emqx_types:publish_result(). -route([], #delivery{message = Msg}) -> +route([], #delivery{message = Msg}, _PersistRes = []) -> ok = emqx_hooks:run('message.dropped', [Msg, #{node => node()}, no_subscribers]), ok = inc_dropped_cnt(Msg), []; -route(Routes, Delivery) -> +route([], _Delivery, PersistRes = [_ | _]) -> + PersistRes; +route(Routes, Delivery, PersistRes) -> lists:foldl( fun(Route, Acc) -> [do_route(Route, Delivery) | Acc] end, - [], + PersistRes, Routes ). diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index c46d726f4..73c88adc8 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -450,6 +450,32 @@ t_message_gc(Config) -> ), ok. +t_metrics_not_dropped(_Config) -> + %% Asserts that, if only persisted sessions are subscribed to a topic being published + %% to, we don't bump the `messages.dropped' metric, nor do we run the equivalent hook.
+ Sub = connect(<>, true, 30), + on_exit(fun() -> emqtt:stop(Sub) end), + Pub = connect(<>, true, 30), + on_exit(fun() -> emqtt:stop(Pub) end), + Hookpoint = 'message.dropped', + emqx_hooks:add(Hookpoint, {?MODULE, on_message_dropped, [self()]}, 1_000), + on_exit(fun() -> emqx_hooks:del(Hookpoint, {?MODULE, on_message_dropped}) end), + + DroppedBefore = emqx_metrics:val('messages.dropped'), + DroppedNoSubBefore = emqx_metrics:val('messages.dropped.no_subscribers'), + + {ok, _, [?RC_GRANTED_QOS_1]} = emqtt:subscribe(Sub, <<"t/+">>, ?QOS_1), + emqtt:publish(Pub, <<"t/ps">>, <<"payload">>, ?QOS_1), + ?assertMatch([_], receive_messages(1, 1_500)), + + DroppedAfter = emqx_metrics:val('messages.dropped'), + DroppedNoSubAfter = emqx_metrics:val('messages.dropped.no_subscribers'), + + ?assertEqual(DroppedBefore, DroppedAfter), + ?assertEqual(DroppedNoSubBefore, DroppedNoSubAfter), + + ok. + %% connect(ClientId, CleanStart, EI) -> @@ -542,3 +568,8 @@ message(Topic, Payload, PublishedAt) -> timestamp = PublishedAt, id = emqx_guid:gen() }. + +on_message_dropped(Msg, Context, Res, TestPid) -> + ErrCtx = #{msg => Msg, ctx => Context, res => Res}, + ct:pal("this hook should not be called.\n ~p", [ErrCtx]), + exit(TestPid, {hookpoint_called, ErrCtx}). 
From dad8a32e0bee2712a837efeb2557fbc28e69fb7d Mon Sep 17 00:00:00 2001 From: firest Date: Mon, 22 Jan 2024 20:45:10 +0800 Subject: [PATCH 75/89] feat(opents): improve the OpentsDB bridge to v2 style --- apps/emqx_bridge/src/emqx_action_info.erl | 3 +- .../src/emqx_bridge_opents.erl | 128 ++++- .../src/emqx_bridge_opents_action_info.erl | 71 +++ .../src/emqx_bridge_opents_connector.erl | 194 ++++++- .../test/emqx_bridge_opents_SUITE.erl | 484 +++++++----------- .../src/schema/emqx_connector_ee_schema.erl | 18 +- .../src/schema/emqx_connector_schema.erl | 4 +- rel/i18n/emqx_bridge_opents.hocon | 31 ++ rel/i18n/emqx_bridge_opents_connector.hocon | 6 + 9 files changed, 607 insertions(+), 332 deletions(-) create mode 100644 apps/emqx_bridge_opents/src/emqx_bridge_opents_action_info.erl diff --git a/apps/emqx_bridge/src/emqx_action_info.erl b/apps/emqx_bridge/src/emqx_action_info.erl index d80050191..4f6228998 100644 --- a/apps/emqx_bridge/src/emqx_action_info.erl +++ b/apps/emqx_bridge/src/emqx_action_info.erl @@ -98,7 +98,8 @@ hard_coded_action_info_modules_ee() -> emqx_bridge_timescale_action_info, emqx_bridge_redis_action_info, emqx_bridge_iotdb_action_info, - emqx_bridge_es_action_info + emqx_bridge_es_action_info, + emqx_bridge_opents_action_info ]. -else. hard_coded_action_info_modules_ee() -> diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl b/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl index cfb12453d..7e490576f 100644 --- a/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- -module(emqx_bridge_opents). @@ -7,10 +7,12 @@ -include_lib("hocon/include/hoconsc.hrl"). 
-include_lib("emqx_resource/include/emqx_resource.hrl"). --import(hoconsc, [mk/2, enum/1, ref/2]). +-import(hoconsc, [mk/2, enum/1, ref/2, array/1]). -export([ - conn_bridge_examples/1 + conn_bridge_examples/1, + bridge_v2_examples/1, + default_data_template/0 ]). -export([ @@ -20,8 +22,11 @@ desc/1 ]). +-define(CONNECTOR_TYPE, opents). +-define(ACTION_TYPE, ?CONNECTOR_TYPE). + %% ------------------------------------------------------------------------------------------------- -%% api +%% v1 examples conn_bridge_examples(Method) -> [ #{ @@ -34,7 +39,7 @@ conn_bridge_examples(Method) -> values(_Method) -> #{ - enable => true, + enabledb => true, type => opents, name => <<"foo">>, server => <<"http://127.0.0.1:4242">>, @@ -50,7 +55,37 @@ values(_Method) -> }. %% ------------------------------------------------------------------------------------------------- -%% Hocon Schema Definitions +%% v2 examples +bridge_v2_examples(Method) -> + [ + #{ + <<"opents">> => #{ + summary => <<"OpenTSDB Action">>, + value => emqx_bridge_v2_schema:action_values( + Method, ?ACTION_TYPE, ?CONNECTOR_TYPE, action_values() + ) + } + } + ]. + +action_values() -> + #{ + parameters => #{ + data => default_data_template() + } + }. + +default_data_template() -> + [ + #{ + metric => <<"${metric}">>, + tags => <<"${tags}">>, + value => <<"${value}">> + } + ]. + +%% ------------------------------------------------------------------------------------------------- +%% V1 Schema Definitions namespace() -> "bridge_opents". roots() -> []. @@ -65,10 +100,89 @@ fields("post") -> fields("put") -> fields("config"); fields("get") -> - emqx_bridge_schema:status_fields() ++ fields("post"). 
+ emqx_bridge_schema:status_fields() ++ fields("post"); +%% ------------------------------------------------------------------------------------------------- +%% V2 Schema Definitions + +fields(action) -> + {opents, + mk( + hoconsc:map(name, ref(?MODULE, action_config)), + #{ + desc => <<"OpenTSDB Action Config">>, + required => false + } + )}; +fields(action_config) -> + emqx_bridge_v2_schema:make_producer_action_schema( + mk( + ref(?MODULE, action_parameters), + #{ + required => true, desc => ?DESC("action_parameters") + } + ) + ); +fields(action_parameters) -> + [ + {data, + mk( + array(ref(?MODULE, action_parameters_data)), + #{ + desc => ?DESC("action_parameters_data"), + default => <<"[]">> + } + )} + ]; +fields(action_parameters_data) -> + [ + {timestamp, + mk( + binary(), + #{ + desc => ?DESC("config_parameters_timestamp"), + required => false + } + )}, + {metric, + mk( + binary(), + #{ + required => true, + desc => ?DESC("config_parameters_metric") + } + )}, + {tags, + mk( + binary(), + #{ + required => true, + desc => ?DESC("config_parameters_tags") + } + )}, + {value, + mk( + binary(), + #{ + required => true, + desc => ?DESC("config_parameters_value") + } + )} + ]; +fields("post_bridge_v2") -> + emqx_bridge_schema:type_and_name_fields(enum([opents])) ++ fields(action_config); +fields("put_bridge_v2") -> + fields(action_config); +fields("get_bridge_v2") -> + emqx_bridge_schema:status_fields() ++ fields("post_bridge_v2"). 
desc("config") -> ?DESC("desc_config"); +desc(action_config) -> + ?DESC("desc_config"); +desc(action_parameters) -> + ?DESC("action_parameters"); +desc(action_parameters_data) -> + ?DESC("action_parameters_data"); desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> ["Configuration for OpenTSDB using `", string:to_upper(Method), "` method."]; desc(_) -> diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents_action_info.erl b/apps/emqx_bridge_opents/src/emqx_bridge_opents_action_info.erl new file mode 100644 index 000000000..4c4c9568c --- /dev/null +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents_action_info.erl @@ -0,0 +1,71 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- +-module(emqx_bridge_opents_action_info). + +-behaviour(emqx_action_info). + +-elvis([{elvis_style, invalid_dynamic_call, disable}]). + +%% behaviour callbacks +-export([ + action_type_name/0, + bridge_v1_config_to_action_config/2, + bridge_v1_config_to_connector_config/1, + bridge_v1_type_name/0, + connector_action_config_to_bridge_v1_config/2, + connector_type_name/0, + schema_module/0 +]). + +-import(emqx_utils_conv, [bin/1]). + +-define(ACTION_TYPE, opents). +-define(SCHEMA_MODULE, emqx_bridge_opents). + +action_type_name() -> ?ACTION_TYPE. +bridge_v1_type_name() -> ?ACTION_TYPE. +connector_type_name() -> ?ACTION_TYPE. + +schema_module() -> ?SCHEMA_MODULE. + +connector_action_config_to_bridge_v1_config(ConnectorConfig, ActionConfig) -> + MergedConfig = + emqx_utils_maps:deep_merge( + maps:without( + [<<"description">>, <<"local_topic">>, <<"connector">>, <<"data">>], + emqx_utils_maps:unindent(<<"parameters">>, ActionConfig) + ), + ConnectorConfig + ), + BridgeV1Keys = schema_keys("config"), + maps:with(BridgeV1Keys, MergedConfig). 
+ +bridge_v1_config_to_action_config(BridgeV1Config, ConnectorName) -> + ActionTopLevelKeys = schema_keys(action_config), + ActionParametersKeys = schema_keys(action_parameters), + ActionKeys = ActionTopLevelKeys ++ ActionParametersKeys, + ActionConfig = make_config_map(ActionKeys, ActionParametersKeys, BridgeV1Config), + emqx_utils_maps:update_if_present( + <<"resource_opts">>, + fun emqx_bridge_v2_schema:project_to_actions_resource_opts/1, + ActionConfig#{<<"connector">> => ConnectorName} + ). + +bridge_v1_config_to_connector_config(BridgeV1Config) -> + ConnectorKeys = schema_keys(emqx_bridge_opents_connector, "config_connector"), + emqx_utils_maps:update_if_present( + <<"resource_opts">>, + fun emqx_connector_schema:project_to_connector_resource_opts/1, + maps:with(ConnectorKeys, BridgeV1Config) + ). + +make_config_map(PickKeys, IndentKeys, Config) -> + Conf0 = maps:with(PickKeys, Config#{<<"data">> => []}), + emqx_utils_maps:indent(<<"parameters">>, IndentKeys, Conf0). + +schema_keys(Name) -> + schema_keys(?SCHEMA_MODULE, Name). + +schema_keys(Mod, Name) -> + [bin(Key) || Key <- proplists:get_keys(Mod:fields(Name))]. diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl b/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl index 9271abe15..6af1e2f55 100644 --- a/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- -module(emqx_bridge_opents_connector). @@ -12,7 +12,7 @@ -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("hocon/include/hoconsc.hrl"). --export([roots/0, fields/1]). +-export([namespace/0, roots/0, fields/1, desc/1]). 
%% `emqx_resource' API -export([ @@ -21,15 +21,25 @@ on_stop/2, on_query/3, on_batch_query/3, - on_get_status/2 + on_get_status/2, + on_add_channel/4, + on_remove_channel/3, + on_get_channels/1, + on_get_channel_status/3 ]). +-export([connector_examples/1]). + -export([connect/1]). -import(hoconsc, [mk/2, enum/1, ref/2]). +-define(CONNECTOR_TYPE, opents). + +namespace() -> "opents_connector". + %%===================================================================== -%% Hocon schema +%% V1 Hocon schema roots() -> [{config, #{type => hoconsc:ref(?MODULE, config)}}]. @@ -40,8 +50,56 @@ fields(config) -> {summary, mk(boolean(), #{default => true, desc => ?DESC("summary")})}, {details, mk(boolean(), #{default => false, desc => ?DESC("details")})}, {auto_reconnect, fun emqx_connector_schema_lib:auto_reconnect/1} + ]; +%%===================================================================== +%% V2 Hocon schema + +fields("config_connector") -> + emqx_connector_schema:common_fields() ++ + proplists_without([auto_reconnect], fields(config)); +fields("post") -> + emqx_connector_schema:type_and_name_fields(enum([opents])) ++ fields("config_connector"); +fields("put") -> + fields("config_connector"); +fields("get") -> + emqx_bridge_schema:status_fields() ++ fields("post"). + +desc(config) -> + ?DESC("desc_config"); +desc("config_connector") -> + ?DESC("desc_config"); +desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> + ["Configuration for IoTDB using `", string:to_upper(Method), "` method."]; +desc(_) -> + undefined. + +proplists_without(Keys, List) -> + [El || El = {K, _} <- List, not lists:member(K, Keys)]. + +%%===================================================================== +%% V2 examples +connector_examples(Method) -> + [ + #{ + <<"opents">> => + #{ + summary => <<"OpenTSDB Connector">>, + value => emqx_connector_schema:connector_values( + Method, ?CONNECTOR_TYPE, connector_example_values() + ) + } + } ]. 
+connector_example_values() -> + #{ + name => <<"opents_connector">>, + type => opents, + enable => true, + server => <<"http://localhost:4242/">>, + pool_size => 8 + }. + %%======================================================================================== %% `emqx_resource' API %%======================================================================================== @@ -56,8 +114,7 @@ on_start( server := Server, pool_size := PoolSize, summary := Summary, - details := Details, - resource_opts := #{batch_size := BatchSize} + details := Details } = Config ) -> ?SLOG(info, #{ @@ -70,11 +127,10 @@ on_start( {server, to_str(Server)}, {summary, Summary}, {details, Details}, - {max_batch_size, BatchSize}, {pool_size, PoolSize} ], - State = #{pool_name => InstanceId, server => Server}, + State = #{pool_name => InstanceId, server => Server, channels => #{}}, case opentsdb_connectivity(Server) of ok -> case emqx_resource_pool:start(InstanceId, ?MODULE, Options) of @@ -93,6 +149,7 @@ on_stop(InstanceId, _State) -> msg => "stopping_opents_connector", connector => InstanceId }), + ?tp(opents_bridge_stopped, #{instance_id => InstanceId}), emqx_resource_pool:stop(InstanceId). on_query(InstanceId, Request, State) -> @@ -101,10 +158,14 @@ on_query(InstanceId, Request, State) -> on_batch_query( InstanceId, BatchReq, - State + #{channels := Channels} = State ) -> - Datas = [format_opentsdb_msg(Msg) || {_Key, Msg} <- BatchReq], - do_query(InstanceId, Datas, State). + case try_render_messages(BatchReq, Channels) of + {ok, Datas} -> + do_query(InstanceId, Datas, State); + Error -> + Error + end. on_get_status(_InstanceId, #{server := Server}) -> Result = @@ -117,6 +178,39 @@ on_get_status(_InstanceId, #{server := Server}) -> end, Result. 
+on_add_channel( + _InstanceId, + #{channels := Channels} = OldState, + ChannelId, + #{ + parameters := #{data := Data} = Parameter + } +) -> + case maps:is_key(ChannelId, Channels) of + true -> + {error, already_exists}; + _ -> + Channel = Parameter#{ + data := preproc_data_template(Data) + }, + Channels2 = Channels#{ChannelId => Channel}, + {ok, OldState#{channels := Channels2}} + end. + +on_remove_channel(_InstanceId, #{channels := Channels} = OldState, ChannelId) -> + {ok, OldState#{channels => maps:remove(ChannelId, Channels)}}. + +on_get_channels(InstanceId) -> + emqx_bridge_v2:get_channels_for_connector(InstanceId). + +on_get_channel_status(InstanceId, ChannelId, #{channels := Channels} = State) -> + case maps:is_key(ChannelId, Channels) of + true -> + on_get_status(InstanceId, State); + _ -> + {error, not_exists} + end. + %%======================================================================================== %% Helper fns %%======================================================================================== @@ -127,6 +221,9 @@ do_query(InstanceId, Query, #{pool_name := PoolName} = State) -> "opents_connector_received", #{connector => InstanceId, query => Query, state => State} ), + + ?tp(opents_bridge_on_query, #{instance_id => InstanceId}), + Result = ecpool:pick_and_do(PoolName, {opentsdb, put, [Query]}, no_handover), case Result of @@ -172,17 +269,66 @@ opentsdb_connectivity(Server) -> end, emqx_connector_lib:http_connectivity(SvrUrl, ?HTTP_CONNECT_TIMEOUT). -format_opentsdb_msg(Msg) -> - maps:with( - [ - timestamp, - metric, - tags, - value, - <<"timestamp">>, - <<"metric">>, - <<"tags">>, - <<"value">> - ], - Msg +try_render_messages([{ChannelId, _} | _] = BatchReq, Channels) -> + case maps:find(ChannelId, Channels) of + {ok, Channel} -> + {ok, + lists:foldl( + fun({_, Message}, Acc) -> + render_channel_message(Message, Channel, Acc) + end, + [], + BatchReq + )}; + _ -> + {error, {unrecoverable_error, {invalid_channel_id, ChannelId}}} + end. 
+ +render_channel_message(Msg, #{data := DataList}, Acc) -> + RawOpts = #{return => rawlist, var_trans => fun(X) -> X end}, + lists:foldl( + fun(#{metric := MetricTk, tags := TagsTk, value := ValueTk} = Data, InAcc) -> + MetricVal = emqx_placeholder:proc_tmpl(MetricTk, Msg), + TagsVal = + case emqx_placeholder:proc_tmpl(TagsTk, Msg, RawOpts) of + [undefined] -> + #{}; + [Any] -> + Any + end, + ValueVal = + case ValueTk of + [_] -> + erlang:hd(emqx_placeholder:proc_tmpl(ValueTk, Msg, RawOpts)); + _ -> + emqx_placeholder:proc_tmpl(ValueTk, Msg) + end, + Base = #{metric => MetricVal, tags => TagsVal, value => ValueVal}, + [ + case maps:get(timestamp, Data, undefined) of + undefined -> + Base; + TimestampTk -> + Base#{timestamp => emqx_placeholder:proc_tmpl(TimestampTk, Msg)} + end + | InAcc + ] + end, + Acc, + DataList + ). + +preproc_data_template([]) -> + preproc_data_template(emqx_bridge_opents:default_data_template()); +preproc_data_template(DataList) -> + lists:map( + fun(Data) -> + maps:map( + fun(_Key, Value) -> + emqx_placeholder:preproc_tmpl(Value) + end, + Data + ) + end, + DataList ). diff --git a/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl b/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl index 3632ce786..f86ae6986 100644 --- a/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl +++ b/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- -module(emqx_bridge_opents_SUITE). @@ -12,7 +12,8 @@ -include_lib("snabbkaffe/include/snabbkaffe.hrl"). % DB defaults --define(BATCH_SIZE, 10). +-define(BRIDGE_TYPE_BIN, <<"opents">>). +-define(APPS, [opentsdb, emqx_bridge, emqx_resource, emqx_rule_engine, emqx_bridge_opents_SUITE]). 
%%------------------------------------------------------------------------------ %% CT boilerplate @@ -20,95 +21,34 @@ all() -> [ - {group, with_batch}, - {group, without_batch} + {group, default} ]. groups() -> - TCs = emqx_common_test_helpers:all(?MODULE), + AllTCs = emqx_common_test_helpers:all(?MODULE), [ - {with_batch, TCs}, - {without_batch, TCs} + {default, AllTCs} ]. -init_per_group(with_batch, Config0) -> - Config = [{batch_size, ?BATCH_SIZE} | Config0], - common_init(Config); -init_per_group(without_batch, Config0) -> - Config = [{batch_size, 1} | Config0], - common_init(Config); -init_per_group(_Group, Config) -> - Config. - -end_per_group(Group, Config) when Group =:= with_batch; Group =:= without_batch -> - ProxyHost = ?config(proxy_host, Config), - ProxyPort = ?config(proxy_port, Config), - emqx_common_test_helpers:reset_proxy(ProxyHost, ProxyPort), - ok; -end_per_group(_Group, _Config) -> - ok. - init_per_suite(Config) -> - Config. + emqx_bridge_v2_testlib:init_per_suite(Config, ?APPS). -end_per_suite(_Config) -> - emqx_mgmt_api_test_util:end_suite(), - ok = emqx_common_test_helpers:stop_apps([opentsdb, emqx_bridge, emqx_resource, emqx_conf]), - ok. +end_per_suite(Config) -> + emqx_bridge_v2_testlib:end_per_suite(Config). -init_per_testcase(_Testcase, Config) -> - delete_bridge(Config), - snabbkaffe:start_trace(), - Config. - -end_per_testcase(_Testcase, Config) -> - ProxyHost = ?config(proxy_host, Config), - ProxyPort = ?config(proxy_port, Config), - emqx_common_test_helpers:reset_proxy(ProxyHost, ProxyPort), - ok = snabbkaffe:stop(), - delete_bridge(Config), - ok. 
- -%%------------------------------------------------------------------------------ -%% Helper fns -%%------------------------------------------------------------------------------ - -common_init(ConfigT) -> - Host = os:getenv("OPENTS_HOST", "toxiproxy"), +init_per_group(default, Config0) -> + Host = os:getenv("OPENTS_HOST", "toxiproxy.emqx.net"), Port = list_to_integer(os:getenv("OPENTS_PORT", "4242")), - - Config0 = [ - {opents_host, Host}, - {opents_port, Port}, - {proxy_name, "opents"} - | ConfigT - ], - - BridgeType = proplists:get_value(bridge_type, Config0, <<"opents">>), + ProxyName = "opents", case emqx_common_test_helpers:is_tcp_server_available(Host, Port) of true -> - % Setup toxiproxy - ProxyHost = os:getenv("PROXY_HOST", "toxiproxy"), - ProxyPort = list_to_integer(os:getenv("PROXY_PORT", "8474")), - emqx_common_test_helpers:reset_proxy(ProxyHost, ProxyPort), - % Ensure enterprise bridge module is loaded - ok = emqx_common_test_helpers:start_apps([ - emqx_conf, emqx_resource, emqx_bridge - ]), - _ = application:ensure_all_started(opentsdb), - _ = emqx_bridge_enterprise:module_info(), - emqx_mgmt_api_test_util:init_suite(), - {Name, OpenTSConf} = opents_config(BridgeType, Config0), - Config = - [ - {opents_config, OpenTSConf}, - {opents_bridge_type, BridgeType}, - {opents_name, Name}, - {proxy_host, ProxyHost}, - {proxy_port, ProxyPort} - | Config0 - ], - Config; + Config = emqx_bridge_v2_testlib:init_per_group(default, ?BRIDGE_TYPE_BIN, Config0), + [ + {bridge_host, Host}, + {bridge_port, Port}, + {proxy_name, ProxyName} + | Config + ]; false -> case os:getenv("IS_CI") of "yes" -> @@ -116,244 +56,152 @@ common_init(ConfigT) -> _ -> {skip, no_opents} end - end. 
- -opents_config(BridgeType, Config) -> - Port = integer_to_list(?config(opents_port, Config)), - Server = "http://" ++ ?config(opents_host, Config) ++ ":" ++ Port, - Name = atom_to_binary(?MODULE), - BatchSize = ?config(batch_size, Config), - ConfigString = - io_lib:format( - "bridges.~s.~s {\n" - " enable = true\n" - " server = ~p\n" - " resource_opts = {\n" - " request_ttl = 500ms\n" - " batch_size = ~b\n" - " query_mode = sync\n" - " }\n" - "}", - [ - BridgeType, - Name, - Server, - BatchSize - ] - ), - {Name, parse_and_check(ConfigString, BridgeType, Name)}. - -parse_and_check(ConfigString, BridgeType, Name) -> - {ok, RawConf} = hocon:binary(ConfigString, #{format => map}), - hocon_tconf:check_plain(emqx_bridge_schema, RawConf, #{required => false, atom_key => false}), - #{<<"bridges">> := #{BridgeType := #{Name := Config}}} = RawConf, + end; +init_per_group(_Group, Config) -> Config. -create_bridge(Config) -> - create_bridge(Config, _Overrides = #{}). +end_per_group(default, Config) -> + emqx_bridge_v2_testlib:end_per_group(Config), + ok; +end_per_group(_Group, _Config) -> + ok. -create_bridge(Config, Overrides) -> - BridgeType = ?config(opents_bridge_type, Config), - Name = ?config(opents_name, Config), - Config0 = ?config(opents_config, Config), - Config1 = emqx_utils_maps:deep_merge(Config0, Overrides), - emqx_bridge:create(BridgeType, Name, Config1). +init_per_testcase(TestCase, Config0) -> + Type = ?config(bridge_type, Config0), + UniqueNum = integer_to_binary(erlang:unique_integer()), + Name = << + (atom_to_binary(TestCase))/binary, UniqueNum/binary + >>, + {_ConfigString, ConnectorConfig} = connector_config(Name, Config0), + {_, ActionConfig} = action_config(Name, Config0), + Config = [ + {connector_type, Type}, + {connector_name, Name}, + {connector_config, ConnectorConfig}, + {bridge_type, Type}, + {bridge_name, Name}, + {bridge_config, ActionConfig} + | Config0 + ], + %% iotdb_reset(Config), + ok = snabbkaffe:start_trace(), + Config. 
-delete_bridge(Config) -> - BridgeType = ?config(opents_bridge_type, Config), - Name = ?config(opents_name, Config), - emqx_bridge:remove(BridgeType, Name). - -create_bridge_http(Params) -> - Path = emqx_mgmt_api_test_util:api_path(["bridges"]), - AuthHeader = emqx_mgmt_api_test_util:auth_header_(), - case emqx_mgmt_api_test_util:request_api(post, Path, "", AuthHeader, Params) of - {ok, Res} -> {ok, emqx_utils_json:decode(Res, [return_maps])}; - Error -> Error - end. - -send_message(Config, Payload) -> - Name = ?config(opents_name, Config), - BridgeType = ?config(opents_bridge_type, Config), - BridgeID = emqx_bridge_resource:bridge_id(BridgeType, Name), - emqx_bridge:send_message(BridgeID, Payload). - -query_resource(Config, Request) -> - query_resource(Config, Request, 1_000). - -query_resource(Config, Request, Timeout) -> - Name = ?config(opents_name, Config), - BridgeType = ?config(opents_bridge_type, Config), - ResourceID = emqx_bridge_resource:resource_id(BridgeType, Name), - emqx_resource:query(ResourceID, Request, #{timeout => Timeout}). +end_per_testcase(TestCase, Config) -> + emqx_bridge_v2_testlib:end_per_testcase(TestCase, Config). %%------------------------------------------------------------------------------ -%% Testcases +%% Helper fns %%------------------------------------------------------------------------------ -t_setup_via_config_and_publish(Config) -> - ?assertMatch( - {ok, _}, - create_bridge(Config) - ), - SentData = make_data(), - ?check_trace( - begin - {_, {ok, #{result := Result}}} = - ?wait_async_action( - send_message(Config, SentData), - #{?snk_kind := buffer_worker_flush_ack}, - 2_000 - ), - ?assertMatch( - {ok, 200, #{failed := 0, success := 1}}, Result - ), - ok - end, - fun(Trace0) -> - Trace = ?of_kind(opents_connector_query_return, Trace0), - ?assertMatch([#{result := {ok, 200, #{failed := 0, success := 1}}}], Trace), - ok - end - ), - ok. 
+action_config(Name, Config) -> + Type = ?config(bridge_type, Config), + ConfigString = + io_lib:format( + "actions.~s.~s {\n" + " enable = true\n" + " connector = \"~s\"\n" + " parameters = {\n" + " data = []\n" + " }\n" + "}\n", + [ + Type, + Name, + Name + ] + ), + ct:pal("ActionConfig:~ts~n", [ConfigString]), + {ConfigString, parse_action_and_check(ConfigString, Type, Name)}. -t_setup_via_http_api_and_publish(Config) -> - BridgeType = ?config(opents_bridge_type, Config), - Name = ?config(opents_name, Config), - OpentsConfig0 = ?config(opents_config, Config), - OpentsConfig = OpentsConfig0#{ - <<"name">> => Name, - <<"type">> => BridgeType - }, - ?assertMatch( - {ok, _}, - create_bridge_http(OpentsConfig) - ), - SentData = make_data(), - ?check_trace( - begin - Request = {send_message, SentData}, - Res0 = query_resource(Config, Request, 2_500), - ?assertMatch( - {ok, 200, #{failed := 0, success := 1}}, Res0 - ), - ok - end, - fun(Trace0) -> - Trace = ?of_kind(opents_connector_query_return, Trace0), - ?assertMatch([#{result := {ok, 200, #{failed := 0, success := 1}}}], Trace), - ok - end - ), - ok. +connector_config(Name, Config) -> + Host = ?config(bridge_host, Config), + Port = ?config(bridge_port, Config), + Type = ?config(bridge_type, Config), + ServerURL = opents_server_url(Host, Port), + ConfigString = + io_lib:format( + "connectors.~s.~s {\n" + " enable = true\n" + " server = \"~s\"\n" + "}\n", + [ + Type, + Name, + ServerURL + ] + ), + ct:pal("ConnectorConfig:~ts~n", [ConfigString]), + {ConfigString, parse_connector_and_check(ConfigString, Type, Name)}. -t_get_status(Config) -> - ?assertMatch( - {ok, _}, - create_bridge(Config) - ), +parse_action_and_check(ConfigString, BridgeType, Name) -> + parse_and_check(ConfigString, emqx_bridge_schema, <<"actions">>, BridgeType, Name). 
- Name = ?config(opents_name, Config), - BridgeType = ?config(opents_bridge_type, Config), - ResourceID = emqx_bridge_resource:resource_id(BridgeType, Name), +parse_connector_and_check(ConfigString, ConnectorType, Name) -> + parse_and_check( + ConfigString, emqx_connector_schema, <<"connectors">>, ConnectorType, Name + ). +%% emqx_utils_maps:safe_atom_key_map(Config). - ?assertEqual({ok, connected}, emqx_resource_manager:health_check(ResourceID)), - ok. +parse_and_check(ConfigString, SchemaMod, RootKey, Type0, Name) -> + Type = to_bin(Type0), + {ok, RawConf} = hocon:binary(ConfigString, #{format => map}), + hocon_tconf:check_plain(SchemaMod, RawConf, #{required => false, atom_key => false}), + #{RootKey := #{Type := #{Name := Config}}} = RawConf, + Config. -t_create_disconnected(Config) -> - BridgeType = proplists:get_value(bridge_type, Config, <<"opents">>), - Config1 = lists:keyreplace(opents_port, 1, Config, {opents_port, 61234}), - {_Name, OpenTSConf} = opents_config(BridgeType, Config1), +to_bin(List) when is_list(List) -> + unicode:characters_to_binary(List, utf8); +to_bin(Atom) when is_atom(Atom) -> + erlang:atom_to_binary(Atom); +to_bin(Bin) when is_binary(Bin) -> + Bin. - Config2 = lists:keyreplace(opents_config, 1, Config1, {opents_config, OpenTSConf}), - ?assertMatch({ok, _}, create_bridge(Config2)), +opents_server_url(Host, Port) -> + iolist_to_binary([ + "http://", + Host, + ":", + integer_to_binary(Port) + ]). - Name = ?config(opents_name, Config), - ResourceID = emqx_bridge_resource:resource_id(BridgeType, Name), - ?assertEqual({ok, disconnected}, emqx_resource_manager:health_check(ResourceID)), - ok. +is_success_check({ok, 200, #{failed := Failed}}) -> + ?assertEqual(0, Failed); +is_success_check(Ret) -> + ?assert(false, Ret). 
-t_write_failure(Config) -> - ProxyName = ?config(proxy_name, Config), - ProxyPort = ?config(proxy_port, Config), - ProxyHost = ?config(proxy_host, Config), - {ok, _} = create_bridge(Config), - SentData = make_data(), - emqx_common_test_helpers:with_failure(down, ProxyName, ProxyHost, ProxyPort, fun() -> - {_, {ok, #{result := Result}}} = - ?wait_async_action( - send_message(Config, SentData), - #{?snk_kind := buffer_worker_flush_ack}, - 2_000 - ), - ?assertMatch({error, _}, Result), - ok - end), - ok. +is_error_check(Result) -> + ?assertMatch({error, {400, #{failed := 1}}}, Result). -t_write_timeout(Config) -> - ProxyName = ?config(proxy_name, Config), - ProxyPort = ?config(proxy_port, Config), - ProxyHost = ?config(proxy_host, Config), - {ok, _} = create_bridge( - Config, - #{ - <<"resource_opts">> => #{ - <<"request_ttl">> => <<"500ms">>, - <<"resume_interval">> => <<"100ms">>, - <<"health_check_interval">> => <<"100ms">> +opentds_query(Config, Metric) -> + Path = <<"/api/query">>, + Opts = #{return_all => true}, + Body = #{ + start => <<"1h-ago">>, + queries => [ + #{ + aggregator => <<"last">>, + metric => Metric, + tags => #{ + host => <<"*">> + } } - } - ), - SentData = make_data(), - emqx_common_test_helpers:with_failure( - timeout, ProxyName, ProxyHost, ProxyPort, fun() -> - ?assertMatch( - {error, {resource_error, #{reason := timeout}}}, - query_resource(Config, {send_message, SentData}) - ) - end - ), - ok. + ], + showTSUID => false, + showQuery => false, + delete => false + }, + opentsdb_request(Config, Path, Body, Opts). -t_missing_data(Config) -> - ?assertMatch( - {ok, _}, - create_bridge(Config) - ), - {_, {ok, #{result := Result}}} = - ?wait_async_action( - send_message(Config, #{}), - #{?snk_kind := buffer_worker_flush_ack}, - 2_000 - ), - ?assertMatch( - {error, {400, #{failed := 1, success := 0}}}, - Result - ), - ok. +opentsdb_request(Config, Path, Body) -> + opentsdb_request(Config, Path, Body, #{}). 
-t_bad_data(Config) -> - ?assertMatch( - {ok, _}, - create_bridge(Config) - ), - Data = maps:without([metric], make_data()), - {_, {ok, #{result := Result}}} = - ?wait_async_action( - send_message(Config, Data), - #{?snk_kind := buffer_worker_flush_ack}, - 2_000 - ), - - ?assertMatch( - {error, {400, #{failed := 1, success := 0}}}, Result - ), - ok. - -make_data() -> - make_data(<<"cpu">>, 12). +opentsdb_request(Config, Path, Body, Opts) -> + Host = ?config(bridge_host, Config), + Port = ?config(bridge_port, Config), + ServerURL = opents_server_url(Host, Port), + URL = <>, + emqx_mgmt_api_test_util:request_api(post, URL, [], [], Body, Opts). make_data(Metric, Value) -> #{ @@ -363,3 +211,45 @@ make_data(Metric, Value) -> }, value => Value }. + +%%------------------------------------------------------------------------------ +%% Testcases +%%------------------------------------------------------------------------------ + +t_query_simple(Config) -> + Metric = <<"t_query_simple">>, + Value = 12, + MakeMessageFun = fun() -> make_data(Metric, Value) end, + ok = emqx_bridge_v2_testlib:t_sync_query( + Config, MakeMessageFun, fun is_success_check/1, opents_bridge_on_query + ), + {ok, {{_, 200, _}, _, IoTDBResult}} = opentds_query(Config, Metric), + QResult = emqx_utils_json:decode(IoTDBResult), + ?assertMatch( + [ + #{ + <<"metric">> := Metric, + <<"dps">> := _ + } + ], + QResult + ), + [#{<<"dps">> := Dps}] = QResult, + ?assertMatch([Value | _], maps:values(Dps)). + +t_create_via_http(Config) -> + emqx_bridge_v2_testlib:t_create_via_http(Config). + +t_start_stop(Config) -> + emqx_bridge_v2_testlib:t_start_stop(Config, opents_bridge_stopped). + +t_on_get_status(Config) -> + emqx_bridge_v2_testlib:t_on_get_status(Config, #{failure_status => connecting}). 
+ +t_query_invalid_data(Config) -> + Metric = <<"t_query_invalid_data">>, + Value = 12, + MakeMessageFun = fun() -> maps:remove(value, make_data(Metric, Value)) end, + ok = emqx_bridge_v2_testlib:t_sync_query( + Config, MakeMessageFun, fun is_error_check/1, opents_bridge_on_query + ). diff --git a/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl b/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl index 655892d88..90c1ae1ce 100644 --- a/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl +++ b/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl @@ -52,6 +52,8 @@ resource_type(iotdb) -> emqx_bridge_iotdb_connector; resource_type(elasticsearch) -> emqx_bridge_es_connector; +resource_type(opents) -> + emqx_bridge_opents_connector; resource_type(Type) -> error({unknown_connector_type, Type}). @@ -66,6 +68,8 @@ connector_impl_module(iotdb) -> emqx_bridge_iotdb_connector; connector_impl_module(elasticsearch) -> emqx_bridge_es_connector; +connector_impl_module(opents) -> + emqx_bridge_opents_connector; connector_impl_module(_ConnectorType) -> undefined. @@ -193,6 +197,14 @@ connector_structs() -> desc => <<"ElasticSearch Connector Config">>, required => false } + )}, + {opents, + mk( + hoconsc:map(name, ref(emqx_bridge_opents_connector, "config_connector")), + #{ + desc => <<"OpenTSDB Connector Config">>, + required => false + } )} ]. @@ -212,7 +224,8 @@ schema_modules() -> emqx_postgresql_connector_schema, emqx_bridge_redis_schema, emqx_bridge_iotdb_connector, - emqx_bridge_es_connector + emqx_bridge_es_connector, + emqx_bridge_opents_connector ]. 
api_schemas(Method) -> @@ -241,7 +254,8 @@ api_schemas(Method) -> api_ref(emqx_postgresql_connector_schema, <<"pgsql">>, Method ++ "_connector"), api_ref(emqx_bridge_redis_schema, <<"redis">>, Method ++ "_connector"), api_ref(emqx_bridge_iotdb_connector, <<"iotdb">>, Method), - api_ref(emqx_bridge_es_connector, <<"elasticsearch">>, Method) + api_ref(emqx_bridge_es_connector, <<"elasticsearch">>, Method), + api_ref(emqx_bridge_opents_connector, <<"opents">>, Method) ]. api_ref(Module, Type, Method) -> diff --git a/apps/emqx_connector/src/schema/emqx_connector_schema.erl b/apps/emqx_connector/src/schema/emqx_connector_schema.erl index 615b89230..1829e04e6 100644 --- a/apps/emqx_connector/src/schema/emqx_connector_schema.erl +++ b/apps/emqx_connector/src/schema/emqx_connector_schema.erl @@ -154,7 +154,9 @@ connector_type_to_bridge_types(timescale) -> connector_type_to_bridge_types(iotdb) -> [iotdb]; connector_type_to_bridge_types(elasticsearch) -> - [elasticsearch]. + [elasticsearch]; +connector_type_to_bridge_types(opents) -> + [opents]. actions_config_name(action) -> <<"actions">>; actions_config_name(source) -> <<"sources">>. diff --git a/rel/i18n/emqx_bridge_opents.hocon b/rel/i18n/emqx_bridge_opents.hocon index ff44a9e18..5f1c4b0af 100644 --- a/rel/i18n/emqx_bridge_opents.hocon +++ b/rel/i18n/emqx_bridge_opents.hocon @@ -23,4 +23,35 @@ emqx_bridge_opents { desc_name.label: "Bridge Name" + +action_parameters_data.desc: +"""OpenTSDB action parameter data""" + +action_parameters_data.label: +"""Parameter Data""" + +config_parameters_timestamp.desc: +"""Timestamp. Placeholders in format of ${var} is supported""" + +config_parameters_timestamp.label: +"""Timestamp""" + +config_parameters_metric.metric: +"""Metric. 
Placeholders in format of ${var} is supported""" + +config_parameters_metric.metric: +"""Metric""" + +config_parameters_tags.desc: +"""Data Type, Placeholders in format of ${var} is supported""" + +config_parameters_tags.label: +"""Tags""" + +config_parameters_value.desc: +"""Value. Placeholders in format of ${var} is supported""" + +config_parameters_value.label: +"""Value""" + } diff --git a/rel/i18n/emqx_bridge_opents_connector.hocon b/rel/i18n/emqx_bridge_opents_connector.hocon index 5c39d1e0e..a54c240a0 100644 --- a/rel/i18n/emqx_bridge_opents_connector.hocon +++ b/rel/i18n/emqx_bridge_opents_connector.hocon @@ -17,4 +17,10 @@ emqx_bridge_opents_connector { details.label: "Details" + +desc_config.desc: +"""Configuration for OpenTSDB Connector.""" + +desc_config.label: +"""OpenTSDB Connector Configuration""" } From 83a88227980c85de3c206be16852d34b7a210e82 Mon Sep 17 00:00:00 2001 From: firest Date: Thu, 18 Jan 2024 22:25:58 +0800 Subject: [PATCH 76/89] chore(opents): bump version && update changes --- apps/emqx_bridge_opents/src/emqx_bridge_opents.app.src | 2 +- .../src/emqx_bridge_opents_connector.erl | 7 ++++++- changes/ee/feat-12353.en.md | 1 + rel/i18n/emqx_bridge_opents.hocon | 10 ++++++++-- 4 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 changes/ee/feat-12353.en.md diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents.app.src b/apps/emqx_bridge_opents/src/emqx_bridge_opents.app.src index 5e3b2f585..2469acaa8 100644 --- a/apps/emqx_bridge_opents/src/emqx_bridge_opents.app.src +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_opents, [ {description, "EMQX Enterprise OpenTSDB Bridge"}, - {vsn, "0.1.3"}, + {vsn, "0.1.4"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl b/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl index 6af1e2f55..e3fe9d6b4 100644 --- 
a/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl @@ -56,7 +56,10 @@ fields(config) -> fields("config_connector") -> emqx_connector_schema:common_fields() ++ - proplists_without([auto_reconnect], fields(config)); + proplists_without([auto_reconnect], fields(config)) ++ + emqx_connector_schema:resource_opts_ref(?MODULE, connector_resource_opts); +fields(connector_resource_opts) -> + emqx_connector_schema:resource_opts_fields(); fields("post") -> emqx_connector_schema:type_and_name_fields(enum([opents])) ++ fields("config_connector"); fields("put") -> @@ -66,6 +69,8 @@ fields("get") -> desc(config) -> ?DESC("desc_config"); +desc(connector_resource_opts) -> + ?DESC(emqx_resource_schema, "resource_opts"); desc("config_connector") -> ?DESC("desc_config"); desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> diff --git a/changes/ee/feat-12353.en.md b/changes/ee/feat-12353.en.md new file mode 100644 index 000000000..2d34e1211 --- /dev/null +++ b/changes/ee/feat-12353.en.md @@ -0,0 +1 @@ +The bridges for OpentsDB have been split so it is available via the connectors and actions APIs. They are still backwards compatible with the old bridge API. diff --git a/rel/i18n/emqx_bridge_opents.hocon b/rel/i18n/emqx_bridge_opents.hocon index 5f1c4b0af..f5d2ade85 100644 --- a/rel/i18n/emqx_bridge_opents.hocon +++ b/rel/i18n/emqx_bridge_opents.hocon @@ -24,6 +24,12 @@ emqx_bridge_opents { desc_name.label: "Bridge Name" +action_parameters.desc: +"""OpenTSDB action parameters""" + +action_parameters.label: +"""Parameters""" + action_parameters_data.desc: """OpenTSDB action parameter data""" @@ -36,10 +42,10 @@ config_parameters_timestamp.desc: config_parameters_timestamp.label: """Timestamp""" -config_parameters_metric.metric: +config_parameters_metric.desc: """Metric. 
Placeholders in format of ${var} is supported""" -config_parameters_metric.metric: +config_parameters_metric.label: """Metric""" config_parameters_tags.desc: From b44420c14f1b949086b21ef59c51340c3c5762ce Mon Sep 17 00:00:00 2001 From: firest Date: Tue, 23 Jan 2024 10:00:56 +0800 Subject: [PATCH 77/89] fix(opentsdb): Enhanced the type support for template data --- .../src/emqx_bridge_opents.erl | 18 +++- .../src/emqx_bridge_opents_connector.erl | 27 ++++-- .../test/emqx_bridge_opents_SUITE.erl | 88 ++++++++++++++++++- rel/i18n/emqx_bridge_opents.hocon | 2 +- 4 files changed, 124 insertions(+), 11 deletions(-) diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl b/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl index 7e490576f..119de1978 100644 --- a/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents.erl @@ -3,6 +3,7 @@ %%-------------------------------------------------------------------- -module(emqx_bridge_opents). +-include_lib("emqx/include/logger.hrl"). -include_lib("typerefl/include/types.hrl"). -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx_resource/include/emqx_resource.hrl"). 
@@ -156,12 +157,25 @@ fields(action_parameters_data) -> binary(), #{ required => true, - desc => ?DESC("config_parameters_tags") + desc => ?DESC("config_parameters_tags"), + validator => fun(Tmpl) -> + case emqx_placeholder:preproc_tmpl(Tmpl) of + [{var, _}] -> + true; + _ -> + ?SLOG(warning, #{ + msg => "invalid_tags_template", + path => "opents.parameters.data.tags", + data => Tmpl + }), + false + end + end } )}, {value, mk( - binary(), + hoconsc:union([integer(), float(), binary()]), #{ required => true, desc => ?DESC("config_parameters_value") diff --git a/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl b/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl index e3fe9d6b4..d71468d82 100644 --- a/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl +++ b/apps/emqx_bridge_opents/src/emqx_bridge_opents_connector.erl @@ -304,9 +304,14 @@ render_channel_message(Msg, #{data := DataList}, Acc) -> ValueVal = case ValueTk of [_] -> + %% just one element, maybe is a variable or a plain text + %% we should keep it as it is erlang:hd(emqx_placeholder:proc_tmpl(ValueTk, Msg, RawOpts)); - _ -> - emqx_placeholder:proc_tmpl(ValueTk, Msg) + Tks when is_list(Tks) -> + emqx_placeholder:proc_tmpl(ValueTk, Msg); + Raw -> + %% not a token list, just a raw value + Raw end, Base = #{metric => MetricVal, tags => TagsVal, value => ValueVal}, [ @@ -328,12 +333,20 @@ preproc_data_template([]) -> preproc_data_template(DataList) -> lists:map( fun(Data) -> - maps:map( - fun(_Key, Value) -> - emqx_placeholder:preproc_tmpl(Value) + {Value, Data2} = maps:take(value, Data), + Template = maps:map( + fun(_Key, Val) -> + emqx_placeholder:preproc_tmpl(Val) end, - Data - ) + Data2 + ), + + case Value of + Text when is_binary(Text) -> + Template#{value => emqx_placeholder:preproc_tmpl(Text)}; + Raw -> + Template#{value => Raw} + end end, DataList ). 
diff --git a/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl b/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl index f86ae6986..e3e89d563 100644 --- a/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl +++ b/apps/emqx_bridge_opents/test/emqx_bridge_opents_SUITE.erl @@ -83,7 +83,7 @@ init_per_testcase(TestCase, Config0) -> {bridge_config, ActionConfig} | Config0 ], - %% iotdb_reset(Config), + emqx_bridge_v2_testlib:delete_all_bridges_and_connectors(), ok = snabbkaffe:start_trace(), Config. @@ -253,3 +253,89 @@ t_query_invalid_data(Config) -> ok = emqx_bridge_v2_testlib:t_sync_query( Config, MakeMessageFun, fun is_error_check/1, opents_bridge_on_query ). + +t_tags_validator(Config) -> + %% Create without data configured + ?assertMatch({ok, _}, emqx_bridge_v2_testlib:create_bridge(Config)), + + ?assertMatch( + {ok, _}, + emqx_bridge_v2_testlib:update_bridge_api(Config, #{ + <<"parameters">> => #{ + <<"data">> => [ + #{ + <<"metric">> => <<"${metric}">>, + <<"tags">> => <<"${tags}">>, + <<"value">> => <<"${payload.value}">> + } + ] + } + }) + ), + + ?assertMatch( + {error, _}, + emqx_bridge_v2_testlib:update_bridge_api(Config, #{ + <<"parameters">> => #{ + <<"data">> => [ + #{ + <<"metric">> => <<"${metric}">>, + <<"tags">> => <<"text">>, + <<"value">> => <<"${payload.value}">> + } + ] + } + }) + ). + +t_raw_int_value(Config) -> + raw_value_test(<<"t_raw_int_value">>, 42, Config). + +t_raw_float_value(Config) -> + raw_value_test(<<"t_raw_float_value">>, 42.5, Config). 
+ +raw_value_test(Metric, RawValue, Config) -> + ?assertMatch({ok, _}, emqx_bridge_v2_testlib:create_bridge(Config)), + ResourceId = emqx_bridge_v2_testlib:resource_id(Config), + BridgeId = emqx_bridge_v2_testlib:bridge_id(Config), + ?retry( + _Sleep = 1_000, + _Attempts = 10, + ?assertEqual({ok, connected}, emqx_resource_manager:health_check(ResourceId)) + ), + + ?assertMatch( + {ok, _}, + emqx_bridge_v2_testlib:update_bridge_api(Config, #{ + <<"parameters">> => #{ + <<"data">> => [ + #{ + <<"metric">> => <<"${metric}">>, + <<"tags">> => <<"${tags}">>, + <<"value">> => RawValue + } + ] + } + }) + ), + + Value = 12, + MakeMessageFun = fun() -> make_data(Metric, Value) end, + + is_success_check( + emqx_resource:simple_sync_query(ResourceId, {BridgeId, MakeMessageFun()}) + ), + + {ok, {{_, 200, _}, _, IoTDBResult}} = opentds_query(Config, Metric), + QResult = emqx_utils_json:decode(IoTDBResult), + ?assertMatch( + [ + #{ + <<"metric">> := Metric, + <<"dps">> := _ + } + ], + QResult + ), + [#{<<"dps">> := Dps}] = QResult, + ?assertMatch([RawValue | _], maps:values(Dps)). diff --git a/rel/i18n/emqx_bridge_opents.hocon b/rel/i18n/emqx_bridge_opents.hocon index f5d2ade85..ab2e82180 100644 --- a/rel/i18n/emqx_bridge_opents.hocon +++ b/rel/i18n/emqx_bridge_opents.hocon @@ -49,7 +49,7 @@ config_parameters_metric.label: """Metric""" config_parameters_tags.desc: -"""Data Type, Placeholders in format of ${var} is supported""" +"""Tags. 
Only supports with placeholder to extract tags from a variable""" config_parameters_tags.label: """Tags""" From 186e1591df60da24a68cf55452345df5a90c9c4c Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Tue, 23 Jan 2024 09:06:15 +0800 Subject: [PATCH 78/89] chore: document api-key path api only support bearerAuth --- apps/emqx/include/http_api.hrl | 1 + apps/emqx_dashboard/src/emqx_dashboard.erl | 7 ++++--- apps/emqx_dashboard/src/emqx_dashboard_api.erl | 1 + .../src/emqx_mgmt_api_api_keys.erl | 2 ++ apps/emqx_management/src/emqx_mgmt_auth.erl | 6 +++--- .../test/emqx_mgmt_api_api_keys_SUITE.erl | 17 ++++++++++++++++- rel/i18n/emqx_mgmt_api_api_keys.hocon | 10 +++++----- 7 files changed, 32 insertions(+), 12 deletions(-) diff --git a/apps/emqx/include/http_api.hrl b/apps/emqx/include/http_api.hrl index 0f6372584..f0c5611e9 100644 --- a/apps/emqx/include/http_api.hrl +++ b/apps/emqx/include/http_api.hrl @@ -17,6 +17,7 @@ %% HTTP API Auth -define(BAD_USERNAME_OR_PWD, 'BAD_USERNAME_OR_PWD'). -define(BAD_API_KEY_OR_SECRET, 'BAD_API_KEY_OR_SECRET'). +-define(API_KEY_NOT_ALLOW, 'API_KEY_NOT_ALLOW'). -define(API_KEY_NOT_ALLOW_MSG, <<"This API Key don't have permission to access this resource">>). 
%% Bad Request diff --git a/apps/emqx_dashboard/src/emqx_dashboard.erl b/apps/emqx_dashboard/src/emqx_dashboard.erl index 96f81ca84..a4438f6c7 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard.erl @@ -264,10 +264,11 @@ api_key_authorize(Req, Key, Secret) -> case emqx_mgmt_auth:authorize(Path, Req, Key, Secret) of ok -> {ok, #{auth_type => api_key, source => Key}}; - {error, <<"not_allowed">>} -> + {error, <<"not_allowed">>, Resource} -> return_unauthorized( - ?BAD_API_KEY_OR_SECRET, - <<"Not allowed, Check api_key/api_secret">> + ?API_KEY_NOT_ALLOW, + <<"Please use bearer Token instead, using API key/secret in ", Resource/binary, + " path is not permitted">> ); {error, unauthorized_role} -> {403, 'UNAUTHORIZED_ROLE', ?API_KEY_NOT_ALLOW_MSG}; diff --git a/apps/emqx_dashboard/src/emqx_dashboard_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_api.erl index 8a81f2116..d7ed5941f 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_api.erl @@ -89,6 +89,7 @@ schema("/logout") -> post => #{ tags => [<<"dashboard">>], desc => ?DESC(logout_api), + security => [#{'bearerAuth' => []}], parameters => sso_parameters(), 'requestBody' => fields([username]), responses => #{ diff --git a/apps/emqx_management/src/emqx_mgmt_api_api_keys.erl b/apps/emqx_management/src/emqx_mgmt_api_api_keys.erl index db20e9477..ad4b53401 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_api_keys.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_api_keys.erl @@ -40,6 +40,7 @@ schema("/api_key") -> get => #{ description => ?DESC(api_key_list), tags => ?TAGS, + security => [#{'bearerAuth' => []}], responses => #{ 200 => delete([api_secret], fields(app)) } @@ -47,6 +48,7 @@ schema("/api_key") -> post => #{ description => ?DESC(create_new_api_key), tags => ?TAGS, + security => [#{'bearerAuth' => []}], 'requestBody' => delete([created_at, api_key, api_secret], fields(app)), responses => #{ 200 => 
hoconsc:ref(app), diff --git a/apps/emqx_management/src/emqx_mgmt_auth.erl b/apps/emqx_management/src/emqx_mgmt_auth.erl index 559344e2b..7745207ce 100644 --- a/apps/emqx_management/src/emqx_mgmt_auth.erl +++ b/apps/emqx_management/src/emqx_mgmt_auth.erl @@ -184,11 +184,11 @@ list() -> to_map(ets:match_object(?APP, #?APP{_ = '_'})). authorize(<<"/api/v5/users", _/binary>>, _Req, _ApiKey, _ApiSecret) -> - {error, <<"not_allowed">>}; + {error, <<"not_allowed">>, <<"users">>}; authorize(<<"/api/v5/api_key", _/binary>>, _Req, _ApiKey, _ApiSecret) -> - {error, <<"not_allowed">>}; + {error, <<"not_allowed">>, <<"api_key">>}; authorize(<<"/api/v5/logout", _/binary>>, _Req, _ApiKey, _ApiSecret) -> - {error, <<"not_allowed">>}; + {error, <<"not_allowed">>, <<"logout">>}; authorize(_Path, Req, ApiKey, ApiSecret) -> Now = erlang:system_time(second), case find_by_api_key(ApiKey) of diff --git a/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl index d437e07c9..760ab1732 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_api_keys_SUITE.erl @@ -394,8 +394,23 @@ t_authorize(_Config) -> {ok, _Status} = emqx_mgmt_api_test_util:request_api(get, BanPath, BasicHeader), ?assertEqual(Unauthorized, emqx_mgmt_api_test_util:request_api(get, BanPath, KeyError)), ?assertEqual(Unauthorized, emqx_mgmt_api_test_util:request_api(get, BanPath, SecretError)), - ?assertEqual(Unauthorized, emqx_mgmt_api_test_util:request_api(get, ApiKeyPath, BasicHeader)), ?assertEqual(Unauthorized, emqx_mgmt_api_test_util:request_api(get, UserPath, BasicHeader)), + {error, {{"HTTP/1.1", 401, "Unauthorized"}, _Headers, Body}} = + emqx_mgmt_api_test_util:request_api( + get, + ApiKeyPath, + [], + BasicHeader, + [], + #{return_all => true} + ), + ?assertMatch( + #{ + <<"code">> := <<"API_KEY_NOT_ALLOW">>, + <<"message">> := _ + }, + emqx_utils_json:decode(Body, [return_maps]) + ), 
?assertMatch( {ok, #{<<"api_key">> := _, <<"enable">> := false}}, diff --git a/rel/i18n/emqx_mgmt_api_api_keys.hocon b/rel/i18n/emqx_mgmt_api_api_keys.hocon index 8acbe60d0..4becd01aa 100644 --- a/rel/i18n/emqx_mgmt_api_api_keys.hocon +++ b/rel/i18n/emqx_mgmt_api_api_keys.hocon @@ -1,27 +1,27 @@ emqx_mgmt_api_api_keys { api_key_list.desc: -"""Return api_key list""" +"""Return api_key list. This API can only be requested using a bearer token.""" api_key_list.label: """Return api_key list""" create_new_api_key.desc: -"""Create new api_key""" +"""Create new api_key. This API can only be requested using a bearer token.""" create_new_api_key.label: """Create new api_key""" get_api_key.desc: -"""Return the specific api_key""" +"""Return the specific api_key. This API can only be requested using a bearer token.""" get_api_key.label: """Return the specific api_key""" update_api_key.desc: -"""Update the specific api_key""" +"""Update the specific api_key. This API can only be requested using a bearer token.""" update_api_key.label: """Update the specific api_key""" delete_api_key.desc: -"""Delete the specific api_key""" +"""Delete the specific api_key. 
This API can only be requested using a bearer token.""" delete_api_key.label: """Delete the specific api_key""" From 497e735bf48ff840406dad4d656a53720df1a3be Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Tue, 16 Jan 2024 14:39:41 +0800 Subject: [PATCH 79/89] ci: add env vars to run cassandra tests locally --- .../docker-compose-toxiproxy.yaml | 4 ++ .ci/docker-compose-file/toxiproxy.json | 12 ++++++ .../test/emqx_bridge_cassandra_SUITE.erl | 8 ++++ .../emqx_bridge_cassandra_connector_SUITE.erl | 40 +++++++++++-------- 4 files changed, 47 insertions(+), 17 deletions(-) diff --git a/.ci/docker-compose-file/docker-compose-toxiproxy.yaml b/.ci/docker-compose-file/docker-compose-toxiproxy.yaml index d648d9d78..568d9129c 100644 --- a/.ci/docker-compose-file/docker-compose-toxiproxy.yaml +++ b/.ci/docker-compose-file/docker-compose-toxiproxy.yaml @@ -39,6 +39,10 @@ services: - 19042:9042 # Cassandra TLS - 19142:9142 + # Cassandra No Auth + - 19043:9043 + # Cassandra TLS No Auth + - 19143:9143 # S3 - 19000:19000 # S3 TLS diff --git a/.ci/docker-compose-file/toxiproxy.json b/.ci/docker-compose-file/toxiproxy.json index c58474039..103bae924 100644 --- a/.ci/docker-compose-file/toxiproxy.json +++ b/.ci/docker-compose-file/toxiproxy.json @@ -96,6 +96,18 @@ "upstream": "cassandra:9142", "enabled": true }, + { + "name": "cassa_no_auth_tcp", + "listen": "0.0.0.0:9043", + "upstream": "cassandra_noauth:9042", + "enabled": true + }, + { + "name": "cassa_no_auth_tls", + "listen": "0.0.0.0:9143", + "upstream": "cassandra_noauth:9142", + "enabled": true + }, { "name": "sqlserver", "listen": "0.0.0.0:1433", diff --git a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl index 9df219296..e0e3900b0 100644 --- a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl +++ b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl @@ -11,6 +11,14 @@ 
-include_lib("common_test/include/ct.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +%% To run this test locally: +%% ./scripts/ct/run.sh --app apps/emqx_bridge_cassandra --only-up +%% PROFILE=emqx-enterprise PROXY_HOST=localhost CASSA_TLS_HOST=localhost \ +%% CASSA_TLS_PORT=19142 CASSA_TCP_HOST=localhost CASSA_TCP_NO_AUTH_HOST=localhost \ +%% CASSA_TCP_PORT=19042 CASSA_TCP_NO_AUTH_PORT=19043 \ +%% ./rebar3 ct --name 'test@127.0.0.1' -v --suite \ +%% apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE + % SQL definitions -define(SQL_BRIDGE, "insert into mqtt_msg_test(topic, payload, arrived) " diff --git a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl index de306e3f0..245110de6 100644 --- a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl +++ b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl @@ -14,20 +14,20 @@ -include_lib("emqx/include/emqx.hrl"). -include_lib("stdlib/include/assert.hrl"). +%% To run this test locally: +%% ./scripts/ct/run.sh --app apps/emqx_bridge_cassandra --only-up +%% PROFILE=emqx-enterprise PROXY_HOST=localhost CASSA_TLS_HOST=localhost \ +%% CASSA_TLS_PORT=9142 CASSA_TCP_HOST=localhost CASSA_TCP_NO_AUTH_HOST=localhost \ +%% CASSA_TCP_PORT=19042 CASSA_TCP_NO_AUTH_PORT=19043 \ +%% ./rebar3 ct --name 'test@127.0.0.1' -v --suite \ +%% apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE + %% Cassandra servers are defined at `.ci/docker-compose-file/docker-compose-cassandra.yaml` %% You can change it to `127.0.0.1`, if you run this SUITE locally -define(CASSANDRA_HOST, "cassandra"). -define(CASSANDRA_HOST_NOAUTH, "cassandra_noauth"). -define(CASSANDRA_RESOURCE_MOD, emqx_bridge_cassandra_connector). -%% This test SUITE requires a running cassandra instance. 
If you don't want to -%% bring up the whole CI infrastuctucture with the `scripts/ct/run.sh` script -%% you can create a cassandra instance with the following command (execute it -%% from root of the EMQX directory.). You also need to set ?CASSANDRA_HOST and -%% ?CASSANDRA_PORT to appropriate values. -%% -%% sudo docker run --rm -d --name cassandra --network host cassandra:3.11.14 - %% Cassandra default username & password once enable `authenticator: PasswordAuthenticator` %% in cassandra config -define(CASSA_USERNAME, <<"cassandra">>). @@ -45,14 +45,14 @@ groups() -> {noauth, [t_lifecycle]} ]. -cassandra_servers(CassandraHost) -> +cassandra_servers(CassandraHost, CassandraPort) -> lists:map( fun(#{hostname := Host, port := Port}) -> {Host, Port} end, emqx_schema:parse_servers( - iolist_to_binary([CassandraHost, ":", erlang:integer_to_list(?CASSANDRA_DEFAULT_PORT)]), - #{default_port => ?CASSANDRA_DEFAULT_PORT} + iolist_to_binary([CassandraHost, ":", erlang:integer_to_list(CassandraPort)]), + #{default_port => CassandraPort} ) ). @@ -63,25 +63,30 @@ init_per_suite(Config) -> Config. 
init_per_group(Group, Config) -> - {CassandraHost, AuthOpts} = + {CassandraHost, CassandraPort, AuthOpts} = case Group of auth -> - {?CASSANDRA_HOST, [{username, ?CASSA_USERNAME}, {password, ?CASSA_PASSWORD}]}; + TcpHost = os:getenv("CASSA_TCP_HOST", "toxiproxy"), + TcpPort = list_to_integer(os:getenv("CASSA_TCP_PORT", "9042")), + {TcpHost, TcpPort, [{username, ?CASSA_USERNAME}, {password, ?CASSA_PASSWORD}]}; noauth -> - {?CASSANDRA_HOST_NOAUTH, []} + TcpHost = os:getenv("CASSA_TCP_NO_AUTH_HOST", "toxiproxy"), + TcpPort = list_to_integer(os:getenv("CASSA_TCP_NO_AUTH_PORT", "9043")), + {TcpHost, TcpPort, []} end, - case emqx_common_test_helpers:is_tcp_server_available(CassandraHost, ?CASSANDRA_DEFAULT_PORT) of + case emqx_common_test_helpers:is_tcp_server_available(CassandraHost, CassandraPort) of true -> %% keyspace `mqtt` must be created in advance {ok, Conn} = ecql:connect([ - {nodes, cassandra_servers(CassandraHost)}, + {nodes, cassandra_servers(CassandraHost, CassandraPort)}, {keyspace, "mqtt"} | AuthOpts ]), ecql:close(Conn), [ {cassa_host, CassandraHost}, + {cassa_port, CassandraPort}, {cassa_auth_opts, AuthOpts} | Config ]; @@ -212,6 +217,7 @@ create_local_resource(ResourceId, CheckedConfig) -> cassandra_config(Config) -> Host = ?config(cassa_host, Config), + Port = ?config(cassa_port, Config), AuthOpts = maps:from_list(?config(cassa_auth_opts, Config)), CassConfig = AuthOpts#{ @@ -223,7 +229,7 @@ cassandra_config(Config) -> "~s:~b", [ Host, - ?CASSANDRA_DEFAULT_PORT + Port ] ) ) From 6a21766ce3af07fa25ebb58d9571809d4ae78d92 Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Fri, 19 Jan 2024 18:46:35 +0800 Subject: [PATCH 80/89] refactor: split cassandra bridges to actions and connectors --- apps/emqx_bridge/src/emqx_action_info.erl | 1 + .../src/emqx_bridge_cassandra.app.src | 4 +- .../src/emqx_bridge_cassandra.erl | 108 ++++++- .../src/emqx_bridge_cassandra_action_info.erl | 62 ++++ .../src/emqx_bridge_cassandra_connector.erl | 286 
++++++++---------- .../test/emqx_bridge_cassandra_SUITE.erl | 21 +- .../emqx_bridge_cassandra_connector_SUITE.erl | 4 - .../src/schema/emqx_connector_ee_schema.erl | 12 + .../src/schema/emqx_connector_schema.erl | 2 + rel/i18n/emqx_bridge_cassandra.hocon | 10 + .../emqx_bridge_cassandra_connector.hocon | 6 + 11 files changed, 332 insertions(+), 184 deletions(-) create mode 100644 apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_action_info.erl diff --git a/apps/emqx_bridge/src/emqx_action_info.erl b/apps/emqx_bridge/src/emqx_action_info.erl index 4f6228998..8e5a823e3 100644 --- a/apps/emqx_bridge/src/emqx_action_info.erl +++ b/apps/emqx_bridge/src/emqx_action_info.erl @@ -92,6 +92,7 @@ hard_coded_action_info_modules_ee() -> emqx_bridge_matrix_action_info, emqx_bridge_mongodb_action_info, emqx_bridge_influxdb_action_info, + emqx_bridge_cassandra_action_info, emqx_bridge_mysql_action_info, emqx_bridge_pgsql_action_info, emqx_bridge_syskeeper_action_info, diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src index 97be100d2..aa8290b98 100644 --- a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_cassandra, [ {description, "EMQX Enterprise Cassandra Bridge"}, - {vsn, "0.1.6"}, + {vsn, "0.2.0"}, {registered, []}, {applications, [ kernel, @@ -8,7 +8,7 @@ emqx_resource, ecql ]}, - {env, []}, + {env, [{emqx_action_info_modules, [emqx_bridge_cassandra_action_info]}]}, {modules, []}, {links, []} ]}. 
diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl index 2724b7c09..83268cab5 100644 --- a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.erl @@ -12,11 +12,17 @@ %% schema examples -export([ - conn_bridge_examples/1, values/2, fields/2 ]). +%% Examples +-export([ + bridge_v2_examples/1, + conn_bridge_examples/1, + connector_examples/1 +]). + %% schema -export([ namespace/0, @@ -26,10 +32,13 @@ ]). -define(DEFAULT_CQL, << - "insert into mqtt_msg(topic, msgid, sender, qos, payload, arrived, retain) " - "values (${topic}, ${id}, ${clientid}, ${qos}, ${payload}, ${timestamp}, ${flags.retain})" + "insert into mqtt_msg(msgid, topic, qos, payload, arrived) " + "values (${id}, ${topic}, ${qos}, ${payload}, ${timestamp})" >>). +-define(CONNECTOR_TYPE, cassandra). +-define(ACTION_TYPE, cassandra). + %%-------------------------------------------------------------------- %% schema examples @@ -43,6 +52,41 @@ conn_bridge_examples(Method) -> } ]. +bridge_v2_examples(Method) -> + ParamsExample = #{ + parameters => #{ + cql => ?DEFAULT_CQL + } + }, + [ + #{ + <<"cassandra">> => #{ + summary => <<"Cassandra Action">>, + value => emqx_bridge_v2_schema:action_values( + Method, cassandra, cassandra, ParamsExample + ) + } + } + ]. + +connector_examples(Method) -> + [ + #{ + <<"cassandra">> => #{ + summary => <<"Cassandra Connector">>, + value => emqx_connector_schema:connector_values( + Method, cassandra, #{ + servers => <<"127.0.0.1:9042">>, + keyspace => <<"mqtt">>, + username => <<"root">>, + password => <<"******">>, + pool_size => 8 + } + ) + } + } + ]. + %% no difference in get/post/put method values(_Method, Type) -> #{ @@ -73,14 +117,47 @@ namespace() -> "bridge_cassa". roots() -> []. 
+fields("config_connector") -> + emqx_connector_schema:common_fields() ++ + emqx_bridge_cassandra_connector:fields("connector") ++ + emqx_connector_schema:resource_opts_ref(?MODULE, connector_resource_opts); +fields(action) -> + {cassandra, + mk( + hoconsc:map(name, ref(?MODULE, cassandra_action)), + #{desc => <<"Cassandra Action Config">>, required => false} + )}; +fields(cassandra_action) -> + emqx_bridge_v2_schema:make_producer_action_schema( + mk(ref(?MODULE, action_parameters), #{ + required => true, desc => ?DESC(action_parameters) + }) + ); +fields(action_parameters) -> + [ + cql_field() + ]; +fields(connector_resource_opts) -> + emqx_connector_schema:resource_opts_fields(); +fields(Field) when + Field == "get_connector"; + Field == "put_connector"; + Field == "post_connector" +-> + Fields = + emqx_bridge_cassandra_connector:fields("connector") ++ + emqx_connector_schema:resource_opts_ref(?MODULE, connector_resource_opts), + emqx_connector_schema:api_fields(Field, ?CONNECTOR_TYPE, Fields); +fields(Field) when + Field == "get_bridge_v2"; + Field == "post_bridge_v2"; + Field == "put_bridge_v2" +-> + emqx_bridge_v2_schema:api_fields(Field, ?ACTION_TYPE, fields(cassandra_action)); fields("config") -> [ + cql_field(), {enable, mk(boolean(), #{desc => ?DESC("config_enable"), default => true})}, - {cql, - mk( - binary(), - #{desc => ?DESC("cql_template"), default => ?DEFAULT_CQL, format => <<"sql">>} - )}, {local_topic, mk( binary(), @@ -99,8 +176,23 @@ fields("get") -> fields("post", Type) -> [type_field(Type), name_field() | fields("config")]. +cql_field() -> + {cql, + mk( + binary(), + #{desc => ?DESC("cql_template"), default => ?DEFAULT_CQL, format => <<"sql">>} + )}. 
+ desc("config") -> ?DESC("desc_config"); +desc(cassandra_action) -> + ?DESC(cassandra_action); +desc(action_parameters) -> + ?DESC(action_parameters); +desc("config_connector") -> + ?DESC("desc_config"); +desc(connector_resource_opts) -> + ?DESC(emqx_resource_schema, "resource_opts"); desc(Method) when Method =:= "get"; Method =:= "put"; Method =:= "post" -> ["Configuration for Cassandra using `", string:to_upper(Method), "` method."]; desc(_) -> diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_action_info.erl b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_action_info.erl new file mode 100644 index 000000000..14db7cf50 --- /dev/null +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_action_info.erl @@ -0,0 +1,62 @@ +-module(emqx_bridge_cassandra_action_info). + +-behaviour(emqx_action_info). + +-export([ + bridge_v1_config_to_action_config/2, + bridge_v1_config_to_connector_config/1, + connector_action_config_to_bridge_v1_config/2, + bridge_v1_type_name/0, + action_type_name/0, + connector_type_name/0, + schema_module/0 +]). + +-import(emqx_utils_conv, [bin/1]). + +-define(SCHEMA_MODULE, emqx_bridge_cassandra). + +bridge_v1_config_to_action_config(BridgeV1Config, ConnectorName) -> + ActionTopLevelKeys = schema_keys(cassandra_action), + ActionParametersKeys = schema_keys(action_parameters), + ActionKeys = ActionTopLevelKeys ++ ActionParametersKeys, + ActionConfig = make_config_map(ActionKeys, ActionParametersKeys, BridgeV1Config), + emqx_utils_maps:update_if_present( + <<"resource_opts">>, + fun emqx_bridge_v2_schema:project_to_actions_resource_opts/1, + ActionConfig#{<<"connector">> => ConnectorName} + ). 
+ +bridge_v1_config_to_connector_config(BridgeV1Config) -> + ActionTopLevelKeys = schema_keys(cassandra_action), + ActionParametersKeys = schema_keys(action_parameters), + ActionKeys = ActionTopLevelKeys ++ ActionParametersKeys, + ConnectorTopLevelKeys = schema_keys("config_connector"), + ConnectorKeys = maps:keys(BridgeV1Config) -- (ActionKeys -- ConnectorTopLevelKeys), + ConnConfig0 = maps:with(ConnectorKeys, BridgeV1Config), + emqx_utils_maps:update_if_present( + <<"resource_opts">>, + fun emqx_connector_schema:project_to_connector_resource_opts/1, + ConnConfig0 + ). + +connector_action_config_to_bridge_v1_config(ConnectorRawConf, ActionRawConf) -> + RawConf = emqx_action_info:connector_action_config_to_bridge_v1_config( + ConnectorRawConf, ActionRawConf + ), + maps:without([<<"cassandra_type">>], RawConf). + +bridge_v1_type_name() -> cassandra. + +action_type_name() -> cassandra. + +connector_type_name() -> cassandra. + +schema_module() -> ?SCHEMA_MODULE. + +make_config_map(PickKeys, IndentKeys, Config) -> + Conf0 = maps:with(PickKeys, Config), + emqx_utils_maps:indent(<<"parameters">>, IndentKeys, Conf0). + +schema_keys(Name) -> + [bin(Key) || Key <- proplists:get_keys(?SCHEMA_MODULE:fields(Name))]. diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl index c6bc7098c..3db71c9e0 100644 --- a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl @@ -14,13 +14,17 @@ -include_lib("snabbkaffe/include/snabbkaffe.hrl"). %% schema --export([roots/0, fields/1]). +-export([roots/0, fields/1, desc/1]). %% callbacks of behaviour emqx_resource -export([ callback_mode/0, on_start/2, on_stop/2, + on_add_channel/4, + on_remove_channel/3, + on_get_channel_status/3, + on_get_channels/1, on_query/3, on_query_async/4, on_batch_query/3, @@ -28,6 +32,8 @@ on_get_status/2 ]). 
+-export([transform_bridge_v1_config_to_connector_config/1]). + %% callbacks of ecpool -export([ connect/1, @@ -39,16 +45,10 @@ -export([do_get_status/1]). --type prepares() :: #{atom() => binary()}. --type params_tokens() :: #{atom() => list()}. - -type state() :: #{ pool_name := binary(), - prepare_cql := prepares(), - params_tokens := params_tokens(), - %% returned by ecql:prepare/2 - prepare_statement := binary() + channels := #{} }. -define(DEFAULT_SERVER_OPTION, #{default_port => ?CASSANDRA_DEFAULT_PORT}). @@ -62,7 +62,9 @@ roots() -> fields(config) -> cassandra_db_fields() ++ emqx_connector_schema_lib:ssl_fields() ++ - emqx_connector_schema_lib:prepare_statement_fields(). + emqx_connector_schema_lib:prepare_statement_fields(); +fields("connector") -> + cassandra_db_fields() ++ emqx_connector_schema_lib:ssl_fields(). cassandra_db_fields() -> [ @@ -83,6 +85,11 @@ keyspace(desc) -> ?DESC("keyspace"); keyspace(required) -> true; keyspace(_) -> undefined. +desc(config) -> + ?DESC("config"); +desc("connector") -> + ?DESC("connector"). + %%-------------------------------------------------------------------- %% callbacks for emqx_resource @@ -130,10 +137,9 @@ on_start( false -> [] end, - State = parse_prepare_cql(Config), case emqx_resource_pool:start(InstId, ?MODULE, Options ++ SslOpts) of ok -> - {ok, init_prepare(State#{pool_name => InstId, prepare_statement => #{}})}; + {ok, #{pool_name => InstId, channels => #{}}}; {error, Reason} -> ?tp( cassandra_connector_start_failed, @@ -149,23 +155,49 @@ on_stop(InstId, _State) -> }), emqx_resource_pool:stop(InstId). 
+on_add_channel(_InstId, #{channels := Channs} = OldState, ChannId, ChannConf0) -> + #{parameters := #{cql := CQL}} = ChannConf0, + {PrepareCQL, ParamsTokens} = emqx_placeholder:preproc_sql(CQL, '?'), + ParsedCql = #{ + prepare_key => short_prepare_key(ChannId), + prepare_cql => PrepareCQL, + params_tokens => ParamsTokens + }, + NewChanns = Channs#{ChannId => #{parsed_cql => ParsedCql, prepare_result => not_prepared}}, + {ok, OldState#{channels => NewChanns}}. + +on_remove_channel(_InstanceId, #{channels := Channels} = State, ChannId) -> + NewState = State#{channels => maps:remove(ChannId, Channels)}, + {ok, NewState}. + +on_get_channel_status(InstanceId, ChannId, #{channels := Channels, pool_name := PoolName} = State) -> + case on_get_status(InstanceId, State) of + connected -> + #{parsed_cql := ParsedCql} = maps:get(ChannId, Channels), + case prepare_cql_to_cassandra(ParsedCql, PoolName) of + {ok, _} -> connected; + {error, Reason} -> {connecting, Reason} + end; + _ -> + connecting + end. + +on_get_channels(InstanceId) -> + emqx_bridge_v2:get_channels_for_connector(InstanceId). + -type request() :: % emqx_bridge.erl - {send_message, Params :: map()} + {ChannId :: binary(), Params :: map()} % common query - | {query, SQL :: binary()} - | {query, SQL :: binary(), Params :: map()}. + | {query, CQL :: binary()} + | {query, CQL :: binary(), Params :: map()}. -spec on_query( emqx_resource:resource_id(), request(), state() ) -> ok | {ok, ecql:cql_result()} | {error, {recoverable_error | unrecoverable_error, term()}}. -on_query( - InstId, - Request, - State -) -> +on_query(InstId, Request, State) -> do_single_query(InstId, Request, sync, State). -spec on_query_async( @@ -174,21 +206,11 @@ on_query( {function(), list()}, state() ) -> ok | {error, {recoverable_error | unrecoverable_error, term()}}. -on_query_async( - InstId, - Request, - Callback, - State -) -> +on_query_async(InstId, Request, Callback, State) -> do_single_query(InstId, Request, {async, Callback}, State). 
-do_single_query( - InstId, - Request, - Async, - #{pool_name := PoolName} = State -) -> - {Type, PreparedKeyOrSQL, Params} = parse_request_to_cql(Request), +do_single_query(InstId, Request, Async, #{pool_name := PoolName} = State) -> + {Type, PreparedKeyOrCQL, Params} = parse_request_to_cql(Request), ?tp( debug, cassandra_connector_received_cql_query, @@ -196,12 +218,12 @@ do_single_query( connector => InstId, type => Type, params => Params, - prepared_key_or_cql => PreparedKeyOrSQL, + prepared_key_or_cql => PreparedKeyOrCQL, state => State } ), - {PreparedKeyOrSQL1, Data} = proc_cql_params(Type, PreparedKeyOrSQL, Params, State), - Res = exec_cql_query(InstId, PoolName, Type, Async, PreparedKeyOrSQL1, Data), + {PreparedKeyOrCQL1, Data} = proc_cql_params(Type, PreparedKeyOrCQL, Params, State), + Res = exec_cql_query(InstId, PoolName, Type, Async, PreparedKeyOrCQL1, Data), handle_result(Res). -spec on_batch_query( @@ -209,11 +231,7 @@ do_single_query( [request()], state() ) -> ok | {error, {recoverable_error | unrecoverable_error, term()}}. -on_batch_query( - InstId, - Requests, - State -) -> +on_batch_query(InstId, Requests, State) -> do_batch_query(InstId, Requests, sync, State). -spec on_batch_query_async( @@ -222,25 +240,15 @@ on_batch_query( {function(), list()}, state() ) -> ok | {error, {recoverable_error | unrecoverable_error, term()}}. -on_batch_query_async( - InstId, - Requests, - Callback, - State -) -> +on_batch_query_async(InstId, Requests, Callback, State) -> do_batch_query(InstId, Requests, {async, Callback}, State). 
-do_batch_query( - InstId, - Requests, - Async, - #{pool_name := PoolName} = State -) -> +do_batch_query(InstId, Requests, Async, #{pool_name := PoolName} = State) -> CQLs = lists:map( fun(Request) -> - {Type, PreparedKeyOrSQL, Params} = parse_request_to_cql(Request), - proc_cql_params(Type, PreparedKeyOrSQL, Params, State) + {Type, PreparedKeyOrCQL, Params} = parse_request_to_cql(Request), + proc_cql_params(Type, PreparedKeyOrCQL, Params, State) end, Requests ), @@ -256,26 +264,24 @@ do_batch_query( Res = exec_cql_batch_query(InstId, PoolName, Async, CQLs), handle_result(Res). -parse_request_to_cql({send_message, Params}) -> - {prepared_query, _Key = send_message, Params}; -parse_request_to_cql({query, SQL}) -> - parse_request_to_cql({query, SQL, #{}}); -parse_request_to_cql({query, SQL, Params}) -> - {query, SQL, Params}. +parse_request_to_cql({query, CQL}) -> + {query, CQL, #{}}; +parse_request_to_cql({query, CQL, Params}) -> + {query, CQL, Params}; +parse_request_to_cql({ChannId, Params}) -> + {prepared_query, ChannId, Params}. -proc_cql_params( - prepared_query, - PreparedKey0, - Params, - #{prepare_statement := Prepares, params_tokens := ParamsTokens} -) -> - %% assert - _PreparedKey = maps:get(PreparedKey0, Prepares), - Tokens = maps:get(PreparedKey0, ParamsTokens), - {PreparedKey0, assign_type_for_params(emqx_placeholder:proc_sql(Tokens, Params))}; -proc_cql_params(query, SQL, Params, _State) -> - {SQL1, Tokens} = emqx_placeholder:preproc_sql(SQL, '?'), - {SQL1, assign_type_for_params(emqx_placeholder:proc_sql(Tokens, Params))}. 
+proc_cql_params(prepared_query, ChannId, Params, #{channels := Channs}) -> + #{ + parsed_cql := #{ + prepare_key := PrepareKey, + params_tokens := ParamsTokens + } + } = maps:get(ChannId, Channs), + {PrepareKey, assign_type_for_params(emqx_placeholder:proc_sql(ParamsTokens, Params))}; +proc_cql_params(query, CQL, Params, _State) -> + {CQL1, Tokens} = emqx_placeholder:preproc_sql(CQL, '?'), + {CQL1, assign_type_for_params(emqx_placeholder:proc_sql(Tokens, Params))}. exec_cql_query(InstId, PoolName, Type, Async, PreparedKey, Data) when Type == query; Type == prepared_query @@ -314,38 +320,15 @@ exec_cql_batch_query(InstId, PoolName, Async, CQLs) -> exec(PoolName, Query) -> ecpool:pick_and_do(PoolName, Query, no_handover). -on_get_status(_InstId, #{pool_name := PoolName} = State) -> +on_get_status(_InstId, #{pool_name := PoolName}) -> case emqx_resource_pool:health_check_workers(PoolName, fun ?MODULE:do_get_status/1) of - true -> - case do_check_prepares(State) of - ok -> - connected; - {ok, NState} -> - %% return new state with prepared statements - {connected, NState}; - false -> - %% do not log error, it is logged in prepare_cql_to_conn - connecting - end; - false -> - connecting + true -> connected; + false -> connecting end. do_get_status(Conn) -> ok == element(1, ecql:query(Conn, "SELECT cluster_name FROM system.local")). -do_check_prepares(#{prepare_cql := Prepares}) when is_map(Prepares) -> - ok; -do_check_prepares(State = #{pool_name := PoolName, prepare_cql := {error, Prepares}}) -> - %% retry to prepare - case prepare_cql(Prepares, PoolName) of - {ok, Sts} -> - %% remove the error - {ok, State#{prepare_cql => Prepares, prepare_statement := Sts}}; - _Error -> - false - end. 
- %%-------------------------------------------------------------------- %% callbacks query @@ -394,88 +377,50 @@ conn_opts([Opt | Opts], Acc) -> %%-------------------------------------------------------------------- %% prepare - -%% XXX: hardcode -%% note: the `cql` param is passed by emqx_bridge_cassandra -parse_prepare_cql(#{cql := SQL}) -> - parse_prepare_cql([{send_message, SQL}], #{}, #{}); -parse_prepare_cql(_) -> - #{prepare_cql => #{}, params_tokens => #{}}. - -parse_prepare_cql([{Key, H} | T], Prepares, Tokens) -> - {PrepareSQL, ParamsTokens} = emqx_placeholder:preproc_sql(H, '?'), - parse_prepare_cql( - T, Prepares#{Key => PrepareSQL}, Tokens#{Key => ParamsTokens} - ); -parse_prepare_cql([], Prepares, Tokens) -> - #{ - prepare_cql => Prepares, - params_tokens => Tokens - }. - -init_prepare(State = #{prepare_cql := Prepares, pool_name := PoolName}) -> - case maps:size(Prepares) of - 0 -> - State; - _ -> - case prepare_cql(Prepares, PoolName) of - {ok, Sts} -> - State#{prepare_statement := Sts}; - Error -> - ?tp( - error, - cassandra_prepare_cql_failed, - #{prepares => Prepares, reason => Error} - ), - %% mark the prepare_cql as failed - State#{prepare_cql => {error, Prepares}} - end - end. - -prepare_cql(Prepares, PoolName) when is_map(Prepares) -> - prepare_cql(maps:to_list(Prepares), PoolName); -prepare_cql(Prepares, PoolName) -> - case do_prepare_cql(Prepares, PoolName) of - {ok, _Sts} = Ok -> +prepare_cql_to_cassandra(ParsedCql, PoolName) -> + case prepare_cql_to_cassandra(ecpool:workers(PoolName), ParsedCql, #{}) of + {ok, Statement} -> %% prepare for reconnect - ecpool:add_reconnect_callback(PoolName, {?MODULE, prepare_cql_to_conn, [Prepares]}), - Ok; + ecpool:add_reconnect_callback(PoolName, {?MODULE, prepare_cql_to_conn, [ParsedCql]}), + {ok, Statement}; Error -> + ?tp( + error, + cassandra_prepare_cql_failed, + #{parsed_cql => ParsedCql, reason => Error} + ), Error end. 
-do_prepare_cql(Prepares, PoolName) -> - do_prepare_cql(ecpool:workers(PoolName), Prepares, #{}). - -do_prepare_cql([{_Name, Worker} | T], Prepares, _LastSts) -> +prepare_cql_to_cassandra([{_Name, Worker} | T], ParsedCql, _LastSts) -> {ok, Conn} = ecpool_worker:client(Worker), - case prepare_cql_to_conn(Conn, Prepares) of - {ok, Sts} -> - do_prepare_cql(T, Prepares, Sts); + case prepare_cql_to_conn(Conn, ParsedCql) of + {ok, Statement} -> + prepare_cql_to_cassandra(T, ParsedCql, Statement); Error -> Error end; -do_prepare_cql([], _Prepares, LastSts) -> +prepare_cql_to_cassandra([], _ParsedCql, LastSts) -> {ok, LastSts}. -prepare_cql_to_conn(Conn, Prepares) -> - prepare_cql_to_conn(Conn, Prepares, #{}). - -prepare_cql_to_conn(Conn, [], Statements) when is_pid(Conn) -> {ok, Statements}; -prepare_cql_to_conn(Conn, [{Key, SQL} | PrepareList], Statements) when is_pid(Conn) -> - ?SLOG(info, #{msg => "cassandra_prepare_cql", name => Key, prepare_cql => SQL}), - case ecql:prepare(Conn, Key, SQL) of +prepare_cql_to_conn(Conn, #{prepare_key := PrepareKey, prepare_cql := PrepareCQL}) when + is_pid(Conn) +-> + ?SLOG(info, #{ + msg => "cassandra_prepare_cql", prepare_key => PrepareKey, prepare_cql => PrepareCQL + }), + case ecql:prepare(Conn, PrepareKey, PrepareCQL) of {ok, Statement} -> - prepare_cql_to_conn(Conn, PrepareList, Statements#{Key => Statement}); - {error, Error} = Other -> + {ok, Statement}; + {error, Reason} = Error -> ?SLOG(error, #{ msg => "cassandra_prepare_cql_failed", worker_pid => Conn, - name => Key, - prepare_cql => SQL, - error => Error + name => PrepareKey, + prepare_cql => PrepareCQL, + reason => Reason }), - Other + Error end. handle_result({error, disconnected}) -> @@ -487,6 +432,9 @@ handle_result({error, Error}) -> handle_result(Res) -> Res. +transform_bridge_v1_config_to_connector_config(_) -> + ok. 
+ %%-------------------------------------------------------------------- %% utils @@ -513,3 +461,11 @@ maybe_assign_type(V) when is_integer(V) -> maybe_assign_type(V) when is_float(V) -> {double, V}; maybe_assign_type(V) -> V. + +short_prepare_key(Str) when is_binary(Str) -> + true = size(Str) > 0, + Sha = crypto:hash(sha, Str), + %% TODO: change to binary:encode_hex(X, lowercase) when OTP version is always > 25 + Hex = string:lowercase(binary:encode_hex(Sha)), + <> = Hex, + binary_to_atom(<<"cassa_prepare_key:", UniqueEnough/binary>>). diff --git a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl index e0e3900b0..77aec7d99 100644 --- a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl +++ b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl @@ -301,17 +301,28 @@ send_message(Config, Payload) -> query_resource(Config, Request) -> Name = ?config(cassa_name, Config), BridgeType = ?config(cassa_bridge_type, Config), - ResourceID = emqx_bridge_resource:resource_id(BridgeType, Name), - emqx_resource:query(ResourceID, Request, #{timeout => 1_000}). + BridgeV2Id = emqx_bridge_v2:id(BridgeType, Name), + ConnectorResId = emqx_connector_resource:resource_id( + cassandra, <<"connector_emqx_bridge_cassandra_SUITE">> + ), + emqx_resource:query(BridgeV2Id, Request, #{ + timeout => 1_000, connector_resource_id => ConnectorResId + }). query_resource_async(Config, Request) -> Name = ?config(cassa_name, Config), BridgeType = ?config(cassa_bridge_type, Config), Ref = alias([reply]), AsyncReplyFun = fun(Result) -> Ref ! 
{result, Ref, Result} end, - ResourceID = emqx_bridge_resource:resource_id(BridgeType, Name), - Return = emqx_resource:query(ResourceID, Request, #{ - timeout => 500, async_reply_fun => {AsyncReplyFun, []} + BridgeV2Id = emqx_bridge_v2:id(BridgeType, Name), + ConnectorResId = emqx_connector_resource:resource_id( + cassandra, <<"connector_emqx_bridge_cassandra_SUITE">> + ), + Return = emqx_resource:query(BridgeV2Id, Request, #{ + timeout => 500, + async_reply_fun => {AsyncReplyFun, []}, + connector_resource_id => ConnectorResId, + query_mode => async }), {Return, Ref}. diff --git a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl index 245110de6..50d82397a 100644 --- a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl +++ b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl @@ -22,10 +22,6 @@ %% ./rebar3 ct --name 'test@127.0.0.1' -v --suite \ %% apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE -%% Cassandra servers are defined at `.ci/docker-compose-file/docker-compose-cassandra.yaml` -%% You can change it to `127.0.0.1`, if you run this SUITE locally --define(CASSANDRA_HOST, "cassandra"). --define(CASSANDRA_HOST_NOAUTH, "cassandra_noauth"). -define(CASSANDRA_RESOURCE_MOD, emqx_bridge_cassandra_connector). 
%% Cassandra default username & password once enable `authenticator: PasswordAuthenticator` diff --git a/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl b/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl index 90c1ae1ce..077723538 100644 --- a/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl +++ b/apps/emqx_connector/src/schema/emqx_connector_ee_schema.erl @@ -36,6 +36,8 @@ resource_type(mongodb) -> emqx_bridge_mongodb_connector; resource_type(influxdb) -> emqx_bridge_influxdb_connector; +resource_type(cassandra) -> + emqx_bridge_cassandra_connector; resource_type(mysql) -> emqx_bridge_mysql_connector; resource_type(pgsql) -> @@ -134,6 +136,14 @@ connector_structs() -> required => false } )}, + {cassandra, + mk( + hoconsc:map(name, ref(emqx_bridge_cassandra, "config_connector")), + #{ + desc => <<"Cassandra Connector Config">>, + required => false + } + )}, {mysql, mk( hoconsc:map(name, ref(emqx_bridge_mysql, "config_connector")), @@ -217,6 +227,7 @@ schema_modules() -> emqx_bridge_matrix, emqx_bridge_mongodb, emqx_bridge_influxdb, + emqx_bridge_cassandra, emqx_bridge_mysql, emqx_bridge_syskeeper_connector, emqx_bridge_syskeeper_proxy, @@ -247,6 +258,7 @@ api_schemas(Method) -> api_ref(emqx_bridge_matrix, <<"matrix">>, Method ++ "_connector"), api_ref(emqx_bridge_mongodb, <<"mongodb">>, Method ++ "_connector"), api_ref(emqx_bridge_influxdb, <<"influxdb">>, Method ++ "_connector"), + api_ref(emqx_bridge_cassandra, <<"cassandra">>, Method ++ "_connector"), api_ref(emqx_bridge_mysql, <<"mysql">>, Method ++ "_connector"), api_ref(emqx_bridge_syskeeper_connector, <<"syskeeper_forwarder">>, Method), api_ref(emqx_bridge_syskeeper_proxy, <<"syskeeper_proxy">>, Method), diff --git a/apps/emqx_connector/src/schema/emqx_connector_schema.erl b/apps/emqx_connector/src/schema/emqx_connector_schema.erl index 1829e04e6..3c5fdfc03 100644 --- a/apps/emqx_connector/src/schema/emqx_connector_schema.erl +++ 
b/apps/emqx_connector/src/schema/emqx_connector_schema.erl @@ -137,6 +137,8 @@ connector_type_to_bridge_types(mongodb) -> [mongodb, mongodb_rs, mongodb_sharded, mongodb_single]; connector_type_to_bridge_types(influxdb) -> [influxdb, influxdb_api_v1, influxdb_api_v2]; +connector_type_to_bridge_types(cassandra) -> + [cassandra]; connector_type_to_bridge_types(mysql) -> [mysql]; connector_type_to_bridge_types(mqtt) -> diff --git a/rel/i18n/emqx_bridge_cassandra.hocon b/rel/i18n/emqx_bridge_cassandra.hocon index a96315340..29eb35de5 100644 --- a/rel/i18n/emqx_bridge_cassandra.hocon +++ b/rel/i18n/emqx_bridge_cassandra.hocon @@ -1,5 +1,15 @@ emqx_bridge_cassandra { +action_parameters.desc: +"""Action specific configs.""" +action_parameters.label: +"""Action""" + +cassandra_action.desc: +"""Action configs.""" +cassandra_action.label: +"""Action""" + config_enable.desc: """Enable or disable this bridge""" diff --git a/rel/i18n/emqx_bridge_cassandra_connector.hocon b/rel/i18n/emqx_bridge_cassandra_connector.hocon index b149cce8a..40e1c0e22 100644 --- a/rel/i18n/emqx_bridge_cassandra_connector.hocon +++ b/rel/i18n/emqx_bridge_cassandra_connector.hocon @@ -1,5 +1,11 @@ emqx_bridge_cassandra_connector { +config.desc: +"""Cassandra connection config""" + +config.label: +"""Connection config""" + keyspace.desc: """Keyspace name to connect to.""" From 5cd9f495588037bebbc8fc36892b8b3ce99b40bc Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Sun, 21 Jan 2024 21:00:38 +0800 Subject: [PATCH 81/89] ci: update generated connector name --- .../test/emqx_bridge_cassandra_SUITE.erl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl index 77aec7d99..09deaa699 100644 --- a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl +++ b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_SUITE.erl @@ -302,9 +302,7 @@ 
query_resource(Config, Request) -> Name = ?config(cassa_name, Config), BridgeType = ?config(cassa_bridge_type, Config), BridgeV2Id = emqx_bridge_v2:id(BridgeType, Name), - ConnectorResId = emqx_connector_resource:resource_id( - cassandra, <<"connector_emqx_bridge_cassandra_SUITE">> - ), + ConnectorResId = emqx_connector_resource:resource_id(BridgeType, Name), emqx_resource:query(BridgeV2Id, Request, #{ timeout => 1_000, connector_resource_id => ConnectorResId }). @@ -315,9 +313,7 @@ query_resource_async(Config, Request) -> Ref = alias([reply]), AsyncReplyFun = fun(Result) -> Ref ! {result, Ref, Result} end, BridgeV2Id = emqx_bridge_v2:id(BridgeType, Name), - ConnectorResId = emqx_connector_resource:resource_id( - cassandra, <<"connector_emqx_bridge_cassandra_SUITE">> - ), + ConnectorResId = emqx_connector_resource:resource_id(BridgeType, Name), Return = emqx_resource:query(BridgeV2Id, Request, #{ timeout => 500, async_reply_fun => {AsyncReplyFun, []}, From f0cde3fc5bc42b36327635fc37fa07cdec21ef5a Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Mon, 22 Jan 2024 15:45:00 +0800 Subject: [PATCH 82/89] chore: update change logs for cassandra bridge_v2 --- changes/ee/feat-12330.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ee/feat-12330.en.md diff --git a/changes/ee/feat-12330.en.md b/changes/ee/feat-12330.en.md new file mode 100644 index 000000000..963098659 --- /dev/null +++ b/changes/ee/feat-12330.en.md @@ -0,0 +1 @@ +The bridges for Cassandra have been split so they are available via the connectors and actions APIs. They are still backwards compatible with the old bridge API. 
From e4c683d6f8e54d2d36380f658c4ad4a151f1c63b Mon Sep 17 00:00:00 2001 From: Shawn <506895667@qq.com> Date: Tue, 23 Jan 2024 14:14:23 +0800 Subject: [PATCH 83/89] chore: update ecql to 0.6.0 --- apps/emqx_bridge_cassandra/rebar.config | 2 +- .../src/emqx_bridge_cassandra_connector.erl | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/apps/emqx_bridge_cassandra/rebar.config b/apps/emqx_bridge_cassandra/rebar.config index c0a72fef9..04ee603fa 100644 --- a/apps/emqx_bridge_cassandra/rebar.config +++ b/apps/emqx_bridge_cassandra/rebar.config @@ -2,7 +2,7 @@ {erl_opts, [debug_info]}. {deps, [ - {ecql, {git, "https://github.com/emqx/ecql.git", {tag, "v0.5.2"}}}, + {ecql, {git, "https://github.com/emqx/ecql.git", {tag, "v0.6.0"}}}, {emqx_connector, {path, "../../apps/emqx_connector"}}, {emqx_resource, {path, "../../apps/emqx_resource"}}, {emqx_bridge, {path, "../../apps/emqx_bridge"}} diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl index 3db71c9e0..3b30f1d26 100644 --- a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl @@ -159,7 +159,7 @@ on_add_channel(_InstId, #{channels := Channs} = OldState, ChannId, ChannConf0) - #{parameters := #{cql := CQL}} = ChannConf0, {PrepareCQL, ParamsTokens} = emqx_placeholder:preproc_sql(CQL, '?'), ParsedCql = #{ - prepare_key => short_prepare_key(ChannId), + prepare_key => make_prepare_key(ChannId), prepare_cql => PrepareCQL, params_tokens => ParamsTokens }, @@ -462,10 +462,5 @@ maybe_assign_type(V) when is_float(V) -> {double, V}; maybe_assign_type(V) -> V. 
-short_prepare_key(Str) when is_binary(Str) -> - true = size(Str) > 0, - Sha = crypto:hash(sha, Str), - %% TODO: change to binary:encode_hex(X, lowercase) when OTP version is always > 25 - Hex = string:lowercase(binary:encode_hex(Sha)), - <> = Hex, - binary_to_atom(<<"cassa_prepare_key:", UniqueEnough/binary>>). +make_prepare_key(ChannId) -> + ChannId. From 9003bc5b7228210a67eeaaede59f8865a7336097 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Mon, 22 Jan 2024 18:18:17 -0300 Subject: [PATCH 84/89] perf(ds): inherit only LTS paths containing wildcards when adding a new generation Fixes https://github.com/emqx/emqx/pull/12338#discussion_r1462139499 --- apps/emqx_durable_storage/src/emqx_ds_lts.erl | 148 +++++++++++++++++- .../src/emqx_ds_storage_bitfield_lts.erl | 27 ++-- 2 files changed, 152 insertions(+), 23 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_lts.erl index 9d87cf571..226af62f0 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_lts.erl @@ -20,7 +20,7 @@ -export([ trie_create/1, trie_create/0, trie_restore/2, - trie_restore_existing/2, + trie_copy_learned_paths/2, topic_key/3, match_topics/2, lookup_topic_key/2 @@ -120,10 +120,6 @@ trie_create() -> -spec trie_restore(options(), [{_Key, _Val}]) -> trie(). trie_restore(Options, Dump) -> Trie = trie_create(Options), - trie_restore_existing(Trie, Dump). - --spec trie_restore_existing(trie(), [{_Key, _Val}]) -> trie(). -trie_restore_existing(Trie, Dump) -> lists:foreach( fun({{StateFrom, Token}, StateTo}) -> trie_insert(Trie, StateFrom, Token, StateTo) @@ -132,6 +128,17 @@ trie_restore_existing(Trie, Dump) -> ), Trie. +-spec trie_copy_learned_paths(trie(), trie()) -> trie(). 
+trie_copy_learned_paths(OldTrie, NewTrie) -> + WildcardPaths = [P || P <- paths(OldTrie), contains_wildcard(P)], + lists:foreach( + fun({{StateFrom, Token}, StateTo}) -> + trie_insert(NewTrie, StateFrom, Token, StateTo) + end, + lists:flatten(WildcardPaths) + ), + NewTrie. + %% @doc Lookup the topic key. Create a new one, if not found. -spec topic_key(trie(), threshold_fun(), [binary() | '']) -> msg_storage_key(). topic_key(Trie, ThresholdFun, Tokens) -> @@ -385,6 +392,41 @@ emanating(#trie{trie = Tab}, State, Token) when is_binary(Token); Token =:= '' - ets:lookup(Tab, {State, Token}) ]. +all_emanating(#trie{trie = Tab}, State) -> + ets:select( + Tab, + ets:fun2ms(fun(#trans{key = {S, Edge}, next = Next}) when S == State -> + {{S, Edge}, Next} + end) + ). + +paths(#trie{} = T) -> + Roots = all_emanating(T, ?PREFIX), + lists:flatmap( + fun({Segment, Next}) -> + follow_path(T, Next, [{Segment, Next}]) + end, + Roots + ). + +follow_path(#trie{} = T, State, Path) -> + lists:flatmap( + fun + ({{_State, ?EOT}, _Next} = Segment) -> + [lists:reverse([Segment | Path])]; + ({_Edge, Next} = Segment) -> + follow_path(T, Next, [Segment | Path]) + end, + all_emanating(T, State) + ). + +contains_wildcard([{{_State, ?PLUS}, _Next} | _Rest]) -> + true; +contains_wildcard([_ | Rest]) -> + contains_wildcard(Rest); +contains_wildcard([]) -> + false. + %%================================================================================ %% Tests %%================================================================================ @@ -636,4 +678,100 @@ test_key(Trie, Threshold, Topic0) -> {ok, Ret} = lookup_topic_key(Trie, Topic), Ret. 
+paths_test() -> + T = trie_create(), + Threshold = 4, + ThresholdFun = fun + (0) -> 1000; + (_) -> Threshold + end, + PathsToInsert = + [ + [''], + [1], + [2, 2], + [3, 3, 3], + [2, 3, 4] + ] ++ [[4, I, 4] || I <- lists:seq(1, Threshold + 2)] ++ + [['', I, ''] || I <- lists:seq(1, Threshold + 2)], + lists:foreach( + fun(PathSpec) -> + test_key(T, ThresholdFun, PathSpec) + end, + PathsToInsert + ), + + %% Test that the paths we've inserted are produced in the output + Paths = paths(T), + FormattedPaths = lists:map(fun format_path/1, Paths), + ExpectedWildcardPaths = + [ + [4, '+', 4], + ['', '+', ''] + ], + ExpectedPaths = + [ + [''], + [1], + [2, 2], + [3, 3, 3] + ] ++ [[4, I, 4] || I <- lists:seq(1, Threshold)] ++ + [['', I, ''] || I <- lists:seq(1, Threshold)] ++ + ExpectedWildcardPaths, + FormatPathSpec = + fun(PathSpec) -> + lists:map( + fun + (I) when is_integer(I) -> integer_to_binary(I); + (A) -> A + end, + PathSpec + ) ++ [?EOT] + end, + lists:foreach( + fun(PathSpec) -> + Path = FormatPathSpec(PathSpec), + ?assert( + lists:member(Path, FormattedPaths), + #{ + paths => FormattedPaths, + expected_path => Path + } + ) + end, + ExpectedPaths + ), + + %% Test filter function for paths containing wildcards + WildcardPaths = lists:filter(fun contains_wildcard/1, Paths), + FormattedWildcardPaths = lists:map(fun format_path/1, WildcardPaths), + ?assertEqual( + sets:from_list(FormattedWildcardPaths, [{version, 2}]), + sets:from_list(lists:map(FormatPathSpec, ExpectedWildcardPaths), [{version, 2}]), + #{ + expected => ExpectedWildcardPaths, + wildcards => FormattedWildcardPaths + } + ), + + %% Test that we're able to reconstruct the same trie from the paths + T2 = trie_create(), + [ + trie_insert(T2, State, Edge, Next) + || Path <- Paths, + {{State, Edge}, Next} <- Path + ], + #trie{trie = Tab1} = T, + #trie{trie = Tab2} = T2, + Dump1 = sets:from_list(ets:tab2list(Tab1), [{version, 2}]), + Dump2 = sets:from_list(ets:tab2list(Tab2), [{version, 2}]), + 
?assertEqual(Dump1, Dump2), + + ok. + +format_path([{{_State, Edge}, _Next} | Rest]) -> + [Edge | format_path(Rest)]; +format_path([]) -> + []. + -endif. diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl index 2a3086a57..d407dab41 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_bitfield_lts.erl @@ -205,17 +205,15 @@ open(_Shard, DBHandle, GenId, CFRefs, Schema) -> s(). post_creation_actions( #{ - db := DBHandle, - old_gen_id := OldGenId, - old_cf_refs := OldCFRefs, - new_gen_runtime_data := NewGenData0 + new_gen_runtime_data := NewGenData, + old_gen_runtime_data := OldGenData } ) -> - {_, OldTrieCF} = lists:keyfind(trie_cf(OldGenId), 1, OldCFRefs), - #s{trie = NewTrie0} = NewGenData0, - NewTrie = copy_previous_trie(DBHandle, NewTrie0, OldTrieCF), + #s{trie = OldTrie} = OldGenData, + #s{trie = NewTrie0} = NewGenData, + NewTrie = copy_previous_trie(OldTrie, NewTrie0), ?tp(bitfield_lts_inherited_trie, #{}), - NewGenData0#s{trie = NewTrie}. + NewGenData#s{trie = NewTrie}. -spec drop( emqx_ds_storage_layer:shard_id(), @@ -533,16 +531,9 @@ restore_trie(TopicIndexBytes, DB, CF) -> rocksdb:iterator_close(IT) end. --spec copy_previous_trie(rocksdb:db_handle(), emqx_ds_lts:trie(), rocksdb:cf_handle()) -> - emqx_ds_lts:trie(). -copy_previous_trie(DBHandle, NewTrie, OldCF) -> - {ok, IT} = rocksdb:iterator(DBHandle, OldCF, []), - try - OldDump = read_persisted_trie(IT, rocksdb:iterator_move(IT, first)), - emqx_ds_lts:trie_restore_existing(NewTrie, OldDump) - after - rocksdb:iterator_close(IT) - end. +-spec copy_previous_trie(emqx_ds_lts:trie(), emqx_ds_lts:trie()) -> emqx_ds_lts:trie(). +copy_previous_trie(OldTrie, NewTrie) -> + emqx_ds_lts:trie_copy_learned_paths(OldTrie, NewTrie). 
read_persisted_trie(IT, {ok, KeyB, ValB}) -> [ From 024ffe29092dee3ee3841dc97574cad6a8578757 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Tue, 23 Jan 2024 09:47:03 -0300 Subject: [PATCH 85/89] test(ds): reduce flakiness --- .../test/emqx_persistent_messages_SUITE.erl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl index 73c88adc8..36c8848cf 100644 --- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl @@ -438,10 +438,19 @@ t_message_gc(Config) -> TopicFilter = emqx_topic:words(<<"#">>), StartTime = 0, Msgs = consume(TopicFilter, StartTime), - %% only "1" and "2" should have been GC'ed - ?assertEqual( - sets:from_list([<<"3">>, <<"4">>], [{version, 2}]), - sets:from_list([emqx_message:payload(Msg) || Msg <- Msgs], [{version, 2}]) + %% "1" and "2" should have been GC'ed + PresentMessages = sets:from_list( + [emqx_message:payload(Msg) || Msg <- Msgs], + [{version, 2}] + ), + ?assert( + sets:is_empty( + sets:intersection( + PresentMessages, + sets:from_list([<<"1">>, <<"2">>], [{version, 2}]) + ) + ), + #{present_messages => PresentMessages} ), ok From 3897f5bc877b87c381d05a77ad356f7d888f3d80 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jan 2024 03:44:31 +0000 Subject: [PATCH 86/89] chore(deps): bump the actions-package-macos group Bumps the actions-package-macos group in /.github/actions/package-macos with 1 update: [actions/cache](https://github.com/actions/cache). 
Updates `actions/cache` from 3.3.3 to 4.0.0 - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/e12d46a63a90f2fae62d114769bbf2a179198b5c...13aacd865c20de90d75de3b17ebe84f7a17d57d2) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-major dependency-group: actions-package-macos ... Signed-off-by: dependabot[bot] --- .github/actions/package-macos/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/package-macos/action.yaml b/.github/actions/package-macos/action.yaml index 64d179b46..1553576b2 100644 --- a/.github/actions/package-macos/action.yaml +++ b/.github/actions/package-macos/action.yaml @@ -51,7 +51,7 @@ runs: echo "SELF_HOSTED=false" >> $GITHUB_OUTPUT ;; esac - - uses: actions/cache@e12d46a63a90f2fae62d114769bbf2a179198b5c # v3.3.3 + - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 id: cache if: steps.prepare.outputs.SELF_HOSTED != 'true' with: From 9afb5f661cbde30d1143f321e74a1c946cf25935 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 24 Jan 2024 21:53:23 +0800 Subject: [PATCH 87/89] build: direct paths to avoid wildcard traversal of the _build dir - erlfmt always try loop through files in the `--exclude-files` dir --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 0ccd48410..47e665de2 100644 --- a/Makefile +++ b/Makefile @@ -316,10 +316,9 @@ $(foreach tt,$(ALL_ELIXIR_TGZS),$(eval $(call gen-elixir-tgz-target,$(tt)))) .PHONY: fmt fmt: $(REBAR) @$(SCRIPTS)/erlfmt -w 'apps/*/{src,include,priv,test,integration_test}/**/*.{erl,hrl,app.src,eterm}' - @$(SCRIPTS)/erlfmt -w '**/*.escript' --exclude-files '_build/**' - @$(SCRIPTS)/erlfmt -w '**/rebar.config' --exclude-files '_build/**' - @$(SCRIPTS)/erlfmt -w 'rebar.config.erl' - 
@$(SCRIPTS)/erlfmt -w 'bin/nodetool' + @$(SCRIPTS)/erlfmt -w 'apps/*/rebar.config' 'apps/emqx/rebar.config.script' '.ci/fvt_tests/http_server/rebar.config' + @$(SCRIPTS)/erlfmt -w 'rebar.config' 'rebar.config.erl' + @$(SCRIPTS)/erlfmt -w 'scripts/*.escript' 'bin/*.escript' 'bin/nodetool' @mix format .PHONY: clean-test-cluster-config From 846ad42a65279f786db832181c14c214adc51e71 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Wed, 24 Jan 2024 11:39:14 -0300 Subject: [PATCH 88/89] fix(ds): don't use env var for data dir default value Fixes https://github.com/emqx/emqx/pull/12380 --- apps/emqx/src/emqx_persistent_message.erl | 4 ++-- apps/emqx/src/emqx_schema.erl | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_message.erl b/apps/emqx/src/emqx_persistent_message.erl index d725c9b2c..8e3755fdb 100644 --- a/apps/emqx/src/emqx_persistent_message.erl +++ b/apps/emqx/src/emqx_persistent_message.erl @@ -61,13 +61,13 @@ force_ds() -> emqx_config:get([session_persistence, force_persistence]). 
storage_backend(#{ - builtin := #{ + builtin := Opts = #{ enable := true, - data_dir := DataDir, n_shards := NShards, replication_factor := ReplicationFactor } }) -> + DataDir = maps:get(data_dir, Opts, emqx:data_dir()), #{ backend => builtin, data_dir => DataDir, diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 56d575bd9..bbca13172 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -1896,11 +1896,8 @@ fields("session_storage_backend_builtin") -> string(), #{ desc => ?DESC(session_builtin_data_dir), - default => <<"${EMQX_DATA_DIR}">>, - importance => ?IMPORTANCE_LOW, - converter => fun(Path, Opts) -> - naive_env_interpolation(ensure_unicode_path(Path, Opts)) - end + required => false, + importance => ?IMPORTANCE_LOW } )}, {"n_shards", From aeefbe95447ee4340ef355c36266b2912f76692a Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 24 Jan 2024 18:42:24 +0100 Subject: [PATCH 89/89] fix(ds): Cache database metadata in RAM --- .../src/emqx_ds_replication_layer_meta.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl index 38c2dbbe7..16c52f20e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl @@ -388,7 +388,7 @@ ensure_tables() -> {rlog_shard, ?SHARD}, {majority, Majority}, {type, ordered_set}, - {storage, rocksdb_copies}, + {storage, disc_copies}, {record_name, ?META_TAB}, {attributes, record_info(fields, ?META_TAB)} ]), @@ -396,7 +396,7 @@ ensure_tables() -> {rlog_shard, ?SHARD}, {majority, Majority}, {type, ordered_set}, - {storage, rocksdb_copies}, + {storage, disc_copies}, {record_name, ?NODE_TAB}, {attributes, record_info(fields, ?NODE_TAB)} ]),