From a35698009cf8065f86b940dd0d7d1e78c117d24f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 16:25:54 +0800 Subject: [PATCH 01/38] feat(dashboard): endpoint `/monitor_current` provides more fields - `retained_msg_count` Current retained_msg_count on each node and should be same on all nodes. - `license_quota` Only for enterprise edition, provides the max limited connections num. --- .../src/emqx_dashboard_monitor.erl | 22 ++++++++++++++++++- .../src/emqx_dashboard_monitor_api.erl | 14 +++++++++++- apps/emqx_retainer/src/emqx_retainer_api.erl | 6 ++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index ad95e8678..7916a6b58 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -173,7 +173,9 @@ handle_call(current_rate, _From, State = #state{last = Last}) -> NowTime = erlang:system_time(millisecond), NowSamplers = sample(NowTime), Rate = cal_rate(NowSamplers, Last), - {reply, {ok, Rate}, State}; + NonRateValue = non_rate_value(), + Samples = maps:merge(Rate, NonRateValue), + {reply, {ok, Samples}, State}; handle_call(_Request, _From, State = #state{}) -> {reply, ok, State}. @@ -409,3 +411,21 @@ stats(received_bytes) -> emqx_metrics:val('bytes.received'); stats(sent) -> emqx_metrics:val('messages.sent'); stats(sent_bytes) -> emqx_metrics:val('bytes.sent'); stats(dropped) -> emqx_metrics:val('messages.dropped'). + +%% ------------------------------------------------------------------------------------------------- +%% Retained && License Quota + +%% the non rate values should be same on all nodes +non_rate_value() -> + #{ + retained_msg_count => emqx_retainer:retained_count(), + license_quota => license_quota() + }. + +license_quota() -> + case emqx_license_checker:limits() of + {ok, #{max_connections := Quota}} -> + Quota; + {error, no_license} -> + 0 + end. 
diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl index a152531f1..309137362 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl @@ -1,5 +1,17 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2019-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_dashboard_monitor_api). diff --git a/apps/emqx_retainer/src/emqx_retainer_api.erl b/apps/emqx_retainer/src/emqx_retainer_api.erl index bb232f9e4..6d5eee477 100644 --- a/apps/emqx_retainer/src/emqx_retainer_api.erl +++ b/apps/emqx_retainer/src/emqx_retainer_api.erl @@ -44,7 +44,11 @@ api_spec() -> emqx_dashboard_swagger:spec(?MODULE, #{check_schema => true}). paths() -> - [?PREFIX, ?PREFIX ++ "/messages", ?PREFIX ++ "/message/:topic"]. + [ + ?PREFIX, + ?PREFIX ++ "/messages", + ?PREFIX ++ "/message/:topic" + ]. 
schema(?PREFIX) -> #{ From 3d6b65acedda637eacea1e4e65722f36dcd3c18b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 16:30:16 +0800 Subject: [PATCH 02/38] chore: emqx_dashboard_monitor:current_rate/0 not exported anymore --- .../src/emqx_dashboard_monitor.erl | 66 +++++++++++-------- .../src/emqx_dashboard_monitor_api.erl | 4 ++ 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index 7916a6b58..692c7a62e 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -40,11 +40,14 @@ -export([ samplers/0, samplers/2, - current_rate/0, current_rate/1, granularity_adapter/1 ]). +-ifdef(TEST). +-export([current_rate_cluster/0]). +-endif. + %% for rpc -export([do_sample/2]). @@ -112,8 +115,33 @@ granularity_adapter(List) when length(List) > 1000 -> granularity_adapter(List) -> List. +current_rate(all) -> + current_rate_cluster(); +current_rate(Node) when Node == node() -> + try + {ok, Rate} = do_call(current_rate), + {ok, Rate} + catch + _E:R -> + ?SLOG(warning, #{msg => "dashboard_monitor_error", reason => R}), + %% Rate map 0, ensure api will not crash. + %% When joining cluster, dashboard monitor restart. + Rate0 = [ + {Key, 0} + || Key <- ?GAUGE_SAMPLER_LIST ++ maps:values(?DELTA_SAMPLER_RATE_MAP) + ], + {ok, maps:merge(maps:from_list(Rate0), non_rate_value())} + end; +current_rate(Node) -> + case emqx_dashboard_proto_v1:current_rate(Node) of + {badrpc, Reason} -> + {badrpc, {Node, Reason}}; + {ok, Rate} -> + {ok, Rate} + end. + %% Get the current rate. Not the current sampler data. -current_rate() -> +current_rate_cluster() -> Fun = fun (Node, Cluster) when is_map(Cluster) -> @@ -133,31 +161,6 @@ current_rate() -> {ok, Rate} end. 
-current_rate(all) -> - current_rate(); -current_rate(Node) when Node == node() -> - try - {ok, Rate} = do_call(current_rate), - {ok, Rate} - catch - _E:R -> - ?SLOG(warning, #{msg => "dashboard_monitor_error", reason => R}), - %% Rate map 0, ensure api will not crash. - %% When joining cluster, dashboard monitor restart. - Rate0 = [ - {Key, 0} - || Key <- ?GAUGE_SAMPLER_LIST ++ maps:values(?DELTA_SAMPLER_RATE_MAP) - ], - {ok, maps:from_list(Rate0)} - end; -current_rate(Node) -> - case emqx_dashboard_proto_v1:current_rate(Node) of - {badrpc, Reason} -> - {badrpc, {Node, Reason}}; - {ok, Rate} -> - {ok, Rate} - end. - %% ------------------------------------------------------------------------------------------------- %% gen_server functions @@ -258,8 +261,13 @@ merge_cluster_sampler_map(M1, M2) -> merge_cluster_rate(Node, Cluster) -> Fun = fun - (topics, Value, NCluster) -> - NCluster#{topics => Value}; + %% cluster-synced values + (topics, V, NCluster) -> + NCluster#{topics => V}; + (retained_msg_count, V, NCluster) -> + NCluster#{retained_msg_count => V}; + (license_quota, V, NCluster) -> + NCluster#{license_quota => V}; (Key, Value, NCluster) -> ClusterValue = maps:get(Key, NCluster, 0), NCluster#{Key => Value + ClusterValue} diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl index 309137362..fc4b171a4 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl @@ -151,7 +151,11 @@ monitor_current(get, #{bindings := Bindings}) -> RawNode = maps:get(node, Bindings, <<"all">>), emqx_utils_api:with_node_or_cluster(RawNode, fun current_rate/1). +-spec current_rate(atom()) -> + {error, term()} + | {ok, Result :: map()}. 
current_rate(Node) -> + %% Node :: 'all' or `NodeName` case emqx_dashboard_monitor:current_rate(Node) of {badrpc, _} = BadRpc -> {error, BadRpc}; From 7bc3a5090d71701dfee5ba785ed8fda5a3248854 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 12 Jan 2024 04:36:14 +0800 Subject: [PATCH 03/38] fix(dashboard): meck `emqx_retainer` in SUITE --- apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl | 2 -- apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl index fc4b171a4..d7e3c094c 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor_api.erl @@ -145,8 +145,6 @@ dashboard_samplers_fun(Latest) -> end end. -monitor_current(get, #{bindings := []}) -> - emqx_utils_api:with_node_or_cluster(erlang:node(), fun emqx_dashboard_monitor:current_rate/1); monitor_current(get, #{bindings := Bindings}) -> RawNode = maps:get(node, Bindings, <<"all">>), emqx_utils_api:with_node_or_cluster(RawNode, fun current_rate/1). diff --git a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl index 71e559647..8b02ae20f 100644 --- a/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl +++ b/apps/emqx_dashboard/test/emqx_dashboard_monitor_SUITE.erl @@ -31,10 +31,13 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> + meck:new(emqx_retainer, [non_strict, passthrough, no_history, no_link]), + meck:expect(emqx_retainer, retained_count, fun() -> 0 end), emqx_mgmt_api_test_util:init_suite([]), Config. end_per_suite(_Config) -> + meck:unload([emqx_retainer]), emqx_mgmt_api_test_util:end_suite([]). 
t_monitor_samplers_all(_Config) -> @@ -198,5 +201,5 @@ waiting_emqx_stats_and_monitor_update(WaitKey) -> end, meck:unload([emqx_stats]), %% manually call monitor update - _ = emqx_dashboard_monitor:current_rate(), + _ = emqx_dashboard_monitor:current_rate_cluster(), ok. From ecd0da9fde718f2ed880ee368965be0789facdc4 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 17:05:04 +0800 Subject: [PATCH 04/38] feat(dashboard): `/monitor_current/nodes/{node}` provide `node_uptime` --- apps/emqx_dashboard/src/emqx_dashboard_monitor.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index 692c7a62e..b21712497 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -268,6 +268,9 @@ merge_cluster_rate(Node, Cluster) -> NCluster#{retained_msg_count => V}; (license_quota, V, NCluster) -> NCluster#{license_quota => V}; + %% for cluster sample, ignore node_uptime + (node_uptime, _V, NCluster) -> + NCluster; (Key, Value, NCluster) -> ClusterValue = maps:get(Key, NCluster, 0), NCluster#{Key => Value + ClusterValue} @@ -427,7 +430,8 @@ stats(dropped) -> emqx_metrics:val('messages.dropped'). non_rate_value() -> #{ retained_msg_count => emqx_retainer:retained_count(), - license_quota => license_quota() + license_quota => license_quota(), + node_uptime => emqx_sys:uptime() }. 
license_quota() -> From 89128958ed99b9e1ae4a98975b6db33e25a97ce7 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sat, 13 Jan 2024 16:48:12 +0800 Subject: [PATCH 05/38] fix(dashboard): licence quota not provided to ce edition --- apps/emqx_dashboard/src/emqx_dashboard_monitor.erl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl index b21712497..4891b5293 100644 --- a/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl +++ b/apps/emqx_dashboard/src/emqx_dashboard_monitor.erl @@ -428,16 +428,20 @@ stats(dropped) -> emqx_metrics:val('messages.dropped'). %% the non rate values should be same on all nodes non_rate_value() -> - #{ + (license_quota())#{ retained_msg_count => emqx_retainer:retained_count(), - license_quota => license_quota(), node_uptime => emqx_sys:uptime() }. +-if(?EMQX_RELEASE_EDITION == ee). license_quota() -> case emqx_license_checker:limits() of {ok, #{max_connections := Quota}} -> - Quota; + #{license_quota => Quota}; {error, no_license} -> - 0 + #{license_quota => 0} end. +-else. +license_quota() -> + #{}. +-endif. 
From 81ba166b4974f7f27e3b5158ada877f20620a100 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 19:09:33 +0800 Subject: [PATCH 06/38] feat(license): expiry epoch api --- apps/emqx_license/src/emqx_license.app.src | 2 +- apps/emqx_license/src/emqx_license_checker.erl | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/apps/emqx_license/src/emqx_license.app.src b/apps/emqx_license/src/emqx_license.app.src index 8d11c6522..7b80af601 100644 --- a/apps/emqx_license/src/emqx_license.app.src +++ b/apps/emqx_license/src/emqx_license.app.src @@ -1,6 +1,6 @@ {application, emqx_license, [ {description, "EMQX License"}, - {vsn, "5.0.14"}, + {vsn, "5.0.15"}, {modules, []}, {registered, [emqx_license_sup]}, {applications, [kernel, stdlib, emqx_ctl]}, diff --git a/apps/emqx_license/src/emqx_license_checker.erl b/apps/emqx_license/src/emqx_license_checker.erl index 198814fb9..b1547b52e 100644 --- a/apps/emqx_license/src/emqx_license_checker.erl +++ b/apps/emqx_license/src/emqx_license_checker.erl @@ -30,6 +30,7 @@ start_link/2, update/1, dump/0, + expiry_epoch/0, purge/0, limits/0, print_warnings/1 @@ -67,6 +68,10 @@ update(License) -> dump() -> gen_server:call(?MODULE, dump, infinity). +-spec expiry_epoch() -> integer(). +expiry_epoch() -> + gen_server:call(?MODULE, expiry_epoch, infinity). + -spec limits() -> {ok, limits()} | {error, any()}. 
limits() -> try ets:lookup(?LICENSE_TAB, limits) of @@ -111,6 +116,9 @@ handle_call({update, License}, _From, #{license := Old} = State) -> {reply, check_license(License), State1#{license => License}}; handle_call(dump, _From, #{license := License} = State) -> {reply, emqx_license_parser:dump(License), State}; +handle_call(expiry_epoch, _From, #{license := License} = State) -> + ExpiryEpoch = date_to_expiry_epoch(emqx_license_parser:expiry_date(License)), + {reply, ExpiryEpoch, State}; handle_call(purge, _From, State) -> _ = ets:delete_all_objects(?LICENSE_TAB), {reply, ok, State}; @@ -234,6 +242,11 @@ small_customer_overdue(_CType, _DaysLeft) -> false. non_official_license_overdue(?OFFICIAL, _) -> false; non_official_license_overdue(_, DaysLeft) -> DaysLeft < 0. +%% 62167219200 =:= calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}). +-define(EPOCH_START, 62167219200). +date_to_expiry_epoch({Y, M, D}) -> + calendar:datetime_to_gregorian_seconds({{Y, M, D}, {0, 0, 0}}) - ?EPOCH_START. + apply_limits(Limits) -> ets:insert(?LICENSE_TAB, {limits, Limits}). From fae0cea17f8be7520ff738585e114487f49d75b9 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 23:48:28 +0800 Subject: [PATCH 07/38] fix(license): make dialyzer happy --- apps/emqx_license/src/emqx_license_checker.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/emqx_license/src/emqx_license_checker.erl b/apps/emqx_license/src/emqx_license_checker.erl index b1547b52e..88bc79f90 100644 --- a/apps/emqx_license/src/emqx_license_checker.erl +++ b/apps/emqx_license/src/emqx_license_checker.erl @@ -244,6 +244,7 @@ non_official_license_overdue(_, DaysLeft) -> DaysLeft < 0. %% 62167219200 =:= calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}). -define(EPOCH_START, 62167219200). +-spec date_to_expiry_epoch(calendar:date()) -> Seconds :: non_neg_integer(). date_to_expiry_epoch({Y, M, D}) -> calendar:datetime_to_gregorian_seconds({{Y, M, D}, {0, 0, 0}}) - ?EPOCH_START. 
From 558c4713927a507ee3948eb4bddfb9a3842f609d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 11 Jan 2024 22:27:55 +0800 Subject: [PATCH 08/38] feat(prometheus): license expiry at epoch as gauge --- apps/emqx_prometheus/src/emqx_prometheus.erl | 21 ++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index d513e2c37..327586996 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -173,7 +173,9 @@ collect_mf(_Registry, Callback) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), + LicenseData = emqx_license_data(), ClusterData = emqx_cluster_data(), + _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], _ = [add_collect_family(Name, ClusterData, Callback, gauge) || Name <- emqx_cluster()], @@ -192,7 +194,9 @@ collect(<<"json">>) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), + LicenseData = emqx_license_data(), #{ + license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()]), stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), packets => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_packets()]), @@ -506,7 +510,6 @@ emqx_collect(emqx_authentication_failure, Stats) -> counter_metric(?C('authentication.failure', Stats)); %%-------------------------------------------------------------------- %% VM - emqx_collect(emqx_vm_cpu_use, VMData) -> gauge_metric(?C(cpu_use, VMData)); emqx_collect(emqx_vm_cpu_idle, VMData) -> @@ -522,7 +525,11 @@ 
emqx_collect(emqx_vm_used_memory, VMData) -> emqx_collect(emqx_cluster_nodes_running, ClusterData) -> gauge_metric(?C(nodes_running, ClusterData)); emqx_collect(emqx_cluster_nodes_stopped, ClusterData) -> - gauge_metric(?C(nodes_stopped, ClusterData)). + gauge_metric(?C(nodes_stopped, ClusterData)); +%%-------------------------------------------------------------------- +%% License +emqx_collect(emqx_license_expiry_at, LicenseData) -> + gauge_metric(?C(expiry_at, LicenseData)). %%-------------------------------------------------------------------- %% Indicators @@ -687,6 +694,16 @@ emqx_cluster_data() -> {nodes_stopped, length(Stopped)} ]. +emqx_license() -> + [ + emqx_license_expiry_at + ]. + +emqx_license_data() -> + [ + {expiry_at, emqx_license_checker:expiry_epoch()} + ]. + %% deprecated_since 5.0.10, remove this when 5.1.x do_start() -> emqx_prometheus_sup:start_child(?APP). From 092159b07106868c674564229fb926a51632ee30 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 12 Jan 2024 03:44:57 +0800 Subject: [PATCH 09/38] feat(prometheus): cert expiry epoch in endpoint `/prometheus/stats` --- apps/emqx_prometheus/src/emqx_prometheus.erl | 135 +++++++++++++++++- .../src/emqx_prometheus_api.erl | 2 +- 2 files changed, 131 insertions(+), 6 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 327586996..3ac32a47c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -24,6 +24,7 @@ -include("emqx_prometheus.hrl"). +-include_lib("public_key/include/public_key.hrl"). -include_lib("prometheus/include/prometheus_model.hrl"). -include_lib("emqx/include/logger.hrl"). @@ -32,6 +33,7 @@ [ create_mf/5, gauge_metric/1, + gauge_metrics/1, counter_metric/1 ] ). 
@@ -175,7 +177,10 @@ collect_mf(_Registry, Callback) -> VMData = emqx_vm_data(), LicenseData = emqx_license_data(), ClusterData = emqx_cluster_data(), + CertsData = emqx_certs_data(), + %% TODO: license expiry epoch and cert expiry epoch should be cached _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], + _ = [add_collect_family(Name, CertsData, Callback, gauge) || Name <- emqx_certs()], _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], _ = [add_collect_family(Name, ClusterData, Callback, gauge) || Name <- emqx_cluster()], @@ -195,8 +200,13 @@ collect(<<"json">>) -> Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), LicenseData = emqx_license_data(), + %% TODO: FIXME! + %% emqx_metrics_olp()), + %% emqx_metrics_acl()), + %% emqx_metrics_authn()), #{ license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()]), + certs => collect_certs_json(emqx_certs_data()), stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), packets => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_packets()]), @@ -223,10 +233,7 @@ collect_metrics(Name, Metrics) -> emqx_collect(Name, Metrics). add_collect_family(Name, Data, Callback, Type) -> - Callback(create_schema(Name, <<"">>, Data, Type)). - -create_schema(Name, Help, Data, Type) -> - create_mf(Name, Help, Type, ?MODULE, Data). + Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). %%-------------------------------------------------------------------- %% Collector @@ -529,7 +536,11 @@ emqx_collect(emqx_cluster_nodes_stopped, ClusterData) -> %%-------------------------------------------------------------------- %% License emqx_collect(emqx_license_expiry_at, LicenseData) -> - gauge_metric(?C(expiry_at, LicenseData)). 
+ gauge_metric(?C(expiry_at, LicenseData)); +%%-------------------------------------------------------------------- +%% Certs +emqx_collect(emqx_cert_expiry_at, CertsData) -> + gauge_metrics(CertsData). %%-------------------------------------------------------------------- %% Indicators @@ -704,6 +715,120 @@ emqx_license_data() -> {expiry_at, emqx_license_checker:expiry_epoch()} ]. +emqx_certs() -> + [ + emqx_cert_expiry_at + ]. + +-define(LISTENER_TYPES, [ssl, wss, quic]). + +-spec emqx_certs_data() -> + [_Point :: {[Label], Epoch}] +when + Label :: TypeLabel | NameLabel | CertTypeLabel, + TypeLabel :: {listener_type, ssl | wss | quic}, + NameLabel :: {listener_name, atom()}, + CertTypeLabel :: {cert_type, cacertfile | certfile}, + Epoch :: non_neg_integer(). +emqx_certs_data() -> + case emqx_config:get([listeners], undefined) of + undefined -> + []; + AllListeners when is_map(AllListeners) -> + lists:foldl( + fun(ListenerType, PointsAcc) -> + PointsAcc ++ + points_of_listeners(ListenerType, AllListeners) + end, + _PointsInitAcc = [], + ?LISTENER_TYPES + ) + end. + +points_of_listeners(Type, AllListeners) -> + do_points_of_listeners(Type, maps:get(Type, AllListeners, undefined)). + +-define(CERT_TYPES, [cacertfile, certfile]). + +-spec do_points_of_listeners(Type, TypeOfListeners) -> + [_Point :: {[{LabelKey, LabelValue}], Epoch}] +when + Type :: ssl | wss | quic, + TypeOfListeners :: #{ListenerName :: atom() => ListenerConf :: map()} | undefined, + LabelKey :: atom(), + LabelValue :: atom(), + Epoch :: non_neg_integer(). 
+do_points_of_listeners(_, undefined) -> + []; +do_points_of_listeners(ListenerType, TypeOfListeners) -> + lists:foldl( + fun(Name, PointsAcc) -> + lists:foldl( + fun(CertType, AccIn) -> + case + emqx_utils_maps:deep_get( + [Name, ssl_options, CertType], TypeOfListeners, undefined + ) + of + undefined -> AccIn; + Path -> [gen_point(ListenerType, Name, CertType, Path) | AccIn] + end + end, + [], + ?CERT_TYPES + ) ++ PointsAcc + end, + [], + maps:keys(TypeOfListeners) + ). + +gen_point(Type, Name, CertType, Path) -> + { + %% Labels: [{_Labelkey, _LabelValue}] + [ + {listener_type, Type}, + {listener_name, Name}, + {cert_type, CertType} + ], + %% Value + cert_expiry_at_from_path(Path) + }. + +collect_certs_json(CertsData) -> + lists:foldl( + fun({Labels, Data}, AccIn) -> + [(maps:from_list(Labels))#{emqx_cert_expiry_at => Data} | AccIn] + end, + _InitAcc = [], + CertsData + ). + +%% TODO: cert manager for more generic utils functions +cert_expiry_at_from_path(Path0) -> + Path = emqx_schema:naive_env_interpolation(Path0), + {ok, PemBin} = file:read_file(Path), + [CertEntry | _] = public_key:pem_decode(PemBin), + Cert = public_key:pem_entry_decode(CertEntry), + {'utcTime', NotAfterUtc} = + Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', + utc_time_to_epoch(NotAfterUtc). + +utc_time_to_epoch(UtcTime) -> + date_to_expiry_epoch(utc_time_to_datetime(UtcTime)). + +utc_time_to_datetime(Str) -> + {ok, [Year, Month, Day, Hour, Minute, Second], _} = io_lib:fread( + "~2d~2d~2d~2d~2d~2dZ", Str + ), + %% Always assuming YY is in 2000 + {{2000 + Year, Month, Day}, {Hour, Minute, Second}}. + +%% 62167219200 =:= calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}). +-define(EPOCH_START, 62167219200). +-spec date_to_expiry_epoch(calendar:datetime()) -> Seconds :: non_neg_integer(). +date_to_expiry_epoch(DateTime) -> + calendar:datetime_to_gregorian_seconds(DateTime) - ?EPOCH_START. 
+ %% deprecated_since 5.0.10, remove this when 5.1.x do_start() -> emqx_prometheus_sup:start_child(?APP). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 44e0fac16..5bfa3e3a5 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -181,7 +181,7 @@ recommend_setting_example() -> prometheus_data_schema() -> #{ description => - <<"Get Prometheus Data. Note that support for JSON output is deprecated and will be removed in v5.2.">>, + <<"Get Prometheus Data.">>, content => [ {'text/plain', #{schema => #{type => string}}}, From e0feb580b6a69dc6fdc1b3c23ce857769c646c9b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 12 Jan 2024 05:40:28 +0800 Subject: [PATCH 10/38] feat(prometheus): auth metrics with text/plain --- apps/emqx_conf/src/emqx_conf_schema.erl | 1 + .../include/emqx_prometheus.hrl | 12 + apps/emqx_prometheus/rebar.config | 3 +- .../src/emqx_prometheus.app.src | 2 +- apps/emqx_prometheus/src/emqx_prometheus.erl | 11 +- .../src/emqx_prometheus_api.erl | 30 +- .../src/emqx_prometheus_auth.erl | 400 ++++++++++++++++++ .../src/emqx_prometheus_config.erl | 11 +- rel/i18n/emqx_prometheus_api.hocon | 5 + 9 files changed, 467 insertions(+), 8 deletions(-) create mode 100644 apps/emqx_prometheus/src/emqx_prometheus_auth.erl diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index 6614b24e2..571f5785b 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1106,6 +1106,7 @@ tr_prometheus_collectors(Conf) -> prometheus_summary, %% emqx collectors emqx_prometheus, + {'/prometheus/auth', emqx_prometheus_auth}, emqx_prometheus_mria %% builtin vm collectors | prometheus_collectors(Conf) diff --git a/apps/emqx_prometheus/include/emqx_prometheus.hrl b/apps/emqx_prometheus/include/emqx_prometheus.hrl index 8d552f025..9057f2b14 100644 --- 
a/apps/emqx_prometheus/include/emqx_prometheus.hrl +++ b/apps/emqx_prometheus/include/emqx_prometheus.hrl @@ -16,3 +16,15 @@ -define(APP, emqx_prometheus). -define(PROMETHEUS, [prometheus]). + +-define(PROMETHEUS_DEFAULT_REGISTRY, default). +-define(PROMETHEUS_AUTH_REGISTRY, '/prometheus/auth'). +-define(PROMETHEUS_AUTH_COLLECTOR, emqx_prometheus_auth). +-define(PROMETHEUS_DATA_INTEGRATION_REGISTRY, '/prometheus/data_integration'). +-define(PROMETHEUS_DATA_INTEGRATION_COLLECTOR, emqx_prometheus_data_integration). + +-define(PROMETHEUS_ALL_REGISTRYS, [ + ?PROMETHEUS_DEFAULT_REGISTRY, + ?PROMETHEUS_AUTH_REGISTRY, + ?PROMETHEUS_DATA_INTEGRATION_REGISTRY +]). diff --git a/apps/emqx_prometheus/rebar.config b/apps/emqx_prometheus/rebar.config index 12aa9060b..649437765 100644 --- a/apps/emqx_prometheus/rebar.config +++ b/apps/emqx_prometheus/rebar.config @@ -3,7 +3,8 @@ {deps, [ {emqx, {path, "../emqx"}}, {emqx_utils, {path, "../emqx_utils"}}, - {prometheus, {git, "https://github.com/emqx/prometheus.erl", {tag, "v4.10.0.1"}}} + {emqx_auth, {path, "../emqx_auth"}}, + {prometheus, {git, "https://github.com/emqx/prometheus.erl", {tag, "v4.10.0.2"}}} ]}. {edoc_opts, [{preprocess, true}]}. 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus.app.src b/apps/emqx_prometheus/src/emqx_prometheus.app.src index fe0c42566..75c608087 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.app.src +++ b/apps/emqx_prometheus/src/emqx_prometheus.app.src @@ -5,7 +5,7 @@ {vsn, "5.0.19"}, {modules, []}, {registered, [emqx_prometheus_sup]}, - {applications, [kernel, stdlib, prometheus, emqx, emqx_management]}, + {applications, [kernel, stdlib, prometheus, emqx, emqx_auth, emqx_management]}, {mod, {emqx_prometheus_app, []}}, {env, []}, {licenses, ["Apache-2.0"]}, diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 3ac32a47c..7c3283043 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -121,7 +121,7 @@ handle_info(_Msg, State) -> {noreply, State}. push_to_push_gateway(Url, Headers) when is_list(Headers) -> - Data = prometheus_text_format:format(), + Data = prometheus_text_format:format(?PROMETHEUS_DEFAULT_REGISTRY), case httpc:request(post, {Url, Headers, "text/plain", Data}, ?HTTP_OPTIONS, []) of {ok, {{"HTTP/1.1", 200, _}, _RespHeaders, _RespBody}} -> ok; @@ -168,10 +168,10 @@ join_url(Url, JobName0) -> }), lists:concat([Url, "/metrics/job/", unicode:characters_to_list(JobName1)]). -deregister_cleanup(_Registry) -> +deregister_cleanup(?PROMETHEUS_DEFAULT_REGISTRY) -> ok. -collect_mf(_Registry, Callback) -> +collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), @@ -192,6 +192,8 @@ collect_mf(_Registry, Callback) -> _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_olp()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_acl()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_authn()], + ok; +collect_mf(_Registry, _Callback) -> ok. 
%% @private @@ -216,7 +218,7 @@ collect(<<"json">>) -> session => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_session()]) }; collect(<<"prometheus">>) -> - prometheus_text_format:format(). + prometheus_text_format:format(?PROMETHEUS_DEFAULT_REGISTRY). %% @private collect_stats(Name, Stats) -> @@ -809,6 +811,7 @@ cert_expiry_at_from_path(Path0) -> {ok, PemBin} = file:read_file(Path), [CertEntry | _] = public_key:pem_decode(PemBin), Cert = public_key:pem_entry_decode(CertEntry), + %% TODO: Not fully tested for all certs type {'utcTime', NotAfterUtc} = Cert#'Certificate'.'tbsCertificate'#'TBSCertificate'.validity#'Validity'.'notAfter', utc_time_to_epoch(NotAfterUtc). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 5bfa3e3a5..1017dd16b 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -28,7 +28,8 @@ -export([ setting/2, - stats/2 + stats/2, + auth/2 ]). -define(TAGS, [<<"Monitor">>]). @@ -39,6 +40,7 @@ api_spec() -> paths() -> [ "/prometheus", + "/prometheus/auth", "/prometheus/stats" ]. @@ -61,6 +63,18 @@ schema("/prometheus") -> #{200 => prometheus_setting_response()} } }; +schema("/prometheus/auth") -> + #{ + 'operationId' => auth, + get => + #{ + description => ?DESC(get_prom_auth_data), + tags => ?TAGS, + security => security(), + responses => + #{200 => prometheus_data_schema()} + } + }; schema("/prometheus/stats") -> #{ 'operationId' => stats, @@ -114,6 +128,20 @@ stats(get, #{headers := Headers}) -> {200, #{<<"content-type">> => <<"text/plain">>}, Data} end. 
+auth(get, #{headers := Headers}) -> + Type = + case maps:get(<<"accept">>, Headers, <<"text/plain">>) of + <<"application/json">> -> <<"json">>; + _ -> <<"prometheus">> + end, + Data = emqx_prometheus_auth:collect(Type), + case Type of + <<"json">> -> + {200, Data}; + <<"prometheus">> -> + {200, #{<<"content-type">> => <<"text/plain">>}, Data} + end. + %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl new file mode 100644 index 000000000..5257f225b --- /dev/null +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -0,0 +1,400 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_prometheus_auth). + +-export([ + deregister_cleanup/1, + collect_mf/2, + collect_metrics/2 +]). + +-export([collect/1]). + +-include("emqx_prometheus.hrl"). +-include_lib("emqx_auth/include/emqx_authn_chains.hrl"). +-include_lib("prometheus/include/prometheus.hrl"). + +-import( + prometheus_model_helpers, + [ + create_mf/5, + gauge_metric/1, + gauge_metrics/1 + ] +). 
+ +-type authn_metric_key() :: + emqx_authn_enable + | emqx_authn_status + | emqx_authn_nomatch + | emqx_authn_total + | emqx_authn_success + | emqx_authn_failed + | emqx_authn_rate + | emqx_authn_rate_last5m + | emqx_authn_rate_max. + +-type authz_metric_key() :: + emqx_authz_enable + | emqx_authz_status + | emqx_authz_nomatch + | emqx_authz_total + | emqx_authz_success + | emqx_authz_failed + | emqx_authz_rate + | emqx_authz_rate_last5m + | emqx_authz_rate_max. + +%% Please don't remove this attribute, prometheus uses it to +%% automatically register collectors. +-behaviour(prometheus_collector). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +-define(METRIC_NAME_PREFIX, "emqx_auth_"). + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(MG0(K, MAP), maps:get(K, MAP, 0)). + +%%-------------------------------------------------------------------- +%% Collector API +%%-------------------------------------------------------------------- + +%% @private +deregister_cleanup(_) -> ok. + +%% @private +-spec collect_mf(_Registry, Callback) -> ok when + _Registry :: prometheus_registry:registry(), + Callback :: prometheus_collector:collect_mf_callback(). +%% erlfmt-ignore +collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> + _ = [add_collect_family(Name, authn_data(), Callback, gauge) || Name <- authn()], + _ = [add_collect_family(Name, authn_users_count_data(), Callback, gauge) || Name <- authn_users_count()], + _ = [add_collect_family(Name, authz_data(), Callback, gauge) || Name <- authz()], + _ = [add_collect_family(Name, authz_rules_count_data(), Callback, gauge) || Name <- authz_rules_count()], + _ = [add_collect_family(Name, banned_count_data(), Callback, gauge) || Name <- banned()], + ok; +collect_mf(_, _) -> + ok. + +%% @private +collect(<<"json">>) -> + %% TODO + #{}; +collect(<<"prometheus">>) -> + prometheus_text_format:format(?PROMETHEUS_AUTH_REGISTRY). 
+ +add_collect_family(Name, Data, Callback, Type) -> + Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). + +collect_metrics(Name, Metrics) -> + collect_auth(Name, Metrics). + +%%-------------------------------------------------------------------- +%% Collector +%%-------------------------------------------------------------------- + +%%==================== +%% Authn overview +collect_auth(K = emqx_authn_enable, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_status, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_nomatch, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_total, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authn_rate_max, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Authn users count +%% Only provided for `password_based:built_in_database` and `scram:built_in_database` +collect_auth(K = emqx_authn_users_count, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Authz overview +collect_auth(K = emqx_authz_enable, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_status, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_nomatch, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_total, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = emqx_authz_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_auth(K = 
emqx_authz_rate_max, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Authz rules count +%% Only provided for `file` and `built_in_database` +collect_auth(K = emqx_authz_rules_count, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Banned +collect_auth(emqx_banned_count, Data) -> + gauge_metric(Data). + +%%-------------------------------------------------------------------- +%% Internal functions +%%-------------------------------------------------------------------- + +%%======================================== +%% AuthN (Authentication) +%%======================================== + +%%==================== +%% Authn overview +authn() -> + [ + emqx_authn_enable, + emqx_authn_status, + emqx_authn_nomatch, + emqx_authn_total, + emqx_authn_success, + emqx_authn_failed, + emqx_authn_rate, + emqx_authn_rate_last5m, + emqx_authn_rate_max + ]. + +-spec authn_data() -> #{Key => [Point]} when + Key :: authn_metric_key(), + Point :: {[Label], Metric}, + Label :: IdLabel, + IdLabel :: {id, AuthnName :: binary()}, + Metric :: number(). +authn_data() -> + Authns = emqx_config:get([authentication]), + lists:foldl( + fun(Key, AccIn) -> + AccIn#{Key => authn_backend_to_points(Key, Authns)} + end, + #{}, + authn() + ). + +-spec authn_backend_to_points(Key, list(Authn)) -> list(Point) when + Key :: authn_metric_key(), + Authn :: map(), + Point :: {[Label], Metric}, + Label :: IdLabel, + IdLabel :: {id, AuthnName :: binary()}, + Metric :: number(). +authn_backend_to_points(Key, Authns) -> + do_authn_backend_to_points(Key, Authns, []). + +do_authn_backend_to_points(_K, [], AccIn) -> + lists:reverse(AccIn); +do_authn_backend_to_points(K, [Authn | Rest], AccIn) -> + Id = authenticator_id(Authn), + Point = {[{id, Id}], do_metric(K, Authn, lookup_authn_metrics_local(Id))}, + do_authn_backend_to_points(K, Rest, [Point | AccIn]). 
+ +lookup_authn_metrics_local(Id) -> + case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of + {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + #{ + emqx_authn_status => status_to_number(Status), + emqx_authn_nomatch => ?MG0(nomatch, Counters), + emqx_authn_total => ?MG0(total, Counters), + emqx_authn_success => ?MG0(success, Counters), + emqx_authn_failed => ?MG0(failed, Counters), + emqx_authn_rate => ?MG0(current, Rate), + emqx_authn_rate_last5m => ?MG0(last5m, Rate), + emqx_authn_rate_max => ?MG0(max, Rate) + }; + {error, _Reason} -> + maps:from_keys(authn() -- [emqx_authn_enable], 0) + end. + +%%==================== +%% Authn users count + +authn_users_count() -> + [emqx_authn_users_count]. + +-define(AUTHN_MNESIA, emqx_authn_mnesia). +-define(AUTHN_SCRAM_MNESIA, emqx_authn_scram_mnesia). + +authn_users_count_data() -> + Samples = lists:foldl( + fun + (#{backend := built_in_database, mechanism := password_based} = Authn, AccIn) -> + [auth_data_sample_point(authn, Authn, ?AUTHN_MNESIA) | AccIn]; + (#{backend := built_in_database, mechanism := scram} = Authn, AccIn) -> + [auth_data_sample_point(authn, Authn, ?AUTHN_SCRAM_MNESIA) | AccIn]; + (_, AccIn) -> + AccIn + end, + [], + emqx_config:get([authentication]) + ), + #{emqx_authn_users_count => Samples}. + +%%======================================== +%% AuthZ (Authorization) +%%======================================== + +%%==================== +%% Authz overview +authz() -> + [ + emqx_authz_enable, + emqx_authz_status, + emqx_authz_nomatch, + emqx_authz_total, + emqx_authz_success, + emqx_authz_failed, + emqx_authz_rate, + emqx_authz_rate_last5m, + emqx_authz_rate_max + ]. + +-spec authz_data() -> #{Key => [Point]} when + Key :: authz_metric_key(), + Point :: {[Label], Metric}, + Label :: TypeLabel, + TypeLabel :: {type, AuthZType :: binary()}, + Metric :: number(). 
+authz_data() -> + Authzs = emqx_config:get([authorization, sources]), + lists:foldl( + fun(Key, AccIn) -> + AccIn#{Key => authz_backend_to_points(Key, Authzs)} + end, + #{}, + authz() + ). + +-spec authz_backend_to_points(Key, list(Authz)) -> list(Point) when + Key :: authz_metric_key(), + Authz :: map(), + Point :: {[Label], Metric}, + Label :: TypeLabel, + TypeLabel :: {type, AuthZType :: binary()}, + Metric :: number(). +authz_backend_to_points(Key, Authzs) -> + do_authz_backend_to_points(Key, Authzs, []). + +do_authz_backend_to_points(_K, [], AccIn) -> + lists:reverse(AccIn); +do_authz_backend_to_points(K, [Authz | Rest], AccIn) -> + Type = maps:get(type, Authz), + Point = {[{type, Type}], do_metric(K, Authz, lookup_authz_metrics_local(Type))}, + do_authz_backend_to_points(K, Rest, [Point | AccIn]). + +lookup_authz_metrics_local(Type) -> + case emqx_authz_api_sources:lookup_from_local_node(Type) of + {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + #{ + emqx_authz_status => status_to_number(Status), + emqx_authz_nomatch => ?MG0(nomatch, Counters), + emqx_authz_total => ?MG0(total, Counters), + emqx_authz_success => ?MG0(success, Counters), + emqx_authz_failed => ?MG0(failed, Counters), + emqx_authz_rate => ?MG0(current, Rate), + emqx_authz_rate_last5m => ?MG0(last5m, Rate), + emqx_authz_rate_max => ?MG0(max, Rate) + }; + {error, _Reason} -> + maps:from_keys(authz() -- [emqx_authz_enable], 0) + end. + +%%==================== +%% Authz rules count + +authz_rules_count() -> + [emqx_authz_rules_count]. + +-define(ACL_TABLE, emqx_acl). 
+
+authz_rules_count_data() ->
+    Samples = lists:foldl(
+        fun
+            (#{type := built_in_database} = Authz, AccIn) ->
+                [auth_data_sample_point(authz, Authz, ?ACL_TABLE) | AccIn];
+            (#{type := file}, AccIn) ->
+                #{annotations := #{rules := Rules}} = emqx_authz:lookup(file),
+                Size = erlang:length(Rules),
+                [{[{type, file}], Size} | AccIn];
+            (_, AccIn) ->
+                AccIn
+        end,
+        [],
+        emqx_config:get([authorization, sources])
+    ),
+    #{emqx_authz_rules_count => Samples}.
+
+%%========================================
+%% Banned
+%%========================================
+
+%%====================
+%% Banned count
+
+banned() ->
+    [emqx_banned_count].
+
+-define(BANNED_TABLE, emqx_banned).
+banned_count_data() ->
+    mnesia_size(?BANNED_TABLE).
+
+%%--------------------------------------------------------------------
+%% Helper functions
+%%--------------------------------------------------------------------
+
+authenticator_id(Authn) ->
+    emqx_authn_chains:authenticator_id(Authn).
+
+auth_data_sample_point(authn, Authn, Tab) ->
+    {[{id, authenticator_id(Authn)}], mnesia_size(Tab)};
+auth_data_sample_point(authz, #{type := Type} = _Authz, Tab) ->
+    {[{type, Type}], mnesia_size(Tab)}.
+
+mnesia_size(Tab) ->
+    mnesia:table_info(Tab, size).
+
+%% `*_enable` is read from the backend config; other keys come from `Metrics`.
+do_metric(K, #{enable := B}, _) when K =:= emqx_authn_enable; K =:= emqx_authz_enable ->
+    boolean_to_number(B);
+do_metric(K, _, Metrics) ->
+    ?MG0(K, Metrics).
+
+boolean_to_number(true) -> 1;
+boolean_to_number(false) -> 0.
+
+%% Only `connected` is up. NOTE(review): `connecting`/`disconnected` are assumed
+%% to be the remaining resource statuses; confirm against emqx_resource.
+status_to_number(connected) -> 1;
+status_to_number(S) when S =:= stopped; S =:= connecting; S =:= disconnected -> 0.
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_config.erl b/apps/emqx_prometheus/src/emqx_prometheus_config.erl
index a24b52537..bf7e747c8 100644
--- a/apps/emqx_prometheus/src/emqx_prometheus_config.erl
+++ b/apps/emqx_prometheus/src/emqx_prometheus_config.erl
@@ -101,7 +101,7 @@ post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) ->
     ok.
update_prometheus(AppEnvs) -> - PrevCollectors = prometheus_registry:collectors(default), + PrevCollectors = all_collectors(), CurCollectors = proplists:get_value(collectors, proplists:get_value(prometheus, AppEnvs)), lists:foreach( fun prometheus_registry:deregister_collector/1, @@ -113,6 +113,15 @@ update_prometheus(AppEnvs) -> ), application:set_env(AppEnvs). +all_collectors() -> + lists:foldl( + fun(Registry, AccIn) -> + prometheus_registry:collectors(Registry) ++ AccIn + end, + _InitAcc = [], + ?PROMETHEUS_ALL_REGISTRYS + ). + update_push_gateway(Prometheus) -> case is_push_gateway_server_enabled(Prometheus) of true -> diff --git a/rel/i18n/emqx_prometheus_api.hocon b/rel/i18n/emqx_prometheus_api.hocon index 0d9b5dc5f..89999fdd7 100644 --- a/rel/i18n/emqx_prometheus_api.hocon +++ b/rel/i18n/emqx_prometheus_api.hocon @@ -15,4 +15,9 @@ get_prom_data.desc: get_prom_data.label: """Prometheus Metrics""" +get_prom_auth_data.desc: +"""Get Prometheus Metrics for AuthN, AuthZ and Banned""" +get_prom_auth_data.label: +"""Prometheus Metrics for Auth""" + } From bf2e4d134a6db126611394b13e78de49f1bde36f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 04:25:53 +0800 Subject: [PATCH 11/38] refactor(prometheus): generic api response funcs --- .../src/emqx_prometheus_api.erl | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 1017dd16b..280f1aa8d 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -19,6 +19,7 @@ -behaviour(minirest_api). -include_lib("hocon/include/hoconsc.hrl"). +-include_lib("emqx/include/logger.hrl"). -export([ api_spec/0, @@ -115,37 +116,40 @@ setting(put, #{body := Body}) -> end. 
stats(get, #{headers := Headers}) -> - Type = - case maps:get(<<"accept">>, Headers, <<"text/plain">>) of - <<"application/json">> -> <<"json">>; - _ -> <<"prometheus">> - end, - Data = emqx_prometheus:collect(Type), - case Type of - <<"json">> -> - {200, Data}; - <<"prometheus">> -> - {200, #{<<"content-type">> => <<"text/plain">>}, Data} - end. + collect(emqx_prometheus, Headers). auth(get, #{headers := Headers}) -> - Type = - case maps:get(<<"accept">>, Headers, <<"text/plain">>) of - <<"application/json">> -> <<"json">>; - _ -> <<"prometheus">> - end, - Data = emqx_prometheus_auth:collect(Type), - case Type of - <<"json">> -> - {200, Data}; - <<"prometheus">> -> - {200, #{<<"content-type">> => <<"text/plain">>}, Data} - end. + collect(emqx_prometheus_auth, Headers). %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- +collect(Module, Headers) -> + Type = response_type(Headers), + Data = + case erlang:function_exported(Module, collect, 1) of + true -> + erlang:apply(Module, collect, [Type]); + false -> + ?SLOG(error, #{ + msg => "prometheus callback module not found, empty data responded", + module_name => Module + }), + <<>> + end, + gen_response(Type, Data). + +response_type(#{<<"accept">> := <<"application/json">>}) -> + <<"json">>; +response_type(_) -> + <<"prometheus">>. + +gen_response(<<"json">>, Data) -> + {200, Data}; +gen_response(<<"prometheus">>, Data) -> + {200, #{<<"content-type">> => <<"text/plain">>}, Data}. 
+ prometheus_setting_request() -> [{prometheus, #{type := Setting}}] = emqx_prometheus_schema:roots(), emqx_dashboard_swagger:schema_with_examples( From 5158395bcf172c3493b9302d73dcccea63559fa8 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 04:39:36 +0800 Subject: [PATCH 12/38] feat(prometheus): data integration prom data --- apps/emqx_conf/src/emqx_conf_schema.erl | 1 + .../src/emqx_prometheus_api.erl | 21 +- .../src/emqx_prometheus_data_integration.erl | 491 ++++++++++++++++++ rel/i18n/emqx_prometheus_api.hocon | 5 + 4 files changed, 516 insertions(+), 2 deletions(-) create mode 100644 apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl diff --git a/apps/emqx_conf/src/emqx_conf_schema.erl b/apps/emqx_conf/src/emqx_conf_schema.erl index 571f5785b..abb2e14e3 100644 --- a/apps/emqx_conf/src/emqx_conf_schema.erl +++ b/apps/emqx_conf/src/emqx_conf_schema.erl @@ -1107,6 +1107,7 @@ tr_prometheus_collectors(Conf) -> %% emqx collectors emqx_prometheus, {'/prometheus/auth', emqx_prometheus_auth}, + {'/prometheus/data_integration', emqx_prometheus_data_integration}, emqx_prometheus_mria %% builtin vm collectors | prometheus_collectors(Conf) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 280f1aa8d..32cb89177 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -30,7 +30,8 @@ -export([ setting/2, stats/2, - auth/2 + auth/2, + data_integration/2 ]). -define(TAGS, [<<"Monitor">>]). @@ -42,7 +43,8 @@ paths() -> [ "/prometheus", "/prometheus/auth", - "/prometheus/stats" + "/prometheus/stats", + "/prometheus/data_integration" ]. 
schema("/prometheus") -> @@ -87,6 +89,18 @@ schema("/prometheus/stats") -> responses => #{200 => prometheus_data_schema()} } + }; +schema("/prometheus/data_integration") -> + #{ + 'operationId' => data_integration, + get => + #{ + description => ?DESC(get_prom_data_integration_data), + tags => ?TAGS, + security => security(), + responses => + #{200 => prometheus_data_schema()} + } }. security() -> @@ -121,6 +135,9 @@ stats(get, #{headers := Headers}) -> auth(get, #{headers := Headers}) -> collect(emqx_prometheus_auth, Headers). +data_integration(get, #{headers := Headers}) -> + collect(emqx_prometheus_data_integration, Headers). + %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl new file mode 100644 index 000000000..4c679b842 --- /dev/null +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -0,0 +1,491 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_prometheus_data_integration). + +-export([ + deregister_cleanup/1, + collect_mf/2, + collect_metrics/2 +]). 
+ +-export([collect/1]). + +-export([add_collect_family/4]). + +-export([ + rules/0, + rules_data/1, + actions/0, + actions_data/1, + actions_exec_count/0, + actions_exec_count_data/0, + schema_registry/0, + schema_registry_data/0, + connectors/0, + connectors_data/0, + rule_specific/0, + rule_specific_data/1, + action_specific/0, + action_specific_data/0, + connector_specific/0, + connector_specific_data/0 +]). + +-include("emqx_prometheus.hrl"). +-include_lib("prometheus/include/prometheus.hrl"). + +-import( + prometheus_model_helpers, + [ + create_mf/5, + gauge_metric/1, + gauge_metrics/1 + ] +). + +%% Please don't remove this attribute, prometheus uses it to +%% automatically register collectors. +-behaviour(prometheus_collector). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +-define(METRIC_NAME_PREFIX, "emqx_data_integration_"). + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(MG0(K, MAP), maps:get(K, MAP, 0)). + +%%-------------------------------------------------------------------- +%% Collector API +%%-------------------------------------------------------------------- + +%% @private +deregister_cleanup(_) -> ok. + +%% @private +-spec collect_mf(_Registry, Callback) -> ok when + _Registry :: prometheus_registry:registry(), + Callback :: prometheus_collector:collect_mf_callback(). 
+%% erlfmt-ignore +collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> + Rules = emqx_rule_engine:get_rules(), + _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], + _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], + _ = [add_collect_family(Name, schema_registry_data(), Callback, gauge) || Name <- schema_registry()], + _ = [add_collect_family(Name, connectors_data(), Callback, gauge) || Name <- connectors()], + _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], + _ = [add_collect_family(Name, action_specific_data(), Callback, gauge) || Name <- action_specific()], + + ok; +collect_mf(_, _) -> + ok. + +%% @private +collect(<<"json">>) -> + %% TODO + #{}; +collect(<<"prometheus">>) -> + prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). + +add_collect_family(Name, Data, Callback, Type) -> + Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). + +collect_metrics(Name, Metrics) -> + collect_di(Name, Metrics). 
+ +%%-------------------------------------------------------------------- +%% Collector +%%-------------------------------------------------------------------- + +%%======================================== +%% Data Integration Overview +%%======================================== + +%%==================== +%% All Rules +%% Rules +collect_di(K = emqx_rule_count, Data) -> + gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_matched_rate, Data) -> + gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_matched_rate_last5m, Data) -> + gauge_metric(?MG(K, Data)); +%%==================== +%% All Actions +collect_di(K = emqx_rules_actions_rate, Data) -> + gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_actions_rate_last5m, Data) -> + gauge_metric(?MG(K, Data)); +%%==================== +%% Schema Registry +collect_di(K = emqx_schema_registry_count, Data) -> + gauge_metric(?MG(K, Data)); +%%==================== +%% Connectors +collect_di(K = emqx_connector_count, Data) -> + gauge_metric(?MG(K, Data)); +%%======================================== +%% Data Integration for Specific: Rule && Action && Connector +%%======================================== + +%%==================== +%% Specific Rule +collect_di(K = emqx_rule_matched, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_passed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_exception, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_no_result, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_total, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_unknown, 
Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched_rate_max, Data) -> + gauge_metrics(?MG(K, Data)); +%%==================== +%% Specific Action + +collect_di(K = emqx_action_matched, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_rate, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_inflight, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_received, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_late_reply, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_success, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_failed, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_stopped, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_not_found, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_queue_full, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_other, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_expired, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_queuing, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_rate_last5m, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_rate_max, Data) -> + gauge_metrics(?MG(K, Data)). 
+ +%%==================== +%% Specific Connector + +%%-------------------------------------------------------------------- +%% Internal functions +%%-------------------------------------------------------------------- + +%%======================================== +%% Data Integration Overview +%%======================================== + +%%==================== +%% All Rules + +rules() -> + [ + emqx_rule_count, + emqx_rules_matched_rate, + emqx_rules_matched_rate_last5m + ]. + +-define(RULE_TAB, emqx_rule_engine). + +rules_data(Rules) -> + Rate = lists:foldl( + fun( + #{id := Id}, + #{emqx_rules_matched_rate := Rate, emqx_rules_matched_rate_last5m := RateLast5m} = AccIn + ) -> + RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id), + AccIn#{ + emqx_rules_matched_rate => Rate + + emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0), + emqx_rules_matched_rate_last5m => RateLast5m + + emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0) + } + end, + _InitAcc = maps:from_keys(rules(), 0), + Rules + ), + Rate#{emqx_rule_count => ets:info(?RULE_TAB, size)}. + +%%==================== +%% All Actions + +actions() -> + [ + emqx_rules_actions_rate, + emqx_rules_actions_rate_last5m + ]. + +actions_data(Rules) -> + lists:foldl( + fun( + #{id := Id}, + #{emqx_rules_actions_rate := Rate, emqx_rules_actions_rate_last5m := RateLast5m} = + _AccIn + ) -> + RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id), + _AccIn#{ + emqx_rules_actions_rate => Rate + + emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0), + emqx_rules_actions_rate_last5m => RateLast5m + + emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0) + } + end, + _InitAcc = maps:from_keys(actions(), 0), + Rules + ). + +actions_exec_count() -> + [ + emqx_action_sink, + emqx_action_source + ]. + +actions_exec_count_data() -> + []. + +%%==================== +%% Schema Registry + +schema_registry() -> + [ + emqx_schema_registry_count + ]. 
+ +schema_registry_data() -> + #{ + emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) + }. + +%%==================== +%% Connectors + +connectors() -> + [ + emqx_connector_count + ]. + +connectors_data() -> + #{ + emqx_connector_count => + lists:foldl( + fun(List, AccIn) -> erlang:length(List) + AccIn end, + 0, + [ + emqx_connector:list(), emqx_bridge:list(), emqx_bridge_v2:list() + ] + ) + }. + +%%======================================== +%% Data Integration for Specific: Rule && Action && Connector +%%======================================== + +%%==================== +%% Specific Rule +%% With rule_id as label key: `rule_id` + +rule_specific() -> + [ + emqx_rule_matched, + emqx_rule_failed, + emqx_rule_passed, + emqx_rule_failed_exception, + emqx_rule_failed_no_result, + emqx_rule_actions_total, + emqx_rule_actions_success, + emqx_rule_actions_failed, + emqx_rule_actions_failed_out_of_service, + emqx_rule_actions_failed_unknown, + emqx_rule_matched_rate, + emqx_rule_matched_rate_last5m, + emqx_rule_matched_rate_max + ]. + +rule_specific_data(Rules) -> + lists:foldl( + fun(#{id := Id} = Rule, AccIn) -> + merge_acc_with_rules(Id, get_metric(Rule), AccIn) + end, + maps:from_keys(rule_specific(), []), + Rules + ). + +merge_acc_with_rules(Id, RuleMetrics, PointsAcc) -> + maps:fold( + fun(K, V, AccIn) -> + AccIn#{K => [rule_point(Id, V) | ?MG(K, AccIn)]} + end, + PointsAcc, + RuleMetrics + ). + +rule_point(Id, V) -> + {[{rule_id, Id}], V}. 
+ +get_metric(#{id := Id} = _Rule) -> + case emqx_metrics_worker:get_metrics(rule_metrics, Id) of + #{counters := Counters, rate := #{matched := MatchedRate}} -> + #{ + emqx_rule_matched => ?MG(matched, Counters), + emqx_rule_failed => ?MG(failed, Counters), + emqx_rule_passed => ?MG(passed, Counters), + emqx_rule_failed_exception => ?MG('failed.exception', Counters), + emqx_rule_failed_no_result => ?MG('failed.no_result', Counters), + emqx_rule_actions_total => ?MG('actions.total', Counters), + emqx_rule_actions_success => ?MG('actions.success', Counters), + emqx_rule_actions_failed => ?MG('actions.failed', Counters), + emqx_rule_actions_failed_out_of_service => ?MG( + 'actions.failed.out_of_service', Counters + ), + emqx_rule_actions_failed_unknown => ?MG('actions.failed.unknown', Counters), + emqx_rule_matched_rate => ?MG(current, MatchedRate), + emqx_rule_matched_rate_last5m => ?MG(last5m, MatchedRate), + emqx_rule_matched_rate_max => ?MG(max, MatchedRate) + } + end. + +%%==================== +%% Specific Action +%% With action_id: `{type}:{name}` as label key: `action_id` + +action_specific() -> + [ + emqx_action_matched, + emqx_action_dropped, + emqx_action_success, + emqx_action_failed, + emqx_action_rate, + emqx_action_inflight, + emqx_action_received, + emqx_action_late_reply, + emqx_action_retried, + emqx_action_retried_success, + emqx_action_retried_failed, + emqx_action_dropped_resource_stopped, + emqx_action_dropped_resource_not_found, + emqx_action_dropped_queue_full, + emqx_action_dropped_other, + emqx_action_dropped_expired, + emqx_action_queuing, + emqx_action_rate_last5m, + emqx_action_rate_max + ]. + +action_specific_data() -> + lists:foldl( + fun(#{type := Type, name := Name} = _Bridge, AccIn) -> + Id = emqx_bridge_resource:bridge_id(Type, Name), + merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) + end, + maps:from_keys(action_specific(), []), + emqx_bridge:list() + ). 
+ +merge_acc_with_bridges(Id, BridgeMetrics, PointsAcc) -> + maps:fold( + fun(K, V, AccIn) -> + AccIn#{K => [bridge_point(Id, V) | ?MG(K, AccIn)]} + end, + PointsAcc, + BridgeMetrics + ). + +bridge_point(Id, V) -> + {[{action_id, Id}], V}. + +get_bridge_metric(Type, Name) -> + case emqx_bridge:get_metrics(Type, Name) of + #{counters := Counters, rate := #{matched := MatchedRate}, gauges := Gauges} -> + #{ + emqx_action_matched => ?MG0(matched, Counters), + emqx_action_dropped => ?MG0(dropped, Counters), + emqx_action_success => ?MG0(success, Counters), + emqx_action_failed => ?MG0(failed, Counters), + emqx_action_rate => ?MG0(current, MatchedRate), + emqx_action_inflight => ?MG0(inflight, Gauges), + emqx_action_received => ?MG0(received, Counters), + emqx_action_late_reply => ?MG0(late_reply, Counters), + emqx_action_retried => ?MG0(retried, Counters), + emqx_action_retried_success => ?MG0('retried.success', Counters), + emqx_action_retried_failed => ?MG0('retried.failed', Counters), + emqx_action_dropped_resource_stopped => ?MG0('dropped.resource_stopped', Counters), + emqx_action_dropped_resource_not_found => ?MG0( + 'dropped.resource_not_found', Counters + ), + emqx_action_dropped_queue_full => ?MG0('dropped.queue_full', Counters), + emqx_action_dropped_other => ?MG0('dropped.other', Counters), + emqx_action_dropped_expired => ?MG0('dropped.expired', Counters), + emqx_action_queuing => ?MG0(queuing, Gauges), + emqx_action_rate_last5m => ?MG0(last5m, MatchedRate), + emqx_action_rate_max => ?MG0(max, MatchedRate) + } + end. + +%% TODO: Bridge V2 + +%%==================== +%% Specific Connector +%% With connector_id: `{type}:{name}` as label key: `connector_id` + +connector_specific() -> + [ + emqx_connector_enable, + emqx_connector_status + ]. + +connector_specific_data() -> + []. 
+ +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Help funcs diff --git a/rel/i18n/emqx_prometheus_api.hocon b/rel/i18n/emqx_prometheus_api.hocon index 89999fdd7..0c48e3add 100644 --- a/rel/i18n/emqx_prometheus_api.hocon +++ b/rel/i18n/emqx_prometheus_api.hocon @@ -20,4 +20,9 @@ get_prom_auth_data.desc: get_prom_auth_data.label: """Prometheus Metrics for Auth""" +get_prom_data_integration_data.desc: +"""Get Prometheus Metrics for Data Integration""" +get_prom_data_integration_data.label: +"""Prometheus Metrics for Data Integration""" + } From 7832bbc0a43a8786c6deb684b489ea7e137a205e Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 14 Jan 2024 23:55:44 +0800 Subject: [PATCH 13/38] fix(prom): schema registry not in ce edition --- .../src/emqx_prometheus_data_integration.erl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 4c679b842..5039cb9b6 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -295,6 +295,7 @@ actions_exec_count_data() -> %%==================== %% Schema Registry +-if(?EMQX_RELEASE_EDITION == ee). schema_registry() -> [ emqx_schema_registry_count @@ -304,6 +305,13 @@ schema_registry_data() -> #{ emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. +-else. +schema_registry() -> + []. + +schema_registry_data() -> + #{}. +-endif. 
%%==================== %% Connectors From 0dca9905dd41a493910e78c848e54885fdc7a58f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 00:41:54 +0800 Subject: [PATCH 14/38] test(prometheus): `{Registry, Collector}` tuple --- .../src/emqx_prometheus_config.erl | 4 ++++ .../test/emqx_prometheus_SUITE.erl | 5 ++++- .../test/emqx_prometheus_api_SUITE.erl | 21 +++++++++++++++---- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_config.erl b/apps/emqx_prometheus/src/emqx_prometheus_config.erl index bf7e747c8..f5140938c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_config.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_config.erl @@ -25,6 +25,10 @@ -export([conf/0, is_push_gateway_server_enabled/1]). -export([to_recommend_type/1]). +-ifdef(TEST). +-export([all_collectors/0]). +-endif. + update(Config) -> case emqx_conf:update( diff --git a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl index 496919b10..11ca49f89 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl @@ -103,13 +103,16 @@ init_group() -> ok = mria_rlog:wait_for_shards([?CLUSTER_RPC_SHARD], infinity), meck:new(emqx_alarm, [non_strict, passthrough, no_link]), meck:expect(emqx_alarm, activate, 3, ok), - meck:expect(emqx_alarm, deactivate, 3, ok). + meck:expect(emqx_alarm, deactivate, 3, ok), + meck:new(emqx_license_checker, [non_strict, passthrough, no_link]), + meck:expect(emqx_license_checker, expiry_epoch, fun() -> 1859673600 end). end_group() -> ekka:stop(), mria:stop(), mria_mnesia:delete_schema(), meck:unload(emqx_alarm), + meck:unload(emqx_license_checker), emqx_common_test_helpers:stop_apps([emqx_prometheus]). 
end_per_group(_Group, Config) -> diff --git a/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl index cc20e60c7..6092a5d54 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl @@ -128,8 +128,8 @@ t_legacy_prometheus_api(_) -> Conf2 = emqx_utils_json:decode(Response2, [return_maps]), ?assertEqual(NewConf, Conf2), - EnvCollectors = application:get_env(prometheus, collectors, []), - PromCollectors = prometheus_registry:collectors(default), + EnvCollectors = env_collectors(), + PromCollectors = all_collectors(), ?assertEqual(lists:sort(EnvCollectors), lists:sort(PromCollectors)), ?assert(lists:member(prometheus_vm_statistics_collector, EnvCollectors), EnvCollectors), @@ -221,8 +221,8 @@ t_prometheus_api(_) -> Conf2 = emqx_utils_json:decode(Response2, [return_maps]), ?assertMatch(NewConf, Conf2), - EnvCollectors = application:get_env(prometheus, collectors, []), - PromCollectors = prometheus_registry:collectors(default), + EnvCollectors = env_collectors(), + PromCollectors = all_collectors(), ?assertEqual(lists:sort(EnvCollectors), lists:sort(PromCollectors)), ?assert(lists:member(prometheus_vm_statistics_collector, EnvCollectors), EnvCollectors), @@ -308,3 +308,16 @@ request_stats(JsonAuth, Auth) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Internal Functions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +env_collectors() -> + do_env_collectors(application:get_env(prometheus, collectors, []), []). + +do_env_collectors([], Acc) -> + lists:reverse(Acc); +do_env_collectors([{_Registry, Collector} | Rest], Acc) when is_atom(Collector) -> + do_env_collectors(Rest, [Collector | Acc]); +do_env_collectors([Collector | Rest], Acc) when is_atom(Collector) -> + do_env_collectors(Rest, [Collector | Acc]). + +all_collectors() -> + emqx_prometheus_config:all_collectors(). 
From 38a90bd2732dbfd4647480a19a373a4d159ff242 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 02:45:18 +0800 Subject: [PATCH 15/38] fix(prom_auth): authn && authz data response by josn --- .../src/emqx_prometheus_auth.erl | 82 ++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 5257f225b..c7c65b2cb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -96,10 +96,26 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> %% TODO - #{}; + #{ + emqx_authn => collect_auth_data(authn), + emqx_authz => collect_auth_data(authz), + emqx_banned => collect_banned_data() + }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_AUTH_REGISTRY). +collect_auth_data(AuthDataType) -> + maps:fold( + fun(K, V, Acc) -> + zip_auth_metrics(AuthDataType, K, V, Acc) + end, + [], + auth_data(AuthDataType) + ). + +collect_banned_data() -> + #{emqx_banned_count => banned_count_data()}. + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). @@ -398,3 +414,67 @@ boolean_to_number(false) -> 0. status_to_number(connected) -> 1; status_to_number(stopped) -> 0. + +zip_auth_metrics(AuthDataType, K, V, Acc) -> + LabelK = label_key(AuthDataType), + UserOrRuleD = user_rule_data(AuthDataType), + do_zip_auth_metrics(LabelK, UserOrRuleD, K, V, Acc). 
+ +do_zip_auth_metrics(LabelK, UserOrRuleD, Key, Points, [] = _AccIn) -> + lists:foldl( + fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> + %% for initialized empty AccIn + %% The following fields will be put into Result + %% For Authn: + %% `id`, `emqx_authn_users_count` + %% For Authz: + %% `type`, `emqx_authz_rules_count` + Point = (users_or_rule_count(LabelK, LabelV, UserOrRuleD))#{ + LabelK => LabelV, Key => Metric + }, + [Point | AccIn2] + end, + [], + Points + ); +do_zip_auth_metrics(LabelK, _UserOrRuleD, Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl( + fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> + [#{LabelK => Id, Key => Metric} | AccIn2] + end, + [], + Points + ), + lists:zipwith( + fun(AllResulted, ThisKeyMetricOut) -> + maps:merge(AllResulted, ThisKeyMetricOut) + end, + AllResultedAcc, + ThisKeyResult + ). + +auth_data(authn) -> authn_data(); +auth_data(authz) -> authz_data(). + +label_key(authn) -> id; +label_key(authz) -> type. + +user_rule_data(authn) -> authn_users_count_data(); +user_rule_data(authz) -> authz_rules_count_data(). + +users_or_rule_count(id, Id, #{emqx_authn_users_count := Points} = _AuthnUsersD) -> + case lists:keyfind([{id, Id}], 1, Points) of + {_, Metric} -> + #{emqx_authn_users_count => Metric}; + false -> + #{} + end; +users_or_rule_count(type, Type, #{emqx_authz_rules_count := Points} = _AuthzRulesD) -> + case lists:keyfind([{type, Type}], 1, Points) of + {_, Metric} -> + #{emqx_authz_rules_count => Metric}; + false -> + #{} + end; +users_or_rule_count(_, _, _) -> + #{}. 
From 4b23930fceaa27c3c9149ead6c3e71eedee502e3 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 10:14:51 +0800 Subject: [PATCH 16/38] fix(prometheus): license expiry and schema_registry only for ee --- apps/emqx_prometheus/src/emqx_prometheus.erl | 29 +++++++++++++++---- .../src/emqx_prometheus_data_integration.erl | 19 ++++++++---- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 7c3283043..286243c0c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -175,11 +175,9 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), - LicenseData = emqx_license_data(), ClusterData = emqx_cluster_data(), CertsData = emqx_certs_data(), %% TODO: license expiry epoch and cert expiry epoch should be cached - _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], _ = [add_collect_family(Name, CertsData, Callback, gauge) || Name <- emqx_certs()], _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], @@ -192,6 +190,7 @@ collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_olp()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_acl()], _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_authn()], + ok = maybe_collect_family_license(Callback), ok; collect_mf(_Registry, _Callback) -> ok. @@ -201,13 +200,11 @@ collect(<<"json">>) -> Metrics = emqx_metrics:all(), Stats = emqx_stats:getstats(), VMData = emqx_vm_data(), - LicenseData = emqx_license_data(), %% TODO: FIXME! 
%% emqx_metrics_olp()), %% emqx_metrics_acl()), %% emqx_metrics_authn()), - #{ - license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()]), + (maybe_collect_license())#{ certs => collect_certs_json(emqx_certs_data()), stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), @@ -237,6 +234,24 @@ collect_metrics(Name, Metrics) -> add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). +-if(?EMQX_RELEASE_EDITION == ee). +maybe_collect_family_license(Callback) -> + LicenseData = emqx_license_data(), + _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], + ok. + +maybe_collect_license() -> + LicenseData = emqx_license_data(), + #{license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()])}. + +-else. +maybe_collect_family_license(_) -> + ok. + +maybe_collect_license() -> + #{}. +-endif. + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -707,6 +722,7 @@ emqx_cluster_data() -> {nodes_stopped, length(Stopped)} ]. +-if(?EMQX_RELEASE_EDITION == ee). emqx_license() -> [ emqx_license_expiry_at @@ -716,6 +732,9 @@ emqx_license_data() -> [ {expiry_at, emqx_license_checker:expiry_epoch()} ]. +-else. + +-endif. 
emqx_certs() -> [ diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 5039cb9b6..092ed3a71 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -86,11 +86,10 @@ collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [add_collect_family(Name, schema_registry_data(), Callback, gauge) || Name <- schema_registry()], _ = [add_collect_family(Name, connectors_data(), Callback, gauge) || Name <- connectors()], _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], _ = [add_collect_family(Name, action_specific_data(), Callback, gauge) || Name <- action_specific()], - + ok = maybe_collect_family_schema_registry(Callback), ok; collect_mf(_, _) -> ok. @@ -108,6 +107,18 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). +-if(?EMQX_RELEASE_EDITION == ee). +maybe_collect_family_schema_registry(Callback) -> + _ = [ + add_collect_family(Name, schema_registry_data(), Callback, gauge) + || Name <- schema_registry() + ], + ok. +-else. +maybe_collect_family_schema_registry(_) -> + ok. +-endif. + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -306,11 +317,7 @@ schema_registry_data() -> emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. -else. -schema_registry() -> - []. -schema_registry_data() -> - #{}. -endif. 
%%==================== From f457def2010694a486d5886c09b9ccc7dedd8183 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 11:07:32 +0800 Subject: [PATCH 17/38] fix: use `id` uniformly as the label key for rules and actions --- .../src/emqx_prometheus_data_integration.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 092ed3a71..318b0fa16 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -384,7 +384,7 @@ merge_acc_with_rules(Id, RuleMetrics, PointsAcc) -> ). rule_point(Id, V) -> - {[{rule_id, Id}], V}. + {[{id, Id}], V}. get_metric(#{id := Id} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of @@ -448,14 +448,14 @@ action_specific_data() -> merge_acc_with_bridges(Id, BridgeMetrics, PointsAcc) -> maps:fold( fun(K, V, AccIn) -> - AccIn#{K => [bridge_point(Id, V) | ?MG(K, AccIn)]} + AccIn#{K => [action_point(Id, V) | ?MG(K, AccIn)]} end, PointsAcc, BridgeMetrics ). -bridge_point(Id, V) -> - {[{action_id, Id}], V}. +action_point(Id, V) -> + {[{id, Id}], V}. 
get_bridge_metric(Type, Name) -> case emqx_bridge:get_metrics(Type, Name) of From 76d9ace5829df572dd56718aa4718a8d0bfde773 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 11:28:54 +0800 Subject: [PATCH 18/38] fix: connector_count contains bridge_v1 and bridge_v2 --- .../src/emqx_prometheus_data_integration.erl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 318b0fa16..9840d2409 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -330,14 +330,8 @@ connectors() -> connectors_data() -> #{ - emqx_connector_count => - lists:foldl( - fun(List, AccIn) -> erlang:length(List) + AccIn end, - 0, - [ - emqx_connector:list(), emqx_bridge:list(), emqx_bridge_v2:list() - ] - ) + %% Both Bridge V1 and V2 + emqx_connector_count => erlang:length(emqx_bridge:list()) }. %%======================================== From 36f009b0c2fcc18d7a71498c04c0c7c76f4d1b7f Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 13:49:38 +0800 Subject: [PATCH 19/38] fix(prom): connectors specific data --- .../src/emqx_prometheus_data_integration.erl | 65 +++++++++++++++---- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 9840d2409..3546697cc 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -36,13 +36,13 @@ schema_registry/0, schema_registry_data/0, connectors/0, - connectors_data/0, + connectors_data/1, rule_specific/0, rule_specific_data/1, action_specific/0, - action_specific_data/0, + action_specific_data/1, connector_specific/0, - connector_specific_data/0 + connector_specific_data/1 ]). 
-include("emqx_prometheus.hrl"). @@ -84,11 +84,13 @@ deregister_cleanup(_) -> ok. %% erlfmt-ignore collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), + Bridges =emqx_bridge:list(), _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [add_collect_family(Name, connectors_data(), Callback, gauge) || Name <- connectors()], + _ = [add_collect_family(Name, connectors_data(Bridges), Callback, gauge) || Name <- connectors()], _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], - _ = [add_collect_family(Name, action_specific_data(), Callback, gauge) || Name <- action_specific()], + _ = [add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) || Name <- action_specific()], + _ = [add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) || Name <- connector_specific()], ok = maybe_collect_family_schema_registry(Callback), ok; collect_mf(_, _) -> @@ -222,11 +224,15 @@ collect_di(K = emqx_action_queuing, Data) -> collect_di(K = emqx_action_rate_last5m, Data) -> gauge_metrics(?MG(K, Data)); collect_di(K = emqx_action_rate_max, Data) -> - gauge_metrics(?MG(K, Data)). - + gauge_metrics(?MG(K, Data)); %%==================== %% Specific Connector +collect_di(K = emqx_connector_enable, Data) -> + gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_connector_status, Data) -> + gauge_metrics(?MG(K, Data)). + %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- @@ -328,10 +334,10 @@ connectors() -> emqx_connector_count ]. -connectors_data() -> +connectors_data(Brdiges) -> #{ %% Both Bridge V1 and V2 - emqx_connector_count => erlang:length(emqx_bridge:list()) + emqx_connector_count => erlang:length(Brdiges) }. 
%%======================================== @@ -429,14 +435,14 @@ action_specific() -> emqx_action_rate_max ]. -action_specific_data() -> +action_specific_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = _Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, maps:from_keys(action_specific(), []), - emqx_bridge:list() + Bridges ). merge_acc_with_bridges(Id, BridgeMetrics, PointsAcc) -> @@ -491,10 +497,43 @@ connector_specific() -> emqx_connector_status ]. -connector_specific_data() -> - []. +connector_specific_data(Bridges) -> + lists:foldl( + fun(#{type := Type, name := Name} = Bridge, AccIn) -> + Id = emqx_bridge_resource:bridge_id(Type, Name), + merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) + end, + maps:from_keys(connector_specific(), []), + Bridges + ). + +merge_acc_with_connectors(Id, ConnectorMetrics, PointsAcc) -> + maps:fold( + fun(K, V, AccIn) -> + AccIn#{K => [connector_point(Id, V) | ?MG(K, AccIn)]} + end, + PointsAcc, + ConnectorMetrics + ). + +connector_point(Id, V) -> + {[{id, Id}], V}. + +get_connector_status(#{resource_data := ResourceData} = _Bridge) -> + Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), + Status = ?MG(status, ResourceData), + #{ + emqx_connector_enable => boolean_to_number(Enabled), + emqx_connector_status => status_to_number(Status) + }. %%-------------------------------------------------------------------- %%-------------------------------------------------------------------- %% Help funcs + +boolean_to_number(true) -> 1; +boolean_to_number(false) -> 0. + +status_to_number(connected) -> 1; +status_to_number(disconnected) -> 0. 
From 8f7964f435306e45ecc7d02dda3023d9b1fb2ea7 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 14:41:26 +0800 Subject: [PATCH 20/38] feat(prom): data integration metrics in josn format --- .../src/emqx_prometheus_auth.erl | 3 +- .../src/emqx_prometheus_data_integration.erl | 159 ++++++++++++++---- 2 files changed, 130 insertions(+), 32 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index c7c65b2cb..06d6246f1 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -95,7 +95,6 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - %% TODO #{ emqx_authn => collect_auth_data(authn), emqx_authz => collect_auth_data(authz), diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 3546697cc..3cdc3a01c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -26,24 +26,7 @@ -export([add_collect_family/4]). 
--export([ - rules/0, - rules_data/1, - actions/0, - actions_data/1, - actions_exec_count/0, - actions_exec_count_data/0, - schema_registry/0, - schema_registry_data/0, - connectors/0, - connectors_data/1, - rule_specific/0, - rule_specific_data/1, - action_specific/0, - action_specific_data/1, - connector_specific/0, - connector_specific_data/1 -]). +-export([actions_exec_count/0, actions_exec_count_data/0]). -include("emqx_prometheus.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -81,34 +64,97 @@ deregister_cleanup(_) -> ok. -spec collect_mf(_Registry, Callback) -> ok when _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). -%% erlfmt-ignore collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), - Bridges =emqx_bridge:list(), + Bridges = emqx_bridge:list(), + %% Data Integration Overview _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [add_collect_family(Name, connectors_data(Bridges), Callback, gauge) || Name <- connectors()], - _ = [add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) || Name <- rule_specific()], - _ = [add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) || Name <- action_specific()], - _ = [add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) || Name <- connector_specific()], + _ = [ + add_collect_family(Name, connectors_data(Bridges), Callback, gauge) + || Name <- connectors() + ], ok = maybe_collect_family_schema_registry(Callback), + + %% Rule Specific + _ = [ + add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) + || Name <- rule_specific() + ], + + %% Action Specific + _ = [ + add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) + || Name <- action_specific() + ], + + %% Connector Specific + _ = [ + 
add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) + || Name <- connector_specific() + ], + ok; collect_mf(_, _) -> ok. %% @private collect(<<"json">>) -> - %% TODO - #{}; + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + #{ + data_integration_overview => collect_data_integration(overview, {Rules, Bridges}), + rules => collect_data_integration(rules, Rules), + actions => collect_data_integration(actions, Bridges), + connectors => collect_data_integration(connectors, Bridges) + }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). +collect_data_integration(overview, {Rules, Bridges}) -> + RulesD = rules_data(Rules), + ActionsD = actions_data(Rules), + ConnectorsD = connectors_data(Bridges), + + M1 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, rules()), + M2 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ActionsD)} end, #{}, actions()), + M3 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, connectors()), + M4 = maybe_collect_schema_registry(), + + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]); +collect_data_integration(Type = rules, Rules) -> + maps:fold( + fun(K, V, Acc) -> + zip_metrics(Type, K, V, Acc) + end, + [], + di_data(Type, Rules) + ); +collect_data_integration(Type = actions, Rules) -> + maps:fold( + fun(K, V, Acc) -> + zip_metrics(Type, K, V, Acc) + end, + [], + di_data(Type, Rules) + ); +collect_data_integration(Type = connectors, Bridges) -> + maps:fold( + fun(K, V, Acc) -> + zip_metrics(Type, K, V, Acc) + end, + [], + di_data(Type, Bridges) + ). + -if(?EMQX_RELEASE_EDITION == ee). 
maybe_collect_family_schema_registry(Callback) -> _ = [ @@ -116,9 +162,15 @@ maybe_collect_family_schema_registry(Callback) -> || Name <- schema_registry() ], ok. + +maybe_collect_schema_registry() -> + schema_registry_data(). -else. maybe_collect_family_schema_registry(_) -> ok. + +maybe_collect_schema_registry() -> + #{}. -endif. %%-------------------------------------------------------------------- @@ -307,7 +359,7 @@ actions_exec_count() -> ]. actions_exec_count_data() -> - []. + #{}. %%==================== %% Schema Registry @@ -485,8 +537,6 @@ get_bridge_metric(Type, Name) -> } end. -%% TODO: Bridge V2 - %%==================== %% Specific Connector %% With connector_id: `{type}:{name}` as label key: `connector_id` @@ -537,3 +587,52 @@ boolean_to_number(false) -> 0. status_to_number(connected) -> 1; status_to_number(disconnected) -> 0. + +zip_metrics(Type, K, V, Acc) -> + LabelK = label_key(Type), + do_zip_metrics(LabelK, K, V, Acc). + +do_zip_metrics(LabelK, Key, Points, [] = _AccIn) -> + lists:foldl( + fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> + %% for initialized empty AccIn + %% The following fields will be put into Result + %% For Rules: + %% `id` => [RULE_ID] + %% For Actions + %% `id` => [ACTION_ID] + %% FOR Connectors + %% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID + %% formatted with {type}:{name} + Point = + #{ + LabelK => LabelV, Key => Metric + }, + [Point | AccIn2] + end, + [], + Points + ); +do_zip_metrics(LabelK, Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl( + fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> + [#{LabelK => Id, Key => Metric} | AccIn2] + end, + [], + Points + ), + lists:zipwith( + fun(AllResulted, ThisKeyMetricOut) -> + maps:merge(AllResulted, ThisKeyMetricOut) + end, + AllResultedAcc, + ThisKeyResult + ). 
+ +di_data(rules, Rules) -> rule_specific_data(Rules); +di_data(actions, Bridges) -> action_specific_data(Bridges); +di_data(connectors, Bridges) -> connector_specific_data(Bridges). + +label_key(rules) -> id; +label_key(actions) -> id; +label_key(connectors) -> id. From a18c4d193aa360930bddb1ae0b2ca63c43c0344c Mon Sep 17 00:00:00 2001 From: JimMoen Date: Mon, 15 Jan 2024 15:32:29 +0800 Subject: [PATCH 21/38] refactor: abstract function call --- .../src/emqx_prometheus_data_integration.erl | 31 ++++++------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 3cdc3a01c..72fd7a6e9 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -103,7 +103,7 @@ collect(<<"json">>) -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ - data_integration_overview => collect_data_integration(overview, {Rules, Bridges}), + data_integration_overview => collect_data_integration_overview(Rules, Bridges), rules => collect_data_integration(rules, Rules), actions => collect_data_integration(actions, Bridges), connectors => collect_data_integration(connectors, Bridges) @@ -111,15 +111,17 @@ collect(<<"json">>) -> collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%==================== +%% API Helpers add_collect_family(Name, Data, Callback, Type) -> + %% TODO: help document from Name Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). 
-collect_data_integration(overview, {Rules, Bridges}) -> +collect_data_integration_overview(Rules, Bridges) -> RulesD = rules_data(Rules), ActionsD = actions_data(Rules), ConnectorsD = connectors_data(Bridges), @@ -129,30 +131,15 @@ collect_data_integration(overview, {Rules, Bridges}) -> M3 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, connectors()), M4 = maybe_collect_schema_registry(), - lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]); -collect_data_integration(Type = rules, Rules) -> + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]). + +collect_data_integration(Type, DataSeed) -> maps:fold( fun(K, V, Acc) -> zip_metrics(Type, K, V, Acc) end, [], - di_data(Type, Rules) - ); -collect_data_integration(Type = actions, Rules) -> - maps:fold( - fun(K, V, Acc) -> - zip_metrics(Type, K, V, Acc) - end, - [], - di_data(Type, Rules) - ); -collect_data_integration(Type = connectors, Bridges) -> - maps:fold( - fun(K, V, Acc) -> - zip_metrics(Type, K, V, Acc) - end, - [], - di_data(Type, Bridges) + di_data(Type, DataSeed) ). -if(?EMQX_RELEASE_EDITION == ee). From c3e9533260c852bb256a7d0824920b9c7e7a5fdf Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 16 Jan 2024 19:45:04 +0800 Subject: [PATCH 22/38] fix: prometheus auth metrics fields and type - rm rate fields - fix few fields type to counter --- .../src/emqx_prometheus_auth.erl | 161 ++++++++---------- 1 file changed, 74 insertions(+), 87 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 06d6246f1..57406d2d2 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -32,31 +32,26 @@ [ create_mf/5, gauge_metric/1, - gauge_metrics/1 + gauge_metrics/1, + counter_metrics/1 ] ). 
--type authn_metric_key() :: +-type authn_metric_name() :: emqx_authn_enable | emqx_authn_status | emqx_authn_nomatch | emqx_authn_total | emqx_authn_success - | emqx_authn_failed - | emqx_authn_rate - | emqx_authn_rate_last5m - | emqx_authn_rate_max. + | emqx_authn_failed. --type authz_metric_key() :: +-type authz_metric_name() :: emqx_authz_enable | emqx_authz_status | emqx_authz_nomatch | emqx_authz_total | emqx_authz_success - | emqx_authz_failed - | emqx_authz_rate - | emqx_authz_rate_last5m - | emqx_authz_rate_max. + | emqx_authz_failed. %% Please don't remove this attribute, prometheus uses it to %% automatically register collectors. @@ -71,6 +66,36 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). +-define(AUTHNS_WITH_TYPE, [ + {emqx_authn_enable, gauge}, + {emqx_authn_status, gauge}, + {emqx_authn_nomatch, counter}, + {emqx_authn_total, counter}, + {emqx_authn_success, counter}, + {emqx_authn_failed, counter} +]). + +-define(AUTHZS_WITH_TYPE, [ + {emqx_authz_enable, gauge}, + {emqx_authz_status, gauge}, + {emqx_authz_nomatch, counter}, + {emqx_authz_total, counter}, + {emqx_authz_success, counter}, + {emqx_authz_failed, counter} +]). + +-define(AUTHN_USERS_COUNT_WITH_TYPE, [ + {emqx_authn_users_count, gauge} +]). + +-define(AUTHZ_RULES_COUNT_WITH_TYPE, [ + {emqx_authz_rules_count, gauge} +]). + +-define(BANNED_WITH_TYPE, [ + {emqx_banned_count, gauge} +]). + %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -84,11 +109,11 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). 
%% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - _ = [add_collect_family(Name, authn_data(), Callback, gauge) || Name <- authn()], - _ = [add_collect_family(Name, authn_users_count_data(), Callback, gauge) || Name <- authn_users_count()], - _ = [add_collect_family(Name, authz_data(), Callback, gauge) || Name <- authz()], - _ = [add_collect_family(Name, authz_rules_count_data(), Callback, gauge) || Name <- authz_rules_count()], - _ = [add_collect_family(Name, banned_count_data(), Callback, gauge) || Name <- banned()], + ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, authn_data()), + ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, authn_users_count_data()), + ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, authz_data()), + ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, authz_rules_count_data()), + ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, banned_count_data()), ok; collect_mf(_, _) -> ok. @@ -115,6 +140,10 @@ collect_auth_data(AuthDataType) -> collect_banned_data() -> #{emqx_banned_count => banned_count_data()}. +add_collect_family(Callback, MetricWithType, Data) -> + _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type} <- MetricWithType], + ok. + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). 
@@ -132,19 +161,13 @@ collect_auth(K = emqx_authn_enable, Data) -> collect_auth(K = emqx_authn_status, Data) -> gauge_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_nomatch, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_total, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authn_failed, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authn_rate, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authn_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authn_rate_max, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); %%==================== %% Authn users count %% Only provided for `password_based:built_in_database` and `scram:built_in_database` @@ -157,19 +180,13 @@ collect_auth(K = emqx_authz_enable, Data) -> collect_auth(K = emqx_authz_status, Data) -> gauge_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_nomatch, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_total, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_auth(K = emqx_authz_failed, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_rate, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_auth(K = emqx_authz_rate_max, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); %%==================== %% Authz rules count %% Only provided for `file` and `built_in_database` @@ -190,21 +207,9 @@ collect_auth(emqx_banned_count, Data) -> %%==================== %% Authn overview -authn() -> - [ - emqx_authn_enable, - 
emqx_authn_status, - emqx_authn_nomatch, - emqx_authn_total, - emqx_authn_success, - emqx_authn_failed, - emqx_authn_rate, - emqx_authn_rate_last5m, - emqx_authn_rate_max - ]. -spec authn_data() -> #{Key => [Point]} when - Key :: authn_metric_key(), + Key :: authn_metric_name(), Point :: {[Label], Metric}, Label :: IdLabel, IdLabel :: {id, AuthnName :: binary()}, @@ -216,11 +221,11 @@ authn_data() -> AccIn#{Key => authn_backend_to_points(Key, Authns)} end, #{}, - authn() + authn_metric_names() ). -spec authn_backend_to_points(Key, list(Authn)) -> list(Point) when - Key :: authn_metric_key(), + Key :: authn_metric_name(), Authn :: map(), Point :: {[Label], Metric}, Label :: IdLabel, @@ -238,27 +243,24 @@ do_authn_backend_to_points(K, [Authn | Rest], AccIn) -> lookup_authn_metrics_local(Id) -> case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of - {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ emqx_authn_status => status_to_number(Status), emqx_authn_nomatch => ?MG0(nomatch, Counters), emqx_authn_total => ?MG0(total, Counters), emqx_authn_success => ?MG0(success, Counters), - emqx_authn_failed => ?MG0(failed, Counters), - emqx_authn_rate => ?MG0(current, Rate), - emqx_authn_rate_last5m => ?MG0(last5m, Rate), - emqx_authn_rate_max => ?MG0(max, Rate) + emqx_authn_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authn() -- [emqx_authn_enable], 0) + maps:from_keys(authn_metric_names() -- [emqx_authn_enable], 0) end. +authn_metric_names() -> + metric_names(?AUTHNS_WITH_TYPE). + %%==================== %% Authn users count -authn_users_count() -> - [emqx_authn_users_count]. - -define(AUTHN_MNESIA, emqx_authn_mnesia). -define(AUTHN_SCRAM_MNESIA, emqx_authn_scram_mnesia). 
@@ -283,21 +285,9 @@ authn_users_count_data() -> %%==================== %% Authz overview -authz() -> - [ - emqx_authz_enable, - emqx_authz_status, - emqx_authz_nomatch, - emqx_authz_total, - emqx_authz_success, - emqx_authz_failed, - emqx_authz_rate, - emqx_authz_rate_last5m, - emqx_authz_rate_max - ]. -spec authz_data() -> #{Key => [Point]} when - Key :: authz_metric_key(), + Key :: authz_metric_name(), Point :: {[Label], Metric}, Label :: TypeLabel, TypeLabel :: {type, AuthZType :: binary()}, @@ -309,11 +299,11 @@ authz_data() -> AccIn#{Key => authz_backend_to_points(Key, Authzs)} end, #{}, - authz() + authz_metric_names() ). -spec authz_backend_to_points(Key, list(Authz)) -> list(Point) when - Key :: authz_metric_key(), + Key :: authz_metric_name(), Authz :: map(), Point :: {[Label], Metric}, Label :: TypeLabel, @@ -331,27 +321,24 @@ do_authz_backend_to_points(K, [Authz | Rest], AccIn) -> lookup_authz_metrics_local(Type) -> case emqx_authz_api_sources:lookup_from_local_node(Type) of - {ok, {_Node, Status, #{counters := Counters, rate := Rate}, _ResourceMetrics}} -> + {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ emqx_authz_status => status_to_number(Status), emqx_authz_nomatch => ?MG0(nomatch, Counters), emqx_authz_total => ?MG0(total, Counters), emqx_authz_success => ?MG0(success, Counters), - emqx_authz_failed => ?MG0(failed, Counters), - emqx_authz_rate => ?MG0(current, Rate), - emqx_authz_rate_last5m => ?MG0(last5m, Rate), - emqx_authz_rate_max => ?MG0(max, Rate) + emqx_authz_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authz() -- [emqx_authz_enable], 0) + maps:from_keys(authz_metric_names() -- [emqx_authz_enable], 0) end. +authz_metric_names() -> + metric_names(?AUTHZS_WITH_TYPE). + %%==================== %% Authz rules count -authz_rules_count() -> - [emqx_authz_rules_count]. - -define(ACL_TABLE, emqx_acl). 
authz_rules_count_data() -> @@ -378,9 +365,6 @@ authz_rules_count_data() -> %%==================== %% Banned count -banned() -> - [emqx_banned_count]. - -define(BANNED_TABLE, emqx_banned). banned_count_data() -> mnesia_size(?BANNED_TABLE). @@ -477,3 +461,6 @@ users_or_rule_count(type, Type, #{emqx_authz_rules_count := Points} = _AuthzRule end; users_or_rule_count(_, _, _) -> #{}. + +metric_names(MetricWithType) when is_list(MetricWithType) -> + [Name || {Name, _Type} <- MetricWithType]. From 94032aafb212ad0d15a4056be24931f41930bac9 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 16 Jan 2024 22:59:00 +0800 Subject: [PATCH 23/38] fix(prom_data_integration): fix metric type --- .../src/emqx_prometheus_data_integration.erl | 338 +++++++----------- 1 file changed, 123 insertions(+), 215 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 72fd7a6e9..c41d9a6fb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -26,8 +26,6 @@ -export([add_collect_family/4]). --export([actions_exec_count/0, actions_exec_count_data/0]). - -include("emqx_prometheus.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -36,7 +34,8 @@ [ create_mf/5, gauge_metric/1, - gauge_metrics/1 + gauge_metrics/1, + counter_metrics/1 ] ). @@ -53,6 +52,58 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). +-define(RULES_WITH_TYPE, [ + {emqx_rules_count, gauge} +]). + +-define(CONNECTORS_WITH_TYPE, [ + {emqx_connectors_count, gauge} +]). 
+ +-define(RULES_SPECIFIC_WITH_TYPE, [ + {emqx_rule_matched, counter}, + {emqx_rule_failed, counter}, + {emqx_rule_passed, counter}, + {emqx_rule_failed_exception, counter}, + {emqx_rule_failed_no_result, counter}, + {emqx_rule_actions_total, counter}, + {emqx_rule_actions_success, counter}, + {emqx_rule_actions_failed, counter}, + {emqx_rule_actions_failed_out_of_service, counter}, + {emqx_rule_actions_failed_unknown, counter} +]). + +-define(ACTION_SPECIFIC_WITH_TYPE, [ + {emqx_action_matched, counter}, + {emqx_action_dropped, counter}, + {emqx_action_success, counter}, + {emqx_action_failed, counter}, + {emqx_action_inflight, gauge}, + {emqx_action_received, counter}, + {emqx_action_late_reply, counter}, + {emqx_action_retried, counter}, + {emqx_action_retried_success, counter}, + {emqx_action_retried_failed, counter}, + {emqx_action_dropped_resource_stopped, counter}, + {emqx_action_dropped_resource_not_found, counter}, + {emqx_action_dropped_queue_full, counter}, + {emqx_action_dropped_other, counter}, + {emqx_action_dropped_expired, counter}, + {emqx_action_queuing, gauge} +]). + +-define(CONNECTOR_SPECIFIC_WITH_TYPE, [ + {emqx_connector_enable, gauge}, + {emqx_connector_status, gauge} +]). + +-if(?EMQX_RELEASE_EDITION == ee). +-define(SCHEMA_REGISTRY_WITH_TYPE, [ + emqx_schema_registrys_count +]). +-else. +-endif. 
+ %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -68,31 +119,20 @@ collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), %% Data Integration Overview - _ = [add_collect_family(Name, rules_data(Rules), Callback, gauge) || Name <- rules()], - _ = [add_collect_family(Name, actions_data(Rules), Callback, gauge) || Name <- actions()], - _ = [ - add_collect_family(Name, connectors_data(Bridges), Callback, gauge) - || Name <- connectors() - ], + ok = add_collect_family(Callback, ?RULES_WITH_TYPE, rules_data(Rules)), + ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, connectors_data(Bridges)), ok = maybe_collect_family_schema_registry(Callback), %% Rule Specific - _ = [ - add_collect_family(Name, rule_specific_data(Rules), Callback, gauge) - || Name <- rule_specific() - ], + ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, rule_specific_data(Rules)), %% Action Specific - _ = [ - add_collect_family(Name, action_specific_data(Bridges), Callback, gauge) - || Name <- action_specific() - ], + ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, action_specific_data(Bridges)), %% Connector Specific - _ = [ - add_collect_family(Name, connector_specific_data(Bridges), Callback, gauge) - || Name <- connector_specific() - ], + ok = add_collect_family( + Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, connector_specific_data(Bridges) + ), ok; collect_mf(_, _) -> @@ -114,6 +154,10 @@ collect(<<"prometheus">>) -> %%==================== %% API Helpers +add_collect_family(Callback, MetricWithType, Data) -> + _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type} <- MetricWithType], + ok. + add_collect_family(Name, Data, Callback, Type) -> %% TODO: help document from Name Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). 
@@ -123,15 +167,21 @@ collect_metrics(Name, Metrics) -> collect_data_integration_overview(Rules, Bridges) -> RulesD = rules_data(Rules), - ActionsD = actions_data(Rules), ConnectorsD = connectors_data(Bridges), - M1 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, rules()), - M2 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ActionsD)} end, #{}, actions()), - M3 = lists:foldl(fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, connectors()), - M4 = maybe_collect_schema_registry(), + M1 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, + #{}, + metric_names(?RULES_WITH_TYPE) + ), + M2 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, + #{}, + metric_names(?CONNECTORS_WITH_TYPE) + ), + M3 = maybe_collect_schema_registry(), - lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3, M4]). + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). collect_data_integration(Type, DataSeed) -> maps:fold( @@ -144,10 +194,7 @@ collect_data_integration(Type, DataSeed) -> -if(?EMQX_RELEASE_EDITION == ee). maybe_collect_family_schema_registry(Callback) -> - _ = [ - add_collect_family(Name, schema_registry_data(), Callback, gauge) - || Name <- schema_registry() - ], + ok = add_collect_family(Callback, ?SCHEMA_REGISTRY_WITH_TYPE, schema_registry_data()), ok. 
maybe_collect_schema_registry() -> @@ -171,25 +218,15 @@ maybe_collect_schema_registry() -> %%==================== %% All Rules %% Rules -collect_di(K = emqx_rule_count, Data) -> - gauge_metric(?MG(K, Data)); -collect_di(K = emqx_rules_matched_rate, Data) -> - gauge_metric(?MG(K, Data)); -collect_di(K = emqx_rules_matched_rate_last5m, Data) -> - gauge_metric(?MG(K, Data)); -%%==================== -%% All Actions -collect_di(K = emqx_rules_actions_rate, Data) -> - gauge_metric(?MG(K, Data)); -collect_di(K = emqx_rules_actions_rate_last5m, Data) -> +collect_di(K = emqx_rules_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Schema Registry -collect_di(K = emqx_schema_registry_count, Data) -> +collect_di(K = emqx_schema_registrys_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Connectors -collect_di(K = emqx_connector_count, Data) -> +collect_di(K = emqx_connectors_count, Data) -> gauge_metric(?MG(K, Data)); %%======================================== %% Data Integration for Specific: Rule && Action && Connector @@ -198,71 +235,61 @@ collect_di(K = emqx_connector_count, Data) -> %%==================== %% Specific Rule collect_di(K = emqx_rule_matched, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_passed, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed_exception, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed_no_result, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_total, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_failed, Data) 
-> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_actions_failed_unknown, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_matched_rate, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_matched_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_matched_rate_max, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); %%==================== %% Specific Action collect_di(K = emqx_action_matched, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_failed, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_rate, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_inflight, Data) -> + %% inflight type: gauge gauge_metrics(?MG(K, Data)); collect_di(K = emqx_action_received, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_late_reply, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_retried, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_retried_success, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_retried_failed, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_resource_stopped, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_resource_not_found, Data) -> - gauge_metrics(?MG(K, 
Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_queue_full, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_other, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_dropped_expired, Data) -> - gauge_metrics(?MG(K, Data)); + counter_metrics(?MG(K, Data)); collect_di(K = emqx_action_queuing, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_rate_last5m, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_rate_max, Data) -> + %% queuing type: gauge gauge_metrics(?MG(K, Data)); %%==================== %% Specific Connector @@ -283,100 +310,30 @@ collect_di(K = emqx_connector_status, Data) -> %%==================== %% All Rules -rules() -> - [ - emqx_rule_count, - emqx_rules_matched_rate, - emqx_rules_matched_rate_last5m - ]. - -define(RULE_TAB, emqx_rule_engine). - -rules_data(Rules) -> - Rate = lists:foldl( - fun( - #{id := Id}, - #{emqx_rules_matched_rate := Rate, emqx_rules_matched_rate_last5m := RateLast5m} = AccIn - ) -> - RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id), - AccIn#{ - emqx_rules_matched_rate => Rate + - emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0), - emqx_rules_matched_rate_last5m => RateLast5m + - emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0) - } - end, - _InitAcc = maps:from_keys(rules(), 0), - Rules - ), - Rate#{emqx_rule_count => ets:info(?RULE_TAB, size)}. - -%%==================== -%% All Actions - -actions() -> - [ - emqx_rules_actions_rate, - emqx_rules_actions_rate_last5m - ]. 
- -actions_data(Rules) -> - lists:foldl( - fun( - #{id := Id}, - #{emqx_rules_actions_rate := Rate, emqx_rules_actions_rate_last5m := RateLast5m} = - _AccIn - ) -> - RuleMetrics = emqx_metrics_worker:get_metrics(rule_metrics, Id), - _AccIn#{ - emqx_rules_actions_rate => Rate + - emqx_utils_maps:deep_get([rate, matched, current], RuleMetrics, 0), - emqx_rules_actions_rate_last5m => RateLast5m + - emqx_utils_maps:deep_get([rate, matched, last5m], RuleMetrics, 0) - } - end, - _InitAcc = maps:from_keys(actions(), 0), - Rules - ). - -actions_exec_count() -> - [ - emqx_action_sink, - emqx_action_source - ]. - -actions_exec_count_data() -> - #{}. +rules_data(_Rules) -> + #{ + emqx_rules_count => ets:info(?RULE_TAB, size) + }. %%==================== %% Schema Registry -if(?EMQX_RELEASE_EDITION == ee). -schema_registry() -> - [ - emqx_schema_registry_count - ]. - schema_registry_data() -> #{ - emqx_schema_registry_count => erlang:map_size(emqx_schema_registry:list_schemas()) + emqx_schema_registrys_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. -else. - -endif. %%==================== %% Connectors -connectors() -> - [ - emqx_connector_count - ]. - connectors_data(Brdiges) -> #{ %% Both Bridge V1 and V2 - emqx_connector_count => erlang:length(Brdiges) + emqx_connectors_count => erlang:length(Brdiges) }. %%======================================== @@ -387,29 +344,12 @@ connectors_data(Brdiges) -> %% Specific Rule %% With rule_id as label key: `rule_id` -rule_specific() -> - [ - emqx_rule_matched, - emqx_rule_failed, - emqx_rule_passed, - emqx_rule_failed_exception, - emqx_rule_failed_no_result, - emqx_rule_actions_total, - emqx_rule_actions_success, - emqx_rule_actions_failed, - emqx_rule_actions_failed_out_of_service, - emqx_rule_actions_failed_unknown, - emqx_rule_matched_rate, - emqx_rule_matched_rate_last5m, - emqx_rule_matched_rate_max - ]. 
- rule_specific_data(Rules) -> lists:foldl( fun(#{id := Id} = Rule, AccIn) -> merge_acc_with_rules(Id, get_metric(Rule), AccIn) end, - maps:from_keys(rule_specific(), []), + maps:from_keys(metric_names(?RULES_SPECIFIC_WITH_TYPE), []), Rules ). @@ -427,7 +367,7 @@ rule_point(Id, V) -> get_metric(#{id := Id} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of - #{counters := Counters, rate := #{matched := MatchedRate}} -> + #{counters := Counters} -> #{ emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), @@ -440,10 +380,7 @@ get_metric(#{id := Id} = _Rule) -> emqx_rule_actions_failed_out_of_service => ?MG( 'actions.failed.out_of_service', Counters ), - emqx_rule_actions_failed_unknown => ?MG('actions.failed.unknown', Counters), - emqx_rule_matched_rate => ?MG(current, MatchedRate), - emqx_rule_matched_rate_last5m => ?MG(last5m, MatchedRate), - emqx_rule_matched_rate_max => ?MG(max, MatchedRate) + emqx_rule_actions_failed_unknown => ?MG('actions.failed.unknown', Counters) } end. @@ -451,36 +388,13 @@ get_metric(#{id := Id} = _Rule) -> %% Specific Action %% With action_id: `{type}:{name}` as label key: `action_id` -action_specific() -> - [ - emqx_action_matched, - emqx_action_dropped, - emqx_action_success, - emqx_action_failed, - emqx_action_rate, - emqx_action_inflight, - emqx_action_received, - emqx_action_late_reply, - emqx_action_retried, - emqx_action_retried_success, - emqx_action_retried_failed, - emqx_action_dropped_resource_stopped, - emqx_action_dropped_resource_not_found, - emqx_action_dropped_queue_full, - emqx_action_dropped_other, - emqx_action_dropped_expired, - emqx_action_queuing, - emqx_action_rate_last5m, - emqx_action_rate_max - ]. 
- action_specific_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = _Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, - maps:from_keys(action_specific(), []), + maps:from_keys(metric_names(?ACTION_SPECIFIC_WITH_TYPE), []), Bridges ). @@ -498,13 +412,12 @@ action_point(Id, V) -> get_bridge_metric(Type, Name) -> case emqx_bridge:get_metrics(Type, Name) of - #{counters := Counters, rate := #{matched := MatchedRate}, gauges := Gauges} -> + #{counters := Counters, gauges := Gauges} -> #{ emqx_action_matched => ?MG0(matched, Counters), emqx_action_dropped => ?MG0(dropped, Counters), emqx_action_success => ?MG0(success, Counters), emqx_action_failed => ?MG0(failed, Counters), - emqx_action_rate => ?MG0(current, MatchedRate), emqx_action_inflight => ?MG0(inflight, Gauges), emqx_action_received => ?MG0(received, Counters), emqx_action_late_reply => ?MG0(late_reply, Counters), @@ -518,9 +431,7 @@ get_bridge_metric(Type, Name) -> emqx_action_dropped_queue_full => ?MG0('dropped.queue_full', Counters), emqx_action_dropped_other => ?MG0('dropped.other', Counters), emqx_action_dropped_expired => ?MG0('dropped.expired', Counters), - emqx_action_queuing => ?MG0(queuing, Gauges), - emqx_action_rate_last5m => ?MG0(last5m, MatchedRate), - emqx_action_rate_max => ?MG0(max, MatchedRate) + emqx_action_queuing => ?MG0(queuing, Gauges) } end. @@ -528,19 +439,13 @@ get_bridge_metric(Type, Name) -> %% Specific Connector %% With connector_id: `{type}:{name}` as label key: `connector_id` -connector_specific() -> - [ - emqx_connector_enable, - emqx_connector_status - ]. 
- connector_specific_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) end, - maps:from_keys(connector_specific(), []), + maps:from_keys(metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE), []), Bridges ). @@ -623,3 +528,6 @@ di_data(connectors, Bridges) -> connector_specific_data(Bridges). label_key(rules) -> id; label_key(actions) -> id; label_key(connectors) -> id. + +metric_names(MetricWithType) when is_list(MetricWithType) -> + [Name || {Name, _Type} <- MetricWithType]. From fb330f77e602dcf0a587c89535c93af3b09db61c Mon Sep 17 00:00:00 2001 From: JimMoen Date: Tue, 16 Jan 2024 16:32:41 +0800 Subject: [PATCH 24/38] feat(prometheus): api `format_mode` parameter support - node(default): The only supported format_mode for PushGateway. Return the current node's metrics. - Without label `node_name` with content-type: `text/plain`. - Without key `node_name` with content-type: `application/json` - nodes_aggregated: Return all nodes metrics Arithmetic-Sum or Logical-Sum. See details in callback modules. - Logical-Sum for metrics named with `xxx_enable` or `xxx_status`. - Arithmetic-Sum for other metrics. `node_name` field: - Without label `node_name` with content-type: `text/plain`. - Without key `node_name` with content-type: `application/json` - nodes_unaggregated: Return all nodes metrics without aggregated. `node_name` field: - _With_ label `node_name` with content-type: `text/plain`. 
- _With_ key `node_name` with content-type: `application/json` --- .../src/emqx_prometheus_api.erl | 60 ++++++++++++++++--- .../src/emqx_prometheus_sup.erl | 3 + 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 32cb89177..9263b6a6a 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -21,10 +21,19 @@ -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/logger.hrl"). +-import( + hoconsc, + [ + mk/2, + ref/1 + ] +). + -export([ api_spec/0, paths/0, - schema/1 + schema/1, + fields/1 ]). -export([ @@ -35,6 +44,8 @@ ]). -define(TAGS, [<<"Monitor">>]). +-define(IS_TRUE(Val), ((Val =:= true) orelse (Val =:= <<"true">>))). +-define(IS_FALSE(Val), ((Val =:= false) orelse (Val =:= <<"false">>))). api_spec() -> emqx_dashboard_swagger:spec(?MODULE, #{check_schema => true}). @@ -73,6 +84,7 @@ schema("/prometheus/auth") -> #{ description => ?DESC(get_prom_auth_data), tags => ?TAGS, + parameters => [ref(format_mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -85,6 +97,7 @@ schema("/prometheus/stats") -> #{ description => ?DESC(get_prom_data), tags => ?TAGS, + parameters => [ref(format_mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -97,6 +110,7 @@ schema("/prometheus/data_integration") -> #{ description => ?DESC(get_prom_data_integration_data), tags => ?TAGS, + parameters => [ref(format_mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -108,6 +122,22 @@ security() -> true -> [#{'basicAuth' => []}, #{'bearerAuth' => []}]; false -> [] end. + +fields(format_mode) -> + [ + {format_mode, + mk( + hoconsc:enum([node, nodes_aggregated, nodes_unaggregated]), + #{ + default => node, + desc => <<"Metrics format mode.">>, + in => query, + required => false, + example => false + } + )} + ]. 
+ %%-------------------------------------------------------------------- %% API Handler funcs %%-------------------------------------------------------------------- @@ -129,21 +159,21 @@ setting(put, #{body := Body}) -> {500, 'INTERNAL_ERROR', Message} end. -stats(get, #{headers := Headers}) -> - collect(emqx_prometheus, Headers). +stats(get, #{headers := Headers, query_string := Qs}) -> + collect(emqx_prometheus, collect_opts(Headers, Qs)). -auth(get, #{headers := Headers}) -> - collect(emqx_prometheus_auth, Headers). +auth(get, #{headers := Headers, query_string := Qs}) -> + collect(emqx_prometheus_auth, collect_opts(Headers, Qs)). -data_integration(get, #{headers := Headers}) -> - collect(emqx_prometheus_data_integration, Headers). +data_integration(get, #{headers := Headers, query_string := Qs}) -> + collect(emqx_prometheus_data_integration, collect_opts(Headers, Qs)). %%-------------------------------------------------------------------- %% Internal funcs %%-------------------------------------------------------------------- -collect(Module, Headers) -> - Type = response_type(Headers), +collect(Module, #{type := Type, format_mode := FormatMode}) -> + erlang:put(format_mode, FormatMode), Data = case erlang:function_exported(Module, collect, 1) of true -> @@ -157,11 +187,23 @@ collect(Module, Headers) -> end, gen_response(Type, Data). +collect_opts(Headers, Qs) -> + #{type => response_type(Headers), format_mode => format_mode(Qs)}. + response_type(#{<<"accept">> := <<"application/json">>}) -> <<"json">>; response_type(_) -> <<"prometheus">>. +format_mode(#{<<"format_mode">> := <<"node">>}) -> + node; +format_mode(#{<<"format_mode">> := <<"nodes_aggregated">>}) -> + nodes_aggregated; +format_mode(#{<<"format_mode">> := <<"nodes_unaggregated">>}) -> + nodes_unaggregated; +format_mode(_) -> + node. 
+ gen_response(<<"json">>, Data) -> {200, Data}; gen_response(<<"prometheus">>, Data) -> diff --git a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl index ea8a2ebaa..1c7eb73e4 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl @@ -67,6 +67,9 @@ init([]) -> Children = case emqx_prometheus_config:is_push_gateway_server_enabled(Conf) of false -> []; + %% TODO: add push gateway for endpoints + %% `/prometheus/auth` + %% `/prometheus/data_integration` true -> [?CHILD(emqx_prometheus, Conf)] end, {ok, {{one_for_one, 10, 3600}, Children}}. From 57f3efde63dd254952754060faa9773ced15a860 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Wed, 17 Jan 2024 17:18:46 +0800 Subject: [PATCH 25/38] feat(prom_auth): cluster metrics with different format-mode --- apps/emqx/priv/bpapi.versions | 1 + .../src/emqx_prometheus_api.erl | 12 +- .../src/emqx_prometheus_auth.erl | 312 +++++++++++++----- .../src/proto/emqx_prometheus_proto_v2.erl | 52 +++ 4 files changed, 290 insertions(+), 87 deletions(-) create mode 100644 apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl diff --git a/apps/emqx/priv/bpapi.versions b/apps/emqx/priv/bpapi.versions index 9bd824242..859d7fbe0 100644 --- a/apps/emqx/priv/bpapi.versions +++ b/apps/emqx/priv/bpapi.versions @@ -58,6 +58,7 @@ {emqx_persistent_session_ds,1}. {emqx_plugins,1}. {emqx_prometheus,1}. +{emqx_prometheus,2}. {emqx_resource,1}. {emqx_retainer,1}. {emqx_retainer,2}. diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 9263b6a6a..ea71e7ee2 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -43,6 +43,8 @@ data_integration/2 ]). +-export([lookup_from_local_nodes/3]). + -define(TAGS, [<<"Monitor">>]). -define(IS_TRUE(Val), ((Val =:= true) orelse (Val =:= <<"true">>))). 
-define(IS_FALSE(Val), ((Val =:= false) orelse (Val =:= <<"false">>))). @@ -138,6 +140,10 @@ fields(format_mode) -> )} ]. +%% bpapi +lookup_from_local_nodes(M, F, A) -> + erlang:apply(M, F, A). + %%-------------------------------------------------------------------- %% API Handler funcs %%-------------------------------------------------------------------- @@ -195,11 +201,11 @@ response_type(#{<<"accept">> := <<"application/json">>}) -> response_type(_) -> <<"prometheus">>. -format_mode(#{<<"format_mode">> := <<"node">>}) -> +format_mode(#{<<"format_mode">> := node}) -> node; -format_mode(#{<<"format_mode">> := <<"nodes_aggregated">>}) -> +format_mode(#{<<"format_mode">> := nodes_aggregated}) -> nodes_aggregated; -format_mode(#{<<"format_mode">> := <<"nodes_unaggregated">>}) -> +format_mode(#{<<"format_mode">> := nodes_unaggregated}) -> nodes_unaggregated; format_mode(_) -> node. diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 57406d2d2..3e9a9d007 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -23,6 +23,11 @@ -export([collect/1]). +%% for bpapi +-export([ + fetch_metric_data_from_local_node/0 +]). + -include("emqx_prometheus.hrl"). -include_lib("emqx_auth/include/emqx_authn_chains.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -65,6 +70,7 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). +-define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). -define(AUTHNS_WITH_TYPE, [ {emqx_authn_enable, gauge}, @@ -96,6 +102,13 @@ {emqx_banned_count, gauge} ]). +-define(LOGICAL_SUM_METRIC_NAMES, [ + emqx_authn_enable, + emqx_authn_status, + emqx_authz_enable, + emqx_authz_status +]). 
+ %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -109,37 +122,29 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). %% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, authn_data()), - ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, authn_users_count_data()), - ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, authz_data()), - ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, authz_rules_count_data()), - ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, banned_count_data()), + RawData = raw_data(erlang:get(format_mode)), + ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), + ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), + ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), + ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, ?MG(authz_rules_count, RawData)), + ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, ?MG(banned_count, RawData)), ok; collect_mf(_, _) -> ok. %% @private collect(<<"json">>) -> + FormatMode = erlang:get(format_mode), + RawData = raw_data(FormatMode), + %% TODO: merge node name in json format #{ - emqx_authn => collect_auth_data(authn), - emqx_authz => collect_auth_data(authz), + emqx_authn => collect_json_data(?MG(authn, RawData)), + emqx_authz => collect_json_data(?MG(authz, RawData)), emqx_banned => collect_banned_data() }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_AUTH_REGISTRY). -collect_auth_data(AuthDataType) -> - maps:fold( - fun(K, V, Acc) -> - zip_auth_metrics(AuthDataType, K, V, Acc) - end, - [], - auth_data(AuthDataType) - ). - -collect_banned_data() -> - #{emqx_banned_count => banned_count_data()}. 
- add_collect_family(Callback, MetricWithType, Data) -> _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type} <- MetricWithType], ok. @@ -150,6 +155,38 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_auth(Name, Metrics). +%% @private +fetch_metric_data_from_local_node() -> + {node(self()), #{ + authn => authn_data(), + authz => authz_data() + }}. + +fetch_cluster_consistented_metric_data() -> + #{ + authn_users_count => authn_users_count_data(), + authz_rules_count => authz_rules_count_data(), + banned_count => banned_count_data() + }. + +%% raw data for different format modes +raw_data(nodes_aggregated) -> + AggregatedNodesMetrics = aggre_cluster(all_nodes_metrics()), + maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(nodes_unaggregated) -> + %% then fold from all nodes + AllNodesMetrics = with_node_name_label(all_nodes_metrics()), + maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(node) -> + {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), + maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). + +all_nodes_metrics() -> + Nodes = mria:running_nodes(), + _ResL = emqx_prometheus_proto_v2:raw_prom_data( + Nodes, ?MODULE, fetch_metric_data_from_local_node, [] + ). + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -370,9 +407,174 @@ banned_count_data() -> mnesia_size(?BANNED_TABLE). 
%%-------------------------------------------------------------------- -%% Helper functions +%% Collect functions %%-------------------------------------------------------------------- +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `application/json` +collect_json_data(Data) -> + maps:fold( + fun(K, V, Acc) -> + zip_json_metrics(K, V, Acc) + end, + [], + Data + ). + +collect_banned_data() -> + #{emqx_banned_count => banned_count_data()}. + +zip_json_metrics(Key, Points, [] = _AccIn) -> + lists:foldl( + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + %% for initialized empty AccIn + %% The following fields will be put into Result + %% For Authn: + %% `id`, `emqx_authn_users_count` + %% For Authz: + %% `type`, `emqx_authz_rules_count`n + Point = (maps:merge(LablesKVMap, users_or_rule_count(LablesKVMap)))#{Key => Metric}, + [Point | AccIn2] + end, + [], + Points + ); +zip_json_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl( + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] + end, + [], + Points + ), + lists:zipwith( + fun(AllResulted, ThisKeyMetricOut) -> + maps:merge(AllResulted, ThisKeyMetricOut) + end, + AllResultedAcc, + ThisKeyResult + ). + +user_rule_data(authn) -> authn_users_count_data(); +user_rule_data(authz) -> authz_rules_count_data(). + +users_or_rule_count(#{id := Id}) -> + #{emqx_authn_users_count := Points} = user_rule_data(authn), + case lists:keyfind([{id, Id}], 1, Points) of + {_, Metric} -> + #{emqx_authn_users_count => Metric}; + false -> + #{} + end; +users_or_rule_count(#{type := Type}) -> + #{emqx_authz_rules_count := Points} = user_rule_data(authz), + case lists:keyfind([{type, Type}], 1, Points) of + {_, Metric} -> + #{emqx_authz_rules_count => Metric}; + false -> + #{} + end; +users_or_rule_count(_) -> + #{}. 
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `text/plain` +aggre_cluster(ResL) -> + do_aggre_cluster(ResL, aggre_or_zip_init_acc()). + +do_aggre_cluster([], AccIn) -> + AccIn; +do_aggre_cluster( + [{ok, {_NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], + #{authn := AuthnAcc, authz := AuthzAcc} = AccIn +) -> + do_aggre_cluster( + Rest, + AccIn#{ + authn => do_aggre_metric(NodeAuthnMetrics, AuthnAcc), + authz => do_aggre_metric(NodeAuthzMetrics, AuthzAcc) + } + ); +do_aggre_cluster([{_, _} | Rest], AccIn) -> + do_aggre_cluster(Rest, AccIn). + +do_aggre_metric(NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_aggre_metric(K, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NMetric = + case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of + true -> + logic_sum(Metric, ?PG0(Labels, AccIn)); + false -> + Metric + ?PG0(Labels, AccIn) + end, + [{Labels, NMetric} | AccIn] + end, + AccL, + NodeMetrics + ). + +logic_sum(N1, N2) when + (N1 > 0 andalso N2 > 0) +-> + 1; +logic_sum(_, _) -> + 0. + +with_node_name_label(ResL) -> + do_with_node_name_label(ResL, aggre_or_zip_init_acc()). + +do_with_node_name_label([], AccIn) -> + AccIn; +do_with_node_name_label( + [{ok, {NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], + #{authn := AuthnAcc, authz := AuthzAcc} = AccIn +) -> + do_with_node_name_label( + Rest, + AccIn#{ + authn => zip_with_node_name(NodeName, NodeAuthnMetrics, AuthnAcc), + authz => zip_with_node_name(NodeName, NodeAuthzMetrics, AuthzAcc) + } + ); +do_with_node_name_label([{_, _} | Rest], AccIn) -> + do_with_node_name_label(Rest, AccIn). 
+ +zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NLabels = [{node_name, NodeName} | Labels], + [{NLabels, Metric} | AccIn] + end, + AccL, + NodeMetrics + ). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Helper funcs + authenticator_id(Authn) -> emqx_authn_chains:authenticator_id(Authn). @@ -398,69 +600,11 @@ boolean_to_number(false) -> 0. status_to_number(connected) -> 1; status_to_number(stopped) -> 0. -zip_auth_metrics(AuthDataType, K, V, Acc) -> - LabelK = label_key(AuthDataType), - UserOrRuleD = user_rule_data(AuthDataType), - do_zip_auth_metrics(LabelK, UserOrRuleD, K, V, Acc). - -do_zip_auth_metrics(LabelK, UserOrRuleD, Key, Points, [] = _AccIn) -> - lists:foldl( - fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> - %% for initialized empty AccIn - %% The following fields will be put into Result - %% For Authn: - %% `id`, `emqx_authn_users_count` - %% For Authz: - %% `type`, `emqx_authz_rules_count` - Point = (users_or_rule_count(LabelK, LabelV, UserOrRuleD))#{ - LabelK => LabelV, Key => Metric - }, - [Point | AccIn2] - end, - [], - Points - ); -do_zip_auth_metrics(LabelK, _UserOrRuleD, Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl( - fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> - [#{LabelK => Id, Key => Metric} | AccIn2] - end, - [], - Points - ), - lists:zipwith( - fun(AllResulted, ThisKeyMetricOut) -> - maps:merge(AllResulted, ThisKeyMetricOut) - end, - AllResultedAcc, - ThisKeyResult - ). - -auth_data(authn) -> authn_data(); -auth_data(authz) -> authz_data(). - -label_key(authn) -> id; -label_key(authz) -> type. 
- -user_rule_data(authn) -> authn_users_count_data(); -user_rule_data(authz) -> authz_rules_count_data(). - -users_or_rule_count(id, Id, #{emqx_authn_users_count := Points} = _AuthnUsersD) -> - case lists:keyfind([{id, Id}], 1, Points) of - {_, Metric} -> - #{emqx_authn_users_count => Metric}; - false -> - #{} - end; -users_or_rule_count(type, Type, #{emqx_authz_rules_count := Points} = _AuthzRulesD) -> - case lists:keyfind([{type, Type}], 1, Points) of - {_, Metric} -> - #{emqx_authz_rules_count => Metric}; - false -> - #{} - end; -users_or_rule_count(_, _, _) -> - #{}. - metric_names(MetricWithType) when is_list(MetricWithType) -> [Name || {Name, _Type} <- MetricWithType]. + +aggre_or_zip_init_acc() -> + #{ + authn => maps:from_keys(authn_metric_names(), []), + authz => maps:from_keys(authz_metric_names(), []) + }. diff --git a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl new file mode 100644 index 000000000..e3f9b0a26 --- /dev/null +++ b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl @@ -0,0 +1,52 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_prometheus_proto_v2). + +-behaviour(emqx_bpapi). 
+ +-export([ + introduced_in/0, + start/1, + stop/1, + + raw_prom_data/4 +]). + +-include_lib("emqx/include/bpapi.hrl"). + +introduced_in() -> + "5.5.0". + +-spec start([node()]) -> emqx_rpc:multicall_result(). +start(Nodes) -> + rpc:multicall(Nodes, emqx_prometheus, do_start, [], 5000). + +-spec stop([node()]) -> emqx_rpc:multicall_result(). +stop(Nodes) -> + rpc:multicall(Nodes, emqx_prometheus, do_stop, [], 5000). + +-type key() :: atom() | binary() | [byte()]. + +-spec raw_prom_data([node()], key(), key(), key()) -> emqx_rpc:erpc_multicall(term()). +raw_prom_data(Nodes, M, F, A) -> + erpc:multicall( + Nodes, + emqx_prometheus_api, + lookup_from_local_nodes, + [M, F, A], + 5000 + ). From 4fb1ff2f9d44d37c53072da0fc50babbaba07c2e Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 03:22:26 +0800 Subject: [PATCH 26/38] fix(prom_api): format-mode example value --- apps/emqx_prometheus/src/emqx_prometheus_api.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index ea71e7ee2..9b903b53a 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -135,7 +135,7 @@ fields(format_mode) -> desc => <<"Metrics format mode.">>, in => query, required => false, - example => false + example => node } )} ]. 
From 5914eb5ca58e2b8c99bd0935da8aef87da2b67d3 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 03:27:37 +0800 Subject: [PATCH 27/38] fix(prom_cert): rm cacertfile expiry epoch --- apps/emqx_prometheus/src/emqx_prometheus.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 286243c0c..264d818c9 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -746,10 +746,9 @@ emqx_certs() -> -spec emqx_certs_data() -> [_Point :: {[Label], Epoch}] when - Label :: TypeLabel | NameLabel | CertTypeLabel, + Label :: TypeLabel | NameLabel, TypeLabel :: {listener_type, ssl | wss | quic}, NameLabel :: {listener_name, atom()}, - CertTypeLabel :: {cert_type, cacertfile | certfile}, Epoch :: non_neg_integer(). emqx_certs_data() -> case emqx_config:get([listeners], undefined) of @@ -769,7 +768,7 @@ emqx_certs_data() -> points_of_listeners(Type, AllListeners) -> do_points_of_listeners(Type, maps:get(Type, AllListeners, undefined)). --define(CERT_TYPES, [cacertfile, certfile]). +-define(CERT_TYPES, [certfile]). -spec do_points_of_listeners(Type, TypeOfListeners) -> [_Point :: {[{LabelKey, LabelValue}], Epoch}] @@ -792,7 +791,7 @@ do_points_of_listeners(ListenerType, TypeOfListeners) -> ) of undefined -> AccIn; - Path -> [gen_point(ListenerType, Name, CertType, Path) | AccIn] + Path -> [gen_point(ListenerType, Name, Path) | AccIn] end end, [], @@ -803,13 +802,12 @@ do_points_of_listeners(ListenerType, TypeOfListeners) -> maps:keys(TypeOfListeners) ). 
-gen_point(Type, Name, CertType, Path) -> +gen_point(Type, Name, Path) -> { %% Labels: [{_Labelkey, _LabelValue}] [ {listener_type, Type}, - {listener_name, Name}, - {cert_type, CertType} + {listener_name, Name} ], %% Value cert_expiry_at_from_path(Path) From 5534d5e9dee427b0e4cc90a7f23cdbedf8587e6d Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 09:17:22 +0800 Subject: [PATCH 28/38] fix(bpapi): make static_check happy --- apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl index e3f9b0a26..e770dc0ab 100644 --- a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl +++ b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v2.erl @@ -39,9 +39,10 @@ start(Nodes) -> stop(Nodes) -> rpc:multicall(Nodes, emqx_prometheus, do_stop, [], 5000). --type key() :: atom() | binary() | [byte()]. +-type key() :: atom(). +-type arg() :: list(term()). --spec raw_prom_data([node()], key(), key(), key()) -> emqx_rpc:erpc_multicall(term()). +-spec raw_prom_data([node()], key(), key(), arg()) -> emqx_rpc:erpc_multicall(term()). raw_prom_data(Nodes, M, F, A) -> erpc:multicall( Nodes, From 9627124d678621358267b236882c91f5628e21df Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 10:08:41 +0800 Subject: [PATCH 29/38] fix(prom_di): metric `emqx_rule_enable` --- .../src/emqx_prometheus_data_integration.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index c41d9a6fb..edbdc1afb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -61,6 +61,7 @@ ]). 
-define(RULES_SPECIFIC_WITH_TYPE, [ + {emqx_rule_enable, gauge}, {emqx_rule_matched, counter}, {emqx_rule_failed, counter}, {emqx_rule_passed, counter}, @@ -234,6 +235,8 @@ collect_di(K = emqx_connectors_count, Data) -> %%==================== %% Specific Rule +collect_di(K = emqx_rule_enable, Data) -> + gauge_metrics(?MG(K, Data)); collect_di(K = emqx_rule_matched, Data) -> counter_metrics(?MG(K, Data)); collect_di(K = emqx_rule_failed, Data) -> @@ -365,10 +368,11 @@ merge_acc_with_rules(Id, RuleMetrics, PointsAcc) -> rule_point(Id, V) -> {[{id, Id}], V}. -get_metric(#{id := Id} = _Rule) -> +get_metric(#{id := Id, enable := Bool} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of #{counters := Counters} -> #{ + emqx_rule_enable => boolean_to_number(Bool), emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), emqx_rule_passed => ?MG(passed, Counters), From c3da7923233b1d07f8208c47d5c8137fdc66f2da Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 11:23:23 +0800 Subject: [PATCH 30/38] feat(prom_di): cluster aggregated/unaggregated metrics --- .../src/emqx_prometheus_data_integration.erl | 310 ++++++++++++++---- 1 file changed, 246 insertions(+), 64 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index edbdc1afb..06a417d2d 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -24,6 +24,11 @@ -export([collect/1]). +%% for bpapi +-export([ + fetch_metric_data_from_local_node/0 +]). + -export([add_collect_family/4]). -include("emqx_prometheus.hrl"). @@ -105,6 +110,12 @@ -else. -endif. +-define(LOGICAL_SUM_METRIC_NAMES, [ + emqx_rule_enable, + emqx_connector_enable, + emqx_connector_status +]). 
+ %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -117,23 +128,24 @@ deregister_cleanup(_) -> ok. _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> - Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), + RawData = raw_data(erlang:get(format_mode)), + %% Data Integration Overview - ok = add_collect_family(Callback, ?RULES_WITH_TYPE, rules_data(Rules)), - ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, connectors_data(Bridges)), + ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), + ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, ?MG(connectors_data, RawData)), ok = maybe_collect_family_schema_registry(Callback), %% Rule Specific - ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, rule_specific_data(Rules)), + RuleSpecificDs = ?MG(rule_specific_data, RawData), + ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, RuleSpecificDs), %% Action Specific - ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, action_specific_data(Bridges)), + ActionSpecificDs = ?MG(action_specific_data, RawData), + ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, ActionSpecificDs), %% Connector Specific - ok = add_collect_family( - Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, connector_specific_data(Bridges) - ), + ConnectorSpecificDs = ?MG(connector_specific_data, RawData), + ok = add_collect_family(Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, ConnectorSpecificDs), ok; collect_mf(_, _) -> @@ -141,13 +153,14 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> + RawData = raw_data(erlang:get(format_mode)), Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ data_integration_overview => collect_data_integration_overview(Rules, Bridges), - rules => 
collect_data_integration(rules, Rules), - actions => collect_data_integration(actions, Bridges), - connectors => collect_data_integration(connectors, Bridges) + rules => collect_json_data(?MG(rule_specific_data, RawData)), + actions => collect_json_data(?MG(action_specific_data, RawData)), + connectors => collect_json_data(?MG(connector_specific_data, RawData)) }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). @@ -166,32 +179,23 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). -collect_data_integration_overview(Rules, Bridges) -> - RulesD = rules_data(Rules), - ConnectorsD = connectors_data(Bridges), +%% @private +fetch_metric_data_from_local_node() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + {node(self()), #{ + rule_specific_data => rule_specific_data(Rules), + action_specific_data => action_specific_data(Bridges), + connector_specific_data => connector_specific_data(Bridges) + }}. - M1 = lists:foldl( - fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, - #{}, - metric_names(?RULES_WITH_TYPE) - ), - M2 = lists:foldl( - fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, - #{}, - metric_names(?CONNECTORS_WITH_TYPE) - ), - M3 = maybe_collect_schema_registry(), - - lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). - -collect_data_integration(Type, DataSeed) -> - maps:fold( - fun(K, V, Acc) -> - zip_metrics(Type, K, V, Acc) - end, - [], - di_data(Type, DataSeed) - ). +fetch_cluster_consistented_metric_data() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + (maybe_collect_schema_registry())#{ + rules_data => rules_data(Rules), + connectors_data => connectors_data(Bridges) + }. -if(?EMQX_RELEASE_EDITION == ee). maybe_collect_family_schema_registry(Callback) -> @@ -208,6 +212,24 @@ maybe_collect_schema_registry() -> #{}. -endif. 
+%% raw data for different format modes +raw_data(nodes_aggregated) -> + AggregatedNodesMetrics = aggre_cluster(metrics_data_from_all_nodes()), + maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(nodes_unaggregated) -> + %% then fold from all nodes + AllNodesMetrics = with_node_name_label(metrics_data_from_all_nodes()), + maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); +raw_data(node) -> + {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), + maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). + +metrics_data_from_all_nodes() -> + Nodes = mria:running_nodes(), + _ResL = emqx_prometheus_proto_v2:raw_prom_data( + Nodes, ?MODULE, fetch_metric_data_from_local_node, [] + ). + %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -352,7 +374,7 @@ rule_specific_data(Rules) -> fun(#{id := Id} = Rule, AccIn) -> merge_acc_with_rules(Id, get_metric(Rule), AccIn) end, - maps:from_keys(metric_names(?RULES_SPECIFIC_WITH_TYPE), []), + maps:from_keys(rule_specific_metric_names(), []), Rules ). @@ -388,6 +410,9 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> } end. +rule_specific_metric_names() -> + metric_names(?RULES_SPECIFIC_WITH_TYPE). + %%==================== %% Specific Action %% With action_id: `{type}:{name}` as label key: `action_id` @@ -398,7 +423,7 @@ action_specific_data(Bridges) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, - maps:from_keys(metric_names(?ACTION_SPECIFIC_WITH_TYPE), []), + maps:from_keys(action_specific_metric_names(), []), Bridges ). @@ -439,6 +464,9 @@ get_bridge_metric(Type, Name) -> } end. +action_specific_metric_names() -> + metric_names(?ACTION_SPECIFIC_WITH_TYPE). 
+ %%==================== %% Specific Connector %% With connector_id: `{type}:{name}` as label key: `connector_id` @@ -449,7 +477,7 @@ connector_specific_data(Bridges) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) end, - maps:from_keys(metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE), []), + maps:from_keys(connectr_specific_metric_names(), []), Bridges ). @@ -473,24 +501,47 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> emqx_connector_status => status_to_number(Status) }. -%%-------------------------------------------------------------------- +connectr_specific_metric_names() -> + metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). %%-------------------------------------------------------------------- -%% Help funcs +%% Collect functions +%%-------------------------------------------------------------------- -boolean_to_number(true) -> 1; -boolean_to_number(false) -> 0. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `application/json` -status_to_number(connected) -> 1; -status_to_number(disconnected) -> 0. +collect_data_integration_overview(Rules, Bridges) -> + RulesD = rules_data(Rules), + ConnectorsD = connectors_data(Bridges), -zip_metrics(Type, K, V, Acc) -> - LabelK = label_key(Type), - do_zip_metrics(LabelK, K, V, Acc). + M1 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, + #{}, + metric_names(?RULES_WITH_TYPE) + ), + M2 = lists:foldl( + fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, + #{}, + metric_names(?CONNECTORS_WITH_TYPE) + ), + M3 = maybe_collect_schema_registry(), -do_zip_metrics(LabelK, Key, Points, [] = _AccIn) -> + lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). + +collect_json_data(Data) -> + maps:fold( + fun(K, V, Acc) -> + zip_json_metrics(K, V, Acc) + end, + [], + Data + ). 
+ +zip_json_metrics(Key, Points, [] = _AccIn) -> lists:foldl( - fun({[{K, LabelV}], Metric}, AccIn2) when K =:= LabelK -> + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), %% for initialized empty AccIn %% The following fields will be put into Result %% For Rules: @@ -500,19 +551,17 @@ do_zip_metrics(LabelK, Key, Points, [] = _AccIn) -> %% FOR Connectors %% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID %% formatted with {type}:{name} - Point = - #{ - LabelK => LabelV, Key => Metric - }, + Point = LablesKVMap#{Key => Metric}, [Point | AccIn2] end, [], Points ); -do_zip_metrics(LabelK, Key, Points, AllResultedAcc) -> +zip_json_metrics(Key, Points, AllResultedAcc) -> ThisKeyResult = lists:foldl( - fun({[{K, Id}], Metric}, AccIn2) when K =:= LabelK -> - [#{LabelK => Id, Key => Metric} | AccIn2] + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] end, [], Points @@ -525,13 +574,146 @@ do_zip_metrics(LabelK, Key, Points, AllResultedAcc) -> ThisKeyResult ). -di_data(rules, Rules) -> rule_specific_data(Rules); -di_data(actions, Bridges) -> action_specific_data(Bridges); -di_data(connectors, Bridges) -> connector_specific_data(Bridges). +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `text/plain` +aggre_cluster(ResL) -> + do_aggre_cluster(ResL, aggre_or_zip_init_acc()). -label_key(rules) -> id; -label_key(actions) -> id; -label_key(connectors) -> id. 
+do_aggre_cluster([], AccIn) -> + AccIn; +do_aggre_cluster( + [ + {ok, + {_NodeName, #{ + rule_specific_data := NodeRuleMetrics, + action_specific_data := NodeActionMetrics, + connector_specific_data := NodeConnectorMetrics + }}} + | Rest + ], + #{ + rule_specific_data := RuleAcc, + action_specific_data := ActionAcc, + connector_specific_data := ConnAcc + } = AccIn +) -> + do_aggre_cluster( + Rest, + AccIn#{ + %% TODO + rule_specific_data => do_aggre_metric(NodeRuleMetrics, RuleAcc), + action_specific_data => do_aggre_metric(NodeActionMetrics, ActionAcc), + connector_specific_data => do_aggre_metric(NodeConnectorMetrics, ConnAcc) + } + ); +do_aggre_cluster([{_, _} | Rest], AccIn) -> + do_aggre_cluster(Rest, AccIn). + +do_aggre_metric(NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +-define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). + +do_aggre_metric(K, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NMetric = + case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of + true -> + logic_sum(Metric, ?PG0(Labels, AccIn)); + false -> + Metric + ?PG0(Labels, AccIn) + end, + [{Labels, NMetric} | AccIn] + end, + AccL, + NodeMetrics + ). + +with_node_name_label(ResL) -> + do_with_node_name_label( + ResL, + aggre_or_zip_init_acc() + ). 
+ +do_with_node_name_label([], AccIn) -> + AccIn; +do_with_node_name_label( + [ + {ok, + {NodeName, #{ + rule_specific_data := NodeRuleMetrics, + action_specific_data := NodeActionMetrics, + connector_specific_data := NodeConnectorMetrics + }}} + | Rest + ], + #{ + rule_specific_data := RuleAcc, + action_specific_data := ActionAcc, + connector_specific_data := ConnAcc + } = AccIn +) -> + do_with_node_name_label( + Rest, + AccIn#{ + rule_specific_data => zip_with_node_name(NodeName, NodeRuleMetrics, RuleAcc), + action_specific_data => zip_with_node_name(NodeName, NodeActionMetrics, ActionAcc), + connector_specific_data => zip_with_node_name(NodeName, NodeConnectorMetrics, ConnAcc) + } + ); +do_with_node_name_label([{_, _} | Rest], AccIn) -> + do_with_node_name_label(Rest, AccIn). + +zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NLabels = [{node_name, NodeName} | Labels], + [{NLabels, Metric} | AccIn] + end, + AccL, + NodeMetrics + ). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Helper funcs + +boolean_to_number(true) -> 1; +boolean_to_number(false) -> 0. + +status_to_number(connected) -> 1; +status_to_number(disconnected) -> 0. + +logic_sum(N1, N2) when + (N1 > 0 andalso N2 > 0) +-> + 1; +logic_sum(_, _) -> + 0. metric_names(MetricWithType) when is_list(MetricWithType) -> [Name || {Name, _Type} <- MetricWithType]. + +aggre_or_zip_init_acc() -> + #{ + rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), + action_specific_data => maps:from_keys(action_specific_metric_names(), []), + connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) + }. 
From 8cb12c6a74ffafe96d2d66441fda44de3c277ca0 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 17:49:24 +0800 Subject: [PATCH 31/38] refactor: use utils func for prom_auth and prom_di --- .../src/emqx_prometheus_auth.erl | 162 +++----------- .../src/emqx_prometheus_data_integration.erl | 198 +++--------------- .../src/emqx_prometheus_utils.erl | 160 ++++++++++++++ 3 files changed, 214 insertions(+), 306 deletions(-) create mode 100644 apps/emqx_prometheus/src/emqx_prometheus_utils.erl diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 3e9a9d007..de91fb8b2 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -28,6 +28,11 @@ fetch_metric_data_from_local_node/0 ]). +%% %% @private +-export([ + zip_json_auth_metrics/3 +]). + -include("emqx_prometheus.hrl"). -include_lib("emqx_auth/include/emqx_authn_chains.hrl"). -include_lib("prometheus/include/prometheus.hrl"). @@ -282,7 +287,7 @@ lookup_authn_metrics_local(Id) -> case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authn_status => status_to_number(Status), + emqx_authn_status => emqx_prometheus_utils:status_to_number(Status), emqx_authn_nomatch => ?MG0(nomatch, Counters), emqx_authn_total => ?MG0(total, Counters), emqx_authn_success => ?MG0(success, Counters), @@ -293,7 +298,7 @@ lookup_authn_metrics_local(Id) -> end. authn_metric_names() -> - metric_names(?AUTHNS_WITH_TYPE). + emqx_prometheus_utils:metric_names(?AUTHNS_WITH_TYPE). 
%%==================== %% Authn users count @@ -360,7 +365,7 @@ lookup_authz_metrics_local(Type) -> case emqx_authz_api_sources:lookup_from_local_node(Type) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authz_status => status_to_number(Status), + emqx_authz_status => emqx_prometheus_utils:status_to_number(Status), emqx_authz_nomatch => ?MG0(nomatch, Counters), emqx_authz_total => ?MG0(total, Counters), emqx_authz_success => ?MG0(success, Counters), @@ -371,7 +376,7 @@ lookup_authz_metrics_local(Type) -> end. authz_metric_names() -> - metric_names(?AUTHZS_WITH_TYPE). + emqx_prometheus_utils:metric_names(?AUTHZS_WITH_TYPE). %%==================== %% Authz rules count @@ -412,56 +417,35 @@ banned_count_data() -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `application/json` + collect_json_data(Data) -> - maps:fold( - fun(K, V, Acc) -> - zip_json_metrics(K, V, Acc) - end, - [], - Data - ). + emqx_prometheus_utils:collect_json_data(Data, fun zip_json_auth_metrics/3). collect_banned_data() -> #{emqx_banned_count => banned_count_data()}. 
-zip_json_metrics(Key, Points, [] = _AccIn) -> +%% for initialized empty AccIn +%% The following fields will be put into Result +%% For Authn: +%% `id`, `emqx_authn_users_count` +%% For Authz: +%% `type`, `emqx_authz_rules_count`n +zip_json_auth_metrics(Key, Points, [] = _AccIn) -> lists:foldl( fun({Lables, Metric}, AccIn2) -> LablesKVMap = maps:from_list(Lables), - %% for initialized empty AccIn - %% The following fields will be put into Result - %% For Authn: - %% `id`, `emqx_authn_users_count` - %% For Authz: - %% `type`, `emqx_authz_rules_count`n Point = (maps:merge(LablesKVMap, users_or_rule_count(LablesKVMap)))#{Key => Metric}, [Point | AccIn2] end, [], Points ); -zip_json_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl( - fun({Lables, Metric}, AccIn2) -> - LablesKVMap = maps:from_list(Lables), - [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] - end, - [], - Points - ), - lists:zipwith( - fun(AllResulted, ThisKeyMetricOut) -> - maps:merge(AllResulted, ThisKeyMetricOut) - end, - AllResultedAcc, - ThisKeyResult - ). - -user_rule_data(authn) -> authn_users_count_data(); -user_rule_data(authz) -> authz_rules_count_data(). +zip_json_auth_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). 
users_or_rule_count(#{id := Id}) -> - #{emqx_authn_users_count := Points} = user_rule_data(authn), + #{emqx_authn_users_count := Points} = authn_users_count_data(), case lists:keyfind([{id, Id}], 1, Points) of {_, Metric} -> #{emqx_authn_users_count => Metric}; @@ -469,7 +453,7 @@ users_or_rule_count(#{id := Id}) -> #{} end; users_or_rule_count(#{type := Type}) -> - #{emqx_authz_rules_count := Points} = user_rule_data(authz), + #{emqx_authz_rules_count := Points} = authz_rules_count_data(), case lists:keyfind([{type, Type}], 1, Points) of {_, Metric} -> #{emqx_authz_rules_count => Metric}; @@ -482,95 +466,10 @@ users_or_rule_count(_) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `text/plain` aggre_cluster(ResL) -> - do_aggre_cluster(ResL, aggre_or_zip_init_acc()). - -do_aggre_cluster([], AccIn) -> - AccIn; -do_aggre_cluster( - [{ok, {_NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], - #{authn := AuthnAcc, authz := AuthzAcc} = AccIn -) -> - do_aggre_cluster( - Rest, - AccIn#{ - authn => do_aggre_metric(NodeAuthnMetrics, AuthnAcc), - authz => do_aggre_metric(NodeAuthzMetrics, AuthzAcc) - } - ); -do_aggre_cluster([{_, _} | Rest], AccIn) -> - do_aggre_cluster(Rest, AccIn). - -do_aggre_metric(NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - -do_aggre_metric(K, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NMetric = - case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of - true -> - logic_sum(Metric, ?PG0(Labels, AccIn)); - false -> - Metric + ?PG0(Labels, AccIn) - end, - [{Labels, NMetric} | AccIn] - end, - AccL, - NodeMetrics - ). - -logic_sum(N1, N2) when - (N1 > 0 andalso N2 > 0) --> - 1; -logic_sum(_, _) -> - 0. + emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). 
with_node_name_label(ResL) -> - do_with_node_name_label(ResL, aggre_or_zip_init_acc()). - -do_with_node_name_label([], AccIn) -> - AccIn; -do_with_node_name_label( - [{ok, {NodeName, #{authn := NodeAuthnMetrics, authz := NodeAuthzMetrics}}} | Rest], - #{authn := AuthnAcc, authz := AuthzAcc} = AccIn -) -> - do_with_node_name_label( - Rest, - AccIn#{ - authn => zip_with_node_name(NodeName, NodeAuthnMetrics, AuthnAcc), - authz => zip_with_node_name(NodeName, NodeAuthzMetrics, AuthzAcc) - } - ); -do_with_node_name_label([{_, _} | Rest], AccIn) -> - do_with_node_name_label(Rest, AccIn). - -zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - -do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NLabels = [{node_name, NodeName} | Labels], - [{NLabels, Metric} | AccIn] - end, - AccL, - NodeMetrics - ). + emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Helper funcs @@ -590,19 +489,10 @@ mnesia_size(Tab) -> mnesia:table_info(Tab, size). do_metric(emqx_authn_enable, #{enable := B}, _) -> - boolean_to_number(B); + emqx_prometheus_utils:boolean_to_number(B); do_metric(K, _, Metrics) -> ?MG0(K, Metrics). -boolean_to_number(true) -> 1; -boolean_to_number(false) -> 0. - -status_to_number(connected) -> 1; -status_to_number(stopped) -> 0. - -metric_names(MetricWithType) when is_list(MetricWithType) -> - [Name || {Name, _Type} <- MetricWithType]. 
- aggre_or_zip_init_acc() -> #{ authn => maps:from_keys(authn_metric_names(), []), diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 06a417d2d..3d31b4e1e 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -24,6 +24,10 @@ -export([collect/1]). +-export([ + zip_json_data_integration_metrics/3 +]). + %% for bpapi -export([ fetch_metric_data_from_local_node/0 @@ -394,7 +398,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of #{counters := Counters} -> #{ - emqx_rule_enable => boolean_to_number(Bool), + emqx_rule_enable => emqx_prometheus_utils:boolean_to_number(Bool), emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), emqx_rule_passed => ?MG(passed, Counters), @@ -411,7 +415,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> end. rule_specific_metric_names() -> - metric_names(?RULES_SPECIFIC_WITH_TYPE). + emqx_prometheus_utils:metric_names(?RULES_SPECIFIC_WITH_TYPE). %%==================== %% Specific Action @@ -465,7 +469,7 @@ get_bridge_metric(Type, Name) -> end. action_specific_metric_names() -> - metric_names(?ACTION_SPECIFIC_WITH_TYPE). + emqx_prometheus_utils:metric_names(?ACTION_SPECIFIC_WITH_TYPE). %%==================== %% Specific Connector @@ -497,12 +501,12 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), Status = ?MG(status, ResourceData), #{ - emqx_connector_enable => boolean_to_number(Enabled), - emqx_connector_status => status_to_number(Status) + emqx_connector_enable => emqx_prometheus_utils:boolean_to_number(Enabled), + emqx_connector_status => emqx_prometheus_utils:status_to_number(Status) }. connectr_specific_metric_names() -> - metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). 
+ emqx_prometheus_utils:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). %%-------------------------------------------------------------------- %% Collect functions @@ -510,7 +514,6 @@ connectr_specific_metric_names() -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `application/json` - collect_data_integration_overview(Rules, Bridges) -> RulesD = rules_data(Rules), ConnectorsD = connectors_data(Bridges), @@ -518,199 +521,54 @@ collect_data_integration_overview(Rules, Bridges) -> M1 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, - metric_names(?RULES_WITH_TYPE) + emqx_prometheus_utils:metric_names(?RULES_WITH_TYPE) ), M2 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, - metric_names(?CONNECTORS_WITH_TYPE) + emqx_prometheus_utils:metric_names(?CONNECTORS_WITH_TYPE) ), M3 = maybe_collect_schema_registry(), lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). collect_json_data(Data) -> - maps:fold( - fun(K, V, Acc) -> - zip_json_metrics(K, V, Acc) - end, - [], - Data - ). + emqx_prometheus_utils:collect_json_data(Data, fun zip_json_data_integration_metrics/3). 
-zip_json_metrics(Key, Points, [] = _AccIn) -> +%% for initialized empty AccIn +%% The following fields will be put into Result +%% For Rules: +%% `id` => [RULE_ID] +%% For Actions +%% `id` => [ACTION_ID] +%% FOR Connectors +%% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID +%% formatted with {type}:{name} +zip_json_data_integration_metrics(Key, Points, [] = _AccIn) -> lists:foldl( fun({Lables, Metric}, AccIn2) -> LablesKVMap = maps:from_list(Lables), - %% for initialized empty AccIn - %% The following fields will be put into Result - %% For Rules: - %% `id` => [RULE_ID] - %% For Actions - %% `id` => [ACTION_ID] - %% FOR Connectors - %% `id` => [CONNECTOR_ID] %% CONNECTOR_ID = BRIDGE_ID - %% formatted with {type}:{name} Point = LablesKVMap#{Key => Metric}, [Point | AccIn2] end, [], Points ); -zip_json_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl( - fun({Lables, Metric}, AccIn2) -> - LablesKVMap = maps:from_list(Lables), - [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] - end, - [], - Points - ), - lists:zipwith( - fun(AllResulted, ThisKeyMetricOut) -> - maps:merge(AllResulted, ThisKeyMetricOut) - end, - AllResultedAcc, - ThisKeyResult - ). +zip_json_data_integration_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `text/plain` aggre_cluster(ResL) -> - do_aggre_cluster(ResL, aggre_or_zip_init_acc()). 
- -do_aggre_cluster([], AccIn) -> - AccIn; -do_aggre_cluster( - [ - {ok, - {_NodeName, #{ - rule_specific_data := NodeRuleMetrics, - action_specific_data := NodeActionMetrics, - connector_specific_data := NodeConnectorMetrics - }}} - | Rest - ], - #{ - rule_specific_data := RuleAcc, - action_specific_data := ActionAcc, - connector_specific_data := ConnAcc - } = AccIn -) -> - do_aggre_cluster( - Rest, - AccIn#{ - %% TODO - rule_specific_data => do_aggre_metric(NodeRuleMetrics, RuleAcc), - action_specific_data => do_aggre_metric(NodeActionMetrics, ActionAcc), - connector_specific_data => do_aggre_metric(NodeConnectorMetrics, ConnAcc) - } - ); -do_aggre_cluster([{_, _} | Rest], AccIn) -> - do_aggre_cluster(Rest, AccIn). - -do_aggre_metric(NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_aggre_metric(K, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - --define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). - -do_aggre_metric(K, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NMetric = - case lists:member(K, ?LOGICAL_SUM_METRIC_NAMES) of - true -> - logic_sum(Metric, ?PG0(Labels, AccIn)); - false -> - Metric + ?PG0(Labels, AccIn) - end, - [{Labels, NMetric} | AccIn] - end, - AccL, - NodeMetrics - ). + emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). with_node_name_label(ResL) -> - do_with_node_name_label( - ResL, - aggre_or_zip_init_acc() - ). 
- -do_with_node_name_label([], AccIn) -> - AccIn; -do_with_node_name_label( - [ - {ok, - {NodeName, #{ - rule_specific_data := NodeRuleMetrics, - action_specific_data := NodeActionMetrics, - connector_specific_data := NodeConnectorMetrics - }}} - | Rest - ], - #{ - rule_specific_data := RuleAcc, - action_specific_data := ActionAcc, - connector_specific_data := ConnAcc - } = AccIn -) -> - do_with_node_name_label( - Rest, - AccIn#{ - rule_specific_data => zip_with_node_name(NodeName, NodeRuleMetrics, RuleAcc), - action_specific_data => zip_with_node_name(NodeName, NodeActionMetrics, ActionAcc), - connector_specific_data => zip_with_node_name(NodeName, NodeConnectorMetrics, ConnAcc) - } - ); -do_with_node_name_label([{_, _} | Rest], AccIn) -> - do_with_node_name_label(Rest, AccIn). - -zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> - lists:foldl( - fun(K, AccIn) -> - NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), - AccIn#{K => NAccL} - end, - AccIn0, - maps:keys(NodeMetrics) - ). - -do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> - lists:foldl( - fun({Labels, Metric}, AccIn) -> - NLabels = [{node_name, NodeName} | Labels], - [{NLabels, Metric} | AccIn] - end, - AccL, - NodeMetrics - ). + emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Helper funcs -boolean_to_number(true) -> 1; -boolean_to_number(false) -> 0. - -status_to_number(connected) -> 1; -status_to_number(disconnected) -> 0. - -logic_sum(N1, N2) when - (N1 > 0 andalso N2 > 0) --> - 1; -logic_sum(_, _) -> - 0. - -metric_names(MetricWithType) when is_list(MetricWithType) -> - [Name || {Name, _Type} <- MetricWithType]. 
- aggre_or_zip_init_acc() -> #{ rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), diff --git a/apps/emqx_prometheus/src/emqx_prometheus_utils.erl b/apps/emqx_prometheus/src/emqx_prometheus_utils.erl new file mode 100644 index 000000000..fadfb5c47 --- /dev/null +++ b/apps/emqx_prometheus/src/emqx_prometheus_utils.erl @@ -0,0 +1,160 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_prometheus_utils). + +-export([ + collect_json_data/2, + + aggre_cluster/3, + with_node_name_label/2, + + point_to_map_fun/1, + + boolean_to_number/1, + status_to_number/1, + metric_names/1 +]). + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). + +collect_json_data(Data, Func) when is_function(Func, 3) -> + maps:fold( + fun(K, V, Acc) -> + Func(K, V, Acc) + end, + [], + Data + ); +collect_json_data(_, _) -> + error(badarg). + +aggre_cluster(LogicSumKs, ResL, Init) -> + do_aggre_cluster(LogicSumKs, ResL, Init). 
+ +do_aggre_cluster(_LogicSumKs, [], AccIn) -> + AccIn; +do_aggre_cluster(LogicSumKs, [{ok, {_NodeName, NodeMetric}} | Rest], AccIn) -> + do_aggre_cluster( + LogicSumKs, + Rest, + maps:fold( + fun(K, V, AccIn0) -> + AccIn0#{K => aggre_metric(LogicSumKs, V, ?MG(K, AccIn0))} + end, + AccIn, + NodeMetric + ) + %% merge_node_and_acc() + ); +do_aggre_cluster(LogicSumKs, [{_, _} | Rest], AccIn) -> + do_aggre_cluster(LogicSumKs, Rest, AccIn). + +aggre_metric(LogicSumKs, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_aggre_metric( + K, LogicSumKs, ?MG(K, NodeMetrics), ?MG(K, AccIn) + ), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_aggre_metric(K, LogicSumKs, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NMetric = + case lists:member(K, LogicSumKs) of + true -> + logic_sum(Metric, ?PG0(Labels, AccIn)); + false -> + Metric + ?PG0(Labels, AccIn) + end, + [{Labels, NMetric} | AccIn] + end, + AccL, + NodeMetrics + ). + +with_node_name_label(ResL, Init) -> + do_with_node_name_label(ResL, Init). + +do_with_node_name_label([], AccIn) -> + AccIn; +do_with_node_name_label([{ok, {NodeName, NodeMetric}} | Rest], AccIn) -> + do_with_node_name_label( + Rest, + maps:fold( + fun(K, V, AccIn0) -> + AccIn0#{ + K => zip_with_node_name(NodeName, V, ?MG(K, AccIn0)) + } + end, + AccIn, + NodeMetric + ) + ); +do_with_node_name_label([{_, _} | Rest], AccIn) -> + do_with_node_name_label(Rest, AccIn). + +zip_with_node_name(NodeName, NodeMetrics, AccIn0) -> + lists:foldl( + fun(K, AccIn) -> + NAccL = do_zip_with_node_name(NodeName, ?MG(K, NodeMetrics), ?MG(K, AccIn)), + AccIn#{K => NAccL} + end, + AccIn0, + maps:keys(NodeMetrics) + ). + +do_zip_with_node_name(NodeName, NodeMetrics, AccL) -> + lists:foldl( + fun({Labels, Metric}, AccIn) -> + NLabels = [{node, NodeName} | Labels], + [{NLabels, Metric} | AccIn] + end, + AccL, + NodeMetrics + ). 
+ +point_to_map_fun(Key) -> + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + [maps:merge(LablesKVMap, #{Key => Metric}) | AccIn2] + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +logic_sum(N1, N2) when + (N1 > 0 andalso N2 > 0) +-> + 1; +logic_sum(_, _) -> + 0. + +boolean_to_number(true) -> 1; +boolean_to_number(false) -> 0. + +status_to_number(connected) -> 1; +%% for auth +status_to_number(stopped) -> 0; +%% for data_integration +status_to_number(disconnected) -> 0. + +metric_names(MetricWithType) when is_list(MetricWithType) -> + [Name || {Name, _Type} <- MetricWithType]. From b480c5b3718349978a4cb52398eed51a1ccc1b8b Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 11:23:40 +0800 Subject: [PATCH 32/38] fix(prom): use name `mode` and macros to put/get format mode --- .../include/emqx_prometheus.hrl | 15 +++++++ .../src/emqx_prometheus_api.erl | 39 +++++++++++-------- .../src/emqx_prometheus_auth.erl | 11 +++--- .../src/emqx_prometheus_data_integration.erl | 10 ++--- 4 files changed, 47 insertions(+), 28 deletions(-) diff --git a/apps/emqx_prometheus/include/emqx_prometheus.hrl b/apps/emqx_prometheus/include/emqx_prometheus.hrl index 9057f2b14..b36f647da 100644 --- a/apps/emqx_prometheus/include/emqx_prometheus.hrl +++ b/apps/emqx_prometheus/include/emqx_prometheus.hrl @@ -28,3 +28,18 @@ ?PROMETHEUS_AUTH_REGISTRY, ?PROMETHEUS_DATA_INTEGRATION_REGISTRY ]). + +-define(PROM_DATA_MODE__NODE, node). +-define(PROM_DATA_MODE__ALL_NODES_AGGREGATED, all_nodes_aggregated). +-define(PROM_DATA_MODE__ALL_NODES_UNAGGREGATED, all_nodes_unaggregated). + +-define(PROM_DATA_MODES, [ + ?PROM_DATA_MODE__NODE, + ?PROM_DATA_MODE__ALL_NODES_AGGREGATED, + ?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED +]). + +-define(PROM_DATA_MODE_KEY__, prom_data_mode). + +-define(PUT_PROM_DATA_MODE(MODE__), erlang:put(?PROM_DATA_MODE_KEY__, MODE__)). +-define(GET_PROM_DATA_MODE(), erlang:get(?PROM_DATA_MODE_KEY__)). 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 9b903b53a..47a5b0299 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -18,6 +18,7 @@ -behaviour(minirest_api). +-include("emqx_prometheus.hrl"). -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/logger.hrl"). @@ -86,7 +87,7 @@ schema("/prometheus/auth") -> #{ description => ?DESC(get_prom_auth_data), tags => ?TAGS, - parameters => [ref(format_mode)], + parameters => [ref(mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -99,7 +100,7 @@ schema("/prometheus/stats") -> #{ description => ?DESC(get_prom_data), tags => ?TAGS, - parameters => [ref(format_mode)], + parameters => [ref(mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -112,7 +113,7 @@ schema("/prometheus/data_integration") -> #{ description => ?DESC(get_prom_data_integration_data), tags => ?TAGS, - parameters => [ref(format_mode)], + parameters => [ref(mode)], security => security(), responses => #{200 => prometheus_data_schema()} @@ -125,11 +126,11 @@ security() -> false -> [] end. 
-fields(format_mode) -> +fields(mode) -> [ - {format_mode, + {mode, mk( - hoconsc:enum([node, nodes_aggregated, nodes_unaggregated]), + hoconsc:enum(?PROM_DATA_MODES), #{ default => node, desc => <<"Metrics format mode.">>, @@ -178,8 +179,13 @@ data_integration(get, #{headers := Headers, query_string := Qs}) -> %% Internal funcs %%-------------------------------------------------------------------- -collect(Module, #{type := Type, format_mode := FormatMode}) -> - erlang:put(format_mode, FormatMode), +collect(Module, #{type := Type, mode := Mode}) -> + %% `Mode` is used to control the format of the returned data + %% It will used in callback `Module:collect_mf/1` to fetch data from node or cluster + %% And use this mode parameter to determine the formatting method of the returned information. + %% Since the arity of the callback function has been fixed. + %% so it is placed in the process dictionary of the current process. + ?PUT_PROM_DATA_MODE(Mode), Data = case erlang:function_exported(Module, collect, 1) of true -> @@ -194,21 +200,20 @@ collect(Module, #{type := Type, format_mode := FormatMode}) -> gen_response(Type, Data). collect_opts(Headers, Qs) -> - #{type => response_type(Headers), format_mode => format_mode(Qs)}. + #{type => response_type(Headers), mode => mode(Qs)}. response_type(#{<<"accept">> := <<"application/json">>}) -> <<"json">>; response_type(_) -> <<"prometheus">>. -format_mode(#{<<"format_mode">> := node}) -> - node; -format_mode(#{<<"format_mode">> := nodes_aggregated}) -> - nodes_aggregated; -format_mode(#{<<"format_mode">> := nodes_unaggregated}) -> - nodes_unaggregated; -format_mode(_) -> - node. +mode(#{<<"mode">> := Mode}) -> + case lists:member(Mode, ?PROM_DATA_MODES) of + true -> Mode; + false -> ?PROM_DATA_MODE__NODE + end; +mode(_) -> + ?PROM_DATA_MODE__NODE. 
gen_response(<<"json">>, Data) -> {200, Data}; diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index de91fb8b2..02010aaf7 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -127,7 +127,7 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). %% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - RawData = raw_data(erlang:get(format_mode)), + RawData = raw_data(?GET_PROM_DATA_MODE()), ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), @@ -139,8 +139,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - FormatMode = erlang:get(format_mode), - RawData = raw_data(FormatMode), + RawData = raw_data(?GET_PROM_DATA_MODE()), %% TODO: merge node name in json format #{ emqx_authn => collect_json_data(?MG(authn, RawData)), @@ -175,14 +174,14 @@ fetch_cluster_consistented_metric_data() -> }. %% raw data for different format modes -raw_data(nodes_aggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> AggregatedNodesMetrics = aggre_cluster(all_nodes_metrics()), maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(nodes_unaggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> %% then fold from all nodes AllNodesMetrics = with_node_name_label(all_nodes_metrics()), maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(node) -> +raw_data(?PROM_DATA_MODE__NODE) -> {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 3d31b4e1e..729e1f640 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -132,7 +132,7 @@ deregister_cleanup(_) -> ok. _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> - RawData = raw_data(erlang:get(format_mode)), + RawData = raw_data(?GET_PROM_DATA_MODE()), %% Data Integration Overview ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), @@ -157,7 +157,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - RawData = raw_data(erlang:get(format_mode)), + RawData = raw_data(?GET_PROM_DATA_MODE()), Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ @@ -217,14 +217,14 @@ maybe_collect_schema_registry() -> -endif. %% raw data for different format modes -raw_data(nodes_aggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> AggregatedNodesMetrics = aggre_cluster(metrics_data_from_all_nodes()), maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(nodes_unaggregated) -> +raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> %% then fold from all nodes AllNodesMetrics = with_node_name_label(metrics_data_from_all_nodes()), maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(node) -> +raw_data(?PROM_DATA_MODE__NODE) -> {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). 
From c6c1a7fc288b3ce6f0fb9bf0c70edd1db2c3f421 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 15:52:09 +0800 Subject: [PATCH 33/38] refactor(prom): prom_auth and prom_di as prom_cluster behaviour --- .../src/emqx_prometheus_auth.erl | 68 ++++------- ..._utils.erl => emqx_prometheus_cluster.erl} | 43 ++++++- .../src/emqx_prometheus_data_integration.erl | 115 ++++++++---------- 3 files changed, 113 insertions(+), 113 deletions(-) rename apps/emqx_prometheus/src/{emqx_prometheus_utils.erl => emqx_prometheus_cluster.erl} (76%) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 02010aaf7..5fa9057da 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -24,8 +24,12 @@ -export([collect/1]). %% for bpapi +-behaviour(emqx_prometheus_cluster). -export([ - fetch_metric_data_from_local_node/0 + fetch_data_from_local_node/0, + fetch_cluster_consistented_data/0, + aggre_or_zip_init_acc/0, + logic_sum_metrics/0 ]). %% %% @private @@ -127,7 +131,7 @@ deregister_cleanup(_) -> ok. Callback :: prometheus_collector:collect_mf_callback(). 
%% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), @@ -139,8 +143,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), - %% TODO: merge node name in json format + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), #{ emqx_authn => collect_json_data(?MG(authn, RawData)), emqx_authz => collect_json_data(?MG(authz, RawData)), @@ -159,37 +162,28 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_auth(Name, Metrics). -%% @private -fetch_metric_data_from_local_node() -> +%% behaviour +fetch_data_from_local_node() -> {node(self()), #{ authn => authn_data(), authz => authz_data() }}. -fetch_cluster_consistented_metric_data() -> +fetch_cluster_consistented_data() -> #{ authn_users_count => authn_users_count_data(), authz_rules_count => authz_rules_count_data(), banned_count => banned_count_data() }. -%% raw data for different format modes -raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> - AggregatedNodesMetrics = aggre_cluster(all_nodes_metrics()), - maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> - %% then fold from all nodes - AllNodesMetrics = with_node_name_label(all_nodes_metrics()), - maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__NODE) -> - {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), - maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). 
+aggre_or_zip_init_acc() -> + #{ + authn => maps:from_keys(authn_metric_names(), []), + authz => maps:from_keys(authz_metric_names(), []) + }. -all_nodes_metrics() -> - Nodes = mria:running_nodes(), - _ResL = emqx_prometheus_proto_v2:raw_prom_data( - Nodes, ?MODULE, fetch_metric_data_from_local_node, [] - ). +logic_sum_metrics() -> + ?LOGICAL_SUM_METRIC_NAMES. %%-------------------------------------------------------------------- %% Collector @@ -286,7 +280,7 @@ lookup_authn_metrics_local(Id) -> case emqx_authn_api:lookup_from_local_node(?GLOBAL, Id) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authn_status => emqx_prometheus_utils:status_to_number(Status), + emqx_authn_status => emqx_prometheus_cluster:status_to_number(Status), emqx_authn_nomatch => ?MG0(nomatch, Counters), emqx_authn_total => ?MG0(total, Counters), emqx_authn_success => ?MG0(success, Counters), @@ -297,7 +291,7 @@ lookup_authn_metrics_local(Id) -> end. authn_metric_names() -> - emqx_prometheus_utils:metric_names(?AUTHNS_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?AUTHNS_WITH_TYPE). %%==================== %% Authn users count @@ -364,7 +358,7 @@ lookup_authz_metrics_local(Type) -> case emqx_authz_api_sources:lookup_from_local_node(Type) of {ok, {_Node, Status, #{counters := Counters}, _ResourceMetrics}} -> #{ - emqx_authz_status => emqx_prometheus_utils:status_to_number(Status), + emqx_authz_status => emqx_prometheus_cluster:status_to_number(Status), emqx_authz_nomatch => ?MG0(nomatch, Counters), emqx_authz_total => ?MG0(total, Counters), emqx_authz_success => ?MG0(success, Counters), @@ -375,7 +369,7 @@ lookup_authz_metrics_local(Type) -> end. authz_metric_names() -> - emqx_prometheus_utils:metric_names(?AUTHZS_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?AUTHZS_WITH_TYPE). 
%%==================== %% Authz rules count @@ -418,7 +412,7 @@ banned_count_data() -> %% merge / zip formatting funcs for type `application/json` collect_json_data(Data) -> - emqx_prometheus_utils:collect_json_data(Data, fun zip_json_auth_metrics/3). + emqx_prometheus_cluster:collect_json_data(Data, fun zip_json_auth_metrics/3). collect_banned_data() -> #{emqx_banned_count => banned_count_data()}. @@ -440,7 +434,7 @@ zip_json_auth_metrics(Key, Points, [] = _AccIn) -> Points ); zip_json_auth_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + ThisKeyResult = lists:foldl(emqx_prometheus_cluster:point_to_map_fun(Key), [], Points), lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). users_or_rule_count(#{id := Id}) -> @@ -462,14 +456,6 @@ users_or_rule_count(#{type := Type}) -> users_or_rule_count(_) -> #{}. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% merge / zip formatting funcs for type `text/plain` -aggre_cluster(ResL) -> - emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). - -with_node_name_label(ResL) -> - emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Helper funcs @@ -488,12 +474,6 @@ mnesia_size(Tab) -> mnesia:table_info(Tab, size). do_metric(emqx_authn_enable, #{enable := B}, _) -> - emqx_prometheus_utils:boolean_to_number(B); + emqx_prometheus_cluster:boolean_to_number(B); do_metric(K, _, Metrics) -> ?MG0(K, Metrics). - -aggre_or_zip_init_acc() -> - #{ - authn => maps:from_keys(authn_metric_names(), []), - authz => maps:from_keys(authz_metric_names(), []) - }. 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_utils.erl b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl similarity index 76% rename from apps/emqx_prometheus/src/emqx_prometheus_utils.erl rename to apps/emqx_prometheus/src/emqx_prometheus_cluster.erl index fadfb5c47..e48df0f8b 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_utils.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl @@ -13,9 +13,13 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %%-------------------------------------------------------------------- --module(emqx_prometheus_utils). +-module(emqx_prometheus_cluster). + +-include("emqx_prometheus.hrl"). -export([ + raw_data/2, + collect_json_data/2, aggre_cluster/3, @@ -28,9 +32,34 @@ metric_names/1 ]). +-callback fetch_cluster_consistented_data() -> map(). + +-callback fetch_data_from_local_node() -> {node(), map()}. + +-callback aggre_or_zip_init_acc() -> map(). + -define(MG(K, MAP), maps:get(K, MAP)). -define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). +raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> + AllNodesMetrics = aggre_cluster(Module), + Cluster = Module:fetch_cluster_consistented_data(), + maps:merge(AllNodesMetrics, Cluster); +raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> + AllNodesMetrics = with_node_name_label(Module), + Cluster = Module:fetch_cluster_consistented_data(), + maps:merge(AllNodesMetrics, Cluster); +raw_data(Module, ?PROM_DATA_MODE__NODE) -> + {_Node, LocalNodeMetrics} = Module:fetch_data_from_local_node(), + Cluster = Module:fetch_cluster_consistented_data(), + maps:merge(LocalNodeMetrics, Cluster). + +metrics_data_from_all_nodes(Module) -> + Nodes = mria:running_nodes(), + _ResL = emqx_prometheus_proto_v2:raw_prom_data( + Nodes, Module, fetch_data_from_local_node, [] + ). 
+ collect_json_data(Data, Func) when is_function(Func, 3) -> maps:fold( fun(K, V, Acc) -> @@ -42,6 +71,17 @@ collect_json_data(Data, Func) when is_function(Func, 3) -> collect_json_data(_, _) -> error(badarg). +aggre_cluster(Module) -> + do_aggre_cluster( + Module:logic_sum_metrics(), + metrics_data_from_all_nodes(Module), + Module:aggre_or_zip_init_acc() + ). + +with_node_name_label(Module) -> + ResL = metrics_data_from_all_nodes(Module), + do_with_node_name_label(ResL, Module:aggre_or_zip_init_acc()). + aggre_cluster(LogicSumKs, ResL, Init) -> do_aggre_cluster(LogicSumKs, ResL, Init). @@ -58,7 +98,6 @@ do_aggre_cluster(LogicSumKs, [{ok, {_NodeName, NodeMetric}} | Rest], AccIn) -> AccIn, NodeMetric ) - %% merge_node_and_acc() ); do_aggre_cluster(LogicSumKs, [{_, _} | Rest], AccIn) -> do_aggre_cluster(LogicSumKs, Rest, AccIn). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index 729e1f640..bfd011eaa 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -29,8 +29,12 @@ ]). %% for bpapi +-behaviour(emqx_prometheus_cluster). -export([ - fetch_metric_data_from_local_node/0 + fetch_data_from_local_node/0, + fetch_cluster_consistented_data/0, + aggre_or_zip_init_acc/0, + logic_sum_metrics/0 ]). -export([add_collect_family/4]). @@ -120,6 +124,37 @@ emqx_connector_status ]). +%%-------------------------------------------------------------------- +%% Callback for emqx_prometheus_cluster +%%-------------------------------------------------------------------- + +fetch_data_from_local_node() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + {node(self()), #{ + rule_specific_data => rule_specific_data(Rules), + action_specific_data => action_specific_data(Bridges), + connector_specific_data => connector_specific_data(Bridges) + }}. 
+ +fetch_cluster_consistented_data() -> + Rules = emqx_rule_engine:get_rules(), + Bridges = emqx_bridge:list(), + (maybe_collect_schema_registry())#{ + rules_data => rules_data(Rules), + connectors_data => connectors_data(Bridges) + }. + +aggre_or_zip_init_acc() -> + #{ + rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), + action_specific_data => maps:from_keys(action_specific_metric_names(), []), + connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) + }. + +logic_sum_metrics() -> + ?LOGICAL_SUM_METRIC_NAMES. + %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -132,7 +167,7 @@ deregister_cleanup(_) -> ok. _Registry :: prometheus_registry:registry(), Callback :: prometheus_collector:collect_mf_callback(). collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), %% Data Integration Overview ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), @@ -157,7 +192,7 @@ collect_mf(_, _) -> %% @private collect(<<"json">>) -> - RawData = raw_data(?GET_PROM_DATA_MODE()), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), #{ @@ -183,24 +218,6 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). -%% @private -fetch_metric_data_from_local_node() -> - Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), - {node(self()), #{ - rule_specific_data => rule_specific_data(Rules), - action_specific_data => action_specific_data(Bridges), - connector_specific_data => connector_specific_data(Bridges) - }}. 
- -fetch_cluster_consistented_metric_data() -> - Rules = emqx_rule_engine:get_rules(), - Bridges = emqx_bridge:list(), - (maybe_collect_schema_registry())#{ - rules_data => rules_data(Rules), - connectors_data => connectors_data(Bridges) - }. - -if(?EMQX_RELEASE_EDITION == ee). maybe_collect_family_schema_registry(Callback) -> ok = add_collect_family(Callback, ?SCHEMA_REGISTRY_WITH_TYPE, schema_registry_data()), @@ -216,24 +233,6 @@ maybe_collect_schema_registry() -> #{}. -endif. -%% raw data for different format modes -raw_data(?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> - AggregatedNodesMetrics = aggre_cluster(metrics_data_from_all_nodes()), - maps:merge(AggregatedNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED) -> - %% then fold from all nodes - AllNodesMetrics = with_node_name_label(metrics_data_from_all_nodes()), - maps:merge(AllNodesMetrics, fetch_cluster_consistented_metric_data()); -raw_data(?PROM_DATA_MODE__NODE) -> - {_Node, LocalNodeMetrics} = fetch_metric_data_from_local_node(), - maps:merge(LocalNodeMetrics, fetch_cluster_consistented_metric_data()). - -metrics_data_from_all_nodes() -> - Nodes = mria:running_nodes(), - _ResL = emqx_prometheus_proto_v2:raw_prom_data( - Nodes, ?MODULE, fetch_metric_data_from_local_node, [] - ). - %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -398,7 +397,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> case emqx_metrics_worker:get_metrics(rule_metrics, Id) of #{counters := Counters} -> #{ - emqx_rule_enable => emqx_prometheus_utils:boolean_to_number(Bool), + emqx_rule_enable => emqx_prometheus_cluster:boolean_to_number(Bool), emqx_rule_matched => ?MG(matched, Counters), emqx_rule_failed => ?MG(failed, Counters), emqx_rule_passed => ?MG(passed, Counters), @@ -415,7 +414,7 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> end. 
rule_specific_metric_names() -> - emqx_prometheus_utils:metric_names(?RULES_SPECIFIC_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?RULES_SPECIFIC_WITH_TYPE). %%==================== %% Specific Action @@ -469,7 +468,7 @@ get_bridge_metric(Type, Name) -> end. action_specific_metric_names() -> - emqx_prometheus_utils:metric_names(?ACTION_SPECIFIC_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?ACTION_SPECIFIC_WITH_TYPE). %%==================== %% Specific Connector @@ -501,12 +500,12 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> Enabled = emqx_utils_maps:deep_get([config, enable], ResourceData), Status = ?MG(status, ResourceData), #{ - emqx_connector_enable => emqx_prometheus_utils:boolean_to_number(Enabled), - emqx_connector_status => emqx_prometheus_utils:status_to_number(Status) + emqx_connector_enable => emqx_prometheus_cluster:boolean_to_number(Enabled), + emqx_connector_status => emqx_prometheus_cluster:status_to_number(Status) }. connectr_specific_metric_names() -> - emqx_prometheus_utils:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). + emqx_prometheus_cluster:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). %%-------------------------------------------------------------------- %% Collect functions @@ -521,19 +520,19 @@ collect_data_integration_overview(Rules, Bridges) -> M1 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, - emqx_prometheus_utils:metric_names(?RULES_WITH_TYPE) + emqx_prometheus_cluster:metric_names(?RULES_WITH_TYPE) ), M2 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, - emqx_prometheus_utils:metric_names(?CONNECTORS_WITH_TYPE) + emqx_prometheus_cluster:metric_names(?CONNECTORS_WITH_TYPE) ), M3 = maybe_collect_schema_registry(), lists:foldl(fun(M, AccIn) -> maps:merge(M, AccIn) end, #{}, [M1, M2, M3]). collect_json_data(Data) -> - emqx_prometheus_utils:collect_json_data(Data, fun zip_json_data_integration_metrics/3). 
+ emqx_prometheus_cluster:collect_json_data(Data, fun zip_json_data_integration_metrics/3). %% for initialized empty AccIn %% The following fields will be put into Result @@ -555,23 +554,5 @@ zip_json_data_integration_metrics(Key, Points, [] = _AccIn) -> Points ); zip_json_data_integration_metrics(Key, Points, AllResultedAcc) -> - ThisKeyResult = lists:foldl(emqx_prometheus_utils:point_to_map_fun(Key), [], Points), + ThisKeyResult = lists:foldl(emqx_prometheus_cluster:point_to_map_fun(Key), [], Points), lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% merge / zip formatting funcs for type `text/plain` -aggre_cluster(ResL) -> - emqx_prometheus_utils:aggre_cluster(?LOGICAL_SUM_METRIC_NAMES, ResL, aggre_or_zip_init_acc()). - -with_node_name_label(ResL) -> - emqx_prometheus_utils:with_node_name_label(ResL, aggre_or_zip_init_acc()). - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% Helper funcs - -aggre_or_zip_init_acc() -> - #{ - rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), - action_specific_data => maps:from_keys(action_specific_metric_names(), []), - connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) - }. 
From 6b064dd8eb76a79b293767bbe5715408b12e51c2 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 17:14:41 +0800 Subject: [PATCH 34/38] fix(prom_stats): missing metric key - follow https://github.com/emqx/emqx/pull/11497 --- apps/emqx/src/emqx_stats.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/emqx/src/emqx_stats.erl b/apps/emqx/src/emqx_stats.erl index dfd3115f0..9685823ff 100644 --- a/apps/emqx/src/emqx_stats.erl +++ b/apps/emqx/src/emqx_stats.erl @@ -166,6 +166,8 @@ names() -> emqx_live_connections_max, emqx_sessions_count, emqx_sessions_max, + emqx_channels_count, + emqx_channels_max, emqx_topics_count, emqx_topics_max, emqx_suboptions_count, From b424f8ac1222cf6fe7263bdd1011052021d02001 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Fri, 19 Jan 2024 17:35:12 +0800 Subject: [PATCH 35/38] feat(prom_stats): aggregated/unaggregated prometheus data --- apps/emqx_prometheus/src/emqx_prometheus.erl | 1135 +++++++++-------- .../src/emqx_prometheus_auth.erl | 130 +- .../src/emqx_prometheus_data_integration.erl | 364 +++--- 3 files changed, 838 insertions(+), 791 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 264d818c9..af35acc36 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -22,6 +22,16 @@ %% be used by the prometheus application -behaviour(prometheus_collector). +-behaviour(emqx_prometheus_cluster). +-export([ + fetch_data_from_local_node/0, + fetch_cluster_consistented_data/0, + aggre_or_zip_init_acc/0, + logic_sum_metrics/0 +]). + +-export([zip_json_prom_stats_metrics/3]). + -include("emqx_prometheus.hrl"). -include_lib("public_key/include/public_key.hrl"). @@ -34,7 +44,7 @@ create_mf/5, gauge_metric/1, gauge_metrics/1, - counter_metric/1 + counter_metrics/1 ] ). @@ -67,12 +77,21 @@ do_stop/0 ]). 
+%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +-define(MG(K, MAP), maps:get(K, MAP)). +-define(MG0(K, MAP), maps:get(K, MAP, 0)). + -define(C(K, L), proplists:get_value(K, L, 0)). -define(TIMER_MSG, '#interval'). -define(HTTP_OPTIONS, [{autoredirect, true}, {timeout, 60000}]). +-define(LOGICAL_SUM_METRIC_NAMES, []). + %%-------------------------------------------------------------------- %% APIs %%-------------------------------------------------------------------- @@ -172,85 +191,96 @@ deregister_cleanup(?PROMETHEUS_DEFAULT_REGISTRY) -> ok. collect_mf(?PROMETHEUS_DEFAULT_REGISTRY, Callback) -> - Metrics = emqx_metrics:all(), - Stats = emqx_stats:getstats(), - VMData = emqx_vm_data(), - ClusterData = emqx_cluster_data(), - CertsData = emqx_certs_data(), + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), %% TODO: license expiry epoch and cert expiry epoch should be cached - _ = [add_collect_family(Name, CertsData, Callback, gauge) || Name <- emqx_certs()], - _ = [add_collect_family(Name, Stats, Callback, gauge) || Name <- emqx_stats:names()], - _ = [add_collect_family(Name, VMData, Callback, gauge) || Name <- emqx_vm()], - _ = [add_collect_family(Name, ClusterData, Callback, gauge) || Name <- emqx_cluster()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_packets()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_messages()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_delivery()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_client()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_session()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_olp()], - _ = [add_collect_family(Name, Metrics, Callback, counter) 
|| Name <- emqx_metrics_acl()], - _ = [add_collect_family(Name, Metrics, Callback, counter) || Name <- emqx_metrics_authn()], - ok = maybe_collect_family_license(Callback), + ok = add_collect_family(Callback, stats_metric_meta(), ?MG(stats_data, RawData)), + ok = add_collect_family(Callback, vm_metric_meta(), ?MG(vm_data, RawData)), + ok = add_collect_family(Callback, cluster_metric_meta(), ?MG(cluster_data, RawData)), + + ok = add_collect_family(Callback, emqx_packet_metric_meta(), ?MG(emqx_packet_data, RawData)), + ok = add_collect_family(Callback, message_metric_meta(), ?MG(emqx_message_data, RawData)), + ok = add_collect_family(Callback, delivery_metric_meta(), ?MG(emqx_delivery_data, RawData)), + ok = add_collect_family(Callback, client_metric_meta(), ?MG(emqx_client_data, RawData)), + ok = add_collect_family(Callback, session_metric_meta(), ?MG(emqx_session_data, RawData)), + ok = add_collect_family(Callback, olp_metric_meta(), ?MG(emqx_olp_data, RawData)), + ok = add_collect_family(Callback, acl_metric_meta(), ?MG(emqx_acl_data, RawData)), + ok = add_collect_family(Callback, authn_metric_meta(), ?MG(emqx_authn_data, RawData)), + + ok = add_collect_family(Callback, cert_metric_meta(), ?MG(cert_data, RawData)), + ok = maybe_license_add_collect_family(Callback, RawData), ok; collect_mf(_Registry, _Callback) -> ok. %% @private collect(<<"json">>) -> - Metrics = emqx_metrics:all(), - Stats = emqx_stats:getstats(), - VMData = emqx_vm_data(), - %% TODO: FIXME! 
- %% emqx_metrics_olp()), - %% emqx_metrics_acl()), - %% emqx_metrics_authn()), - (maybe_collect_license())#{ - certs => collect_certs_json(emqx_certs_data()), - stats => maps:from_list([collect_stats(Name, Stats) || Name <- emqx_stats:names()]), - metrics => maps:from_list([collect_stats(Name, VMData) || Name <- emqx_vm()]), - packets => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_packets()]), - messages => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_messages()]), - delivery => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_delivery()]), - client => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_client()]), - session => maps:from_list([collect_stats(Name, Metrics) || Name <- emqx_metrics_session()]) + RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), + (maybe_license_collect_json_data(RawData))#{ + stats => collect_json_data(?MG(stats_data, RawData)), + metrics => collect_json_data(?MG(vm_data, RawData)), + packets => collect_json_data(?MG(emqx_packet_data, RawData)), + messages => collect_json_data(?MG(emqx_message_data, RawData)), + delivery => collect_json_data(?MG(emqx_delivery_data, RawData)), + client => collect_json_data(?MG(emqx_client_data, RawData)), + session => collect_json_data(?MG(emqx_session_data, RawData)), + cluster => collect_json_data(?MG(cluster_data, RawData)), + olp => collect_json_data(?MG(emqx_olp_data, RawData)), + acl => collect_json_data(?MG(emqx_acl_data, RawData)), + authn => collect_json_data(?MG(emqx_authn_data, RawData)), + certs => collect_cert_json_data(?MG(cert_data, RawData)) }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DEFAULT_REGISTRY). -%% @private -collect_stats(Name, Stats) -> - R = collect_metrics(Name, Stats), - case R#'Metric'.gauge of - undefined -> - {_, Val} = R#'Metric'.counter, - {Name, Val}; - {_, Val} -> - {Name, Val} - end. 
- collect_metrics(Name, Metrics) -> emqx_collect(Name, Metrics). +add_collect_family(Callback, MetricWithType, Data) -> + _ = [add_collect_family(Name, Data, Callback, Type) || {Name, Type, _} <- MetricWithType], + ok. + add_collect_family(Name, Data, Callback, Type) -> Callback(create_mf(Name, _Help = <<"">>, Type, ?MODULE, Data)). --if(?EMQX_RELEASE_EDITION == ee). -maybe_collect_family_license(Callback) -> - LicenseData = emqx_license_data(), - _ = [add_collect_family(Name, LicenseData, Callback, gauge) || Name <- emqx_license()], - ok. +%% behaviour +fetch_data_from_local_node() -> + {node(self()), #{ + stats_data => stats_data(), + vm_data => vm_data(), + cluster_data => cluster_data(), + %% Metrics + emqx_packet_data => emqx_metric_data(emqx_packet_metric_meta()), + emqx_message_data => emqx_metric_data(message_metric_meta()), + emqx_delivery_data => emqx_metric_data(delivery_metric_meta()), + emqx_client_data => emqx_metric_data(client_metric_meta()), + emqx_session_data => emqx_metric_data(session_metric_meta()), + emqx_olp_data => emqx_metric_data(olp_metric_meta()), + emqx_acl_data => emqx_metric_data(acl_metric_meta()), + emqx_authn_data => emqx_metric_data(authn_metric_meta()) + }}. -maybe_collect_license() -> - LicenseData = emqx_license_data(), - #{license => maps:from_list([collect_stats(Name, LicenseData) || Name <- emqx_license()])}. +fetch_cluster_consistented_data() -> + (maybe_license_fetch_data())#{ + cert_data => cert_data() + }. --else. -maybe_collect_family_license(_) -> - ok. 
+aggre_or_zip_init_acc() -> + #{ + stats_data => maps:from_keys(metrics_name(stats_metric_meta()), []), + vm_data => maps:from_keys(metrics_name(vm_metric_meta()), []), + cluster_data => maps:from_keys(metrics_name(cluster_metric_meta()), []), + emqx_packet_data => maps:from_keys(metrics_name(emqx_packet_metric_meta()), []), + emqx_message_data => maps:from_keys(metrics_name(message_metric_meta()), []), + emqx_delivery_data => maps:from_keys(metrics_name(delivery_metric_meta()), []), + emqx_client_data => maps:from_keys(metrics_name(client_metric_meta()), []), + emqx_session_data => maps:from_keys(metrics_name(session_metric_meta()), []), + emqx_olp_data => maps:from_keys(metrics_name(olp_metric_meta()), []), + emqx_acl_data => maps:from_keys(metrics_name(acl_metric_meta()), []), + emqx_authn_data => maps:from_keys(metrics_name(authn_metric_meta()), []) + }. -maybe_collect_license() -> - #{}. --endif. +logic_sum_metrics() -> + ?LOGICAL_SUM_METRIC_NAMES. %%-------------------------------------------------------------------- %% Collector @@ -258,512 +288,513 @@ maybe_collect_license() -> %%-------------------------------------------------------------------- %% Stats - %% connections -emqx_collect(emqx_connections_count, Stats) -> - gauge_metric(?C('connections.count', Stats)); -emqx_collect(emqx_connections_max, Stats) -> - gauge_metric(?C('connections.max', Stats)); -emqx_collect(emqx_live_connections_count, Stats) -> - gauge_metric(?C('live_connections.count', Stats)); -emqx_collect(emqx_live_connections_max, Stats) -> - gauge_metric(?C('live_connections.max', Stats)); +emqx_collect(K = emqx_connections_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_connections_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_live_connections_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_live_connections_max, D) -> gauge_metrics(?MG(K, D)); %% sessions -emqx_collect(emqx_sessions_count, Stats) -> - gauge_metric(?C('sessions.count', Stats)); 
-emqx_collect(emqx_sessions_max, Stats) -> - gauge_metric(?C('sessions.max', Stats)); -emqx_collect(emqx_channels_count, Stats) -> - gauge_metric(?C('channels.count', Stats)); -emqx_collect(emqx_channels_max, Stats) -> - gauge_metric(?C('channels.max', Stats)); +emqx_collect(K = emqx_sessions_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_sessions_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_channels_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_channels_max, D) -> gauge_metrics(?MG(K, D)); %% pub/sub stats -emqx_collect(emqx_topics_count, Stats) -> - gauge_metric(?C('topics.count', Stats)); -emqx_collect(emqx_topics_max, Stats) -> - gauge_metric(?C('topics.max', Stats)); -emqx_collect(emqx_suboptions_count, Stats) -> - gauge_metric(?C('suboptions.count', Stats)); -emqx_collect(emqx_suboptions_max, Stats) -> - gauge_metric(?C('suboptions.max', Stats)); -emqx_collect(emqx_subscribers_count, Stats) -> - gauge_metric(?C('subscribers.count', Stats)); -emqx_collect(emqx_subscribers_max, Stats) -> - gauge_metric(?C('subscribers.max', Stats)); -emqx_collect(emqx_subscriptions_count, Stats) -> - gauge_metric(?C('subscriptions.count', Stats)); -emqx_collect(emqx_subscriptions_max, Stats) -> - gauge_metric(?C('subscriptions.max', Stats)); -emqx_collect(emqx_subscriptions_shared_count, Stats) -> - gauge_metric(?C('subscriptions.shared.count', Stats)); -emqx_collect(emqx_subscriptions_shared_max, Stats) -> - gauge_metric(?C('subscriptions.shared.max', Stats)); +emqx_collect(K = emqx_topics_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_topics_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_suboptions_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_suboptions_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscribers_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscribers_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_count, D) -> 
gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_max, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_shared_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_subscriptions_shared_max, D) -> gauge_metrics(?MG(K, D)); %% retained -emqx_collect(emqx_retained_count, Stats) -> - gauge_metric(?C('retained.count', Stats)); -emqx_collect(emqx_retained_max, Stats) -> - gauge_metric(?C('retained.max', Stats)); +emqx_collect(K = emqx_retained_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_retained_max, D) -> gauge_metrics(?MG(K, D)); %% delayed -emqx_collect(emqx_delayed_count, Stats) -> - gauge_metric(?C('delayed.count', Stats)); -emqx_collect(emqx_delayed_max, Stats) -> - gauge_metric(?C('delayed.max', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - packets & bytes - -%% bytes -emqx_collect(emqx_bytes_received, Metrics) -> - counter_metric(?C('bytes.received', Metrics)); -emqx_collect(emqx_bytes_sent, Metrics) -> - counter_metric(?C('bytes.sent', Metrics)); -%% received.sent -emqx_collect(emqx_packets_received, Metrics) -> - counter_metric(?C('packets.received', Metrics)); -emqx_collect(emqx_packets_sent, Metrics) -> - counter_metric(?C('packets.sent', Metrics)); -%% connect -emqx_collect(emqx_packets_connect, Metrics) -> - counter_metric(?C('packets.connect.received', Metrics)); -emqx_collect(emqx_packets_connack_sent, Metrics) -> - counter_metric(?C('packets.connack.sent', Metrics)); -emqx_collect(emqx_packets_connack_error, Metrics) -> - counter_metric(?C('packets.connack.error', Metrics)); -emqx_collect(emqx_packets_connack_auth_error, Metrics) -> - counter_metric(?C('packets.connack.auth_error', Metrics)); -%% sub.unsub -emqx_collect(emqx_packets_subscribe_received, Metrics) -> - counter_metric(?C('packets.subscribe.received', Metrics)); -emqx_collect(emqx_packets_subscribe_auth_error, Metrics) -> - counter_metric(?C('packets.subscribe.auth_error', Metrics)); 
-emqx_collect(emqx_packets_subscribe_error, Metrics) -> - counter_metric(?C('packets.subscribe.error', Metrics)); -emqx_collect(emqx_packets_suback_sent, Metrics) -> - counter_metric(?C('packets.suback.sent', Metrics)); -emqx_collect(emqx_packets_unsubscribe_received, Metrics) -> - counter_metric(?C('packets.unsubscribe.received', Metrics)); -emqx_collect(emqx_packets_unsubscribe_error, Metrics) -> - counter_metric(?C('packets.unsubscribe.error', Metrics)); -emqx_collect(emqx_packets_unsuback_sent, Metrics) -> - counter_metric(?C('packets.unsuback.sent', Metrics)); -%% publish.puback -emqx_collect(emqx_packets_publish_received, Metrics) -> - counter_metric(?C('packets.publish.received', Metrics)); -emqx_collect(emqx_packets_publish_sent, Metrics) -> - counter_metric(?C('packets.publish.sent', Metrics)); -emqx_collect(emqx_packets_publish_inuse, Metrics) -> - counter_metric(?C('packets.publish.inuse', Metrics)); -emqx_collect(emqx_packets_publish_error, Metrics) -> - counter_metric(?C('packets.publish.error', Metrics)); -emqx_collect(emqx_packets_publish_auth_error, Metrics) -> - counter_metric(?C('packets.publish.auth_error', Metrics)); -emqx_collect(emqx_packets_publish_dropped, Metrics) -> - counter_metric(?C('packets.publish.dropped', Metrics)); -%% puback -emqx_collect(emqx_packets_puback_received, Metrics) -> - counter_metric(?C('packets.puback.received', Metrics)); -emqx_collect(emqx_packets_puback_sent, Metrics) -> - counter_metric(?C('packets.puback.sent', Metrics)); -emqx_collect(emqx_packets_puback_inuse, Metrics) -> - counter_metric(?C('packets.puback.inuse', Metrics)); -emqx_collect(emqx_packets_puback_missed, Metrics) -> - counter_metric(?C('packets.puback.missed', Metrics)); -%% pubrec -emqx_collect(emqx_packets_pubrec_received, Metrics) -> - counter_metric(?C('packets.pubrec.received', Metrics)); -emqx_collect(emqx_packets_pubrec_sent, Metrics) -> - counter_metric(?C('packets.pubrec.sent', Metrics)); -emqx_collect(emqx_packets_pubrec_inuse, Metrics) 
-> - counter_metric(?C('packets.pubrec.inuse', Metrics)); -emqx_collect(emqx_packets_pubrec_missed, Metrics) -> - counter_metric(?C('packets.pubrec.missed', Metrics)); -%% pubrel -emqx_collect(emqx_packets_pubrel_received, Metrics) -> - counter_metric(?C('packets.pubrel.received', Metrics)); -emqx_collect(emqx_packets_pubrel_sent, Metrics) -> - counter_metric(?C('packets.pubrel.sent', Metrics)); -emqx_collect(emqx_packets_pubrel_missed, Metrics) -> - counter_metric(?C('packets.pubrel.missed', Metrics)); -%% pubcomp -emqx_collect(emqx_packets_pubcomp_received, Metrics) -> - counter_metric(?C('packets.pubcomp.received', Metrics)); -emqx_collect(emqx_packets_pubcomp_sent, Metrics) -> - counter_metric(?C('packets.pubcomp.sent', Metrics)); -emqx_collect(emqx_packets_pubcomp_inuse, Metrics) -> - counter_metric(?C('packets.pubcomp.inuse', Metrics)); -emqx_collect(emqx_packets_pubcomp_missed, Metrics) -> - counter_metric(?C('packets.pubcomp.missed', Metrics)); -%% pingreq -emqx_collect(emqx_packets_pingreq_received, Metrics) -> - counter_metric(?C('packets.pingreq.received', Metrics)); -emqx_collect(emqx_packets_pingresp_sent, Metrics) -> - counter_metric(?C('packets.pingresp.sent', Metrics)); -%% disconnect -emqx_collect(emqx_packets_disconnect_received, Metrics) -> - counter_metric(?C('packets.disconnect.received', Metrics)); -emqx_collect(emqx_packets_disconnect_sent, Metrics) -> - counter_metric(?C('packets.disconnect.sent', Metrics)); -%% auth -emqx_collect(emqx_packets_auth_received, Metrics) -> - counter_metric(?C('packets.auth.received', Metrics)); -emqx_collect(emqx_packets_auth_sent, Metrics) -> - counter_metric(?C('packets.auth.sent', Metrics)); -%%-------------------------------------------------------------------- -%% Metrics - messages - -%% messages -emqx_collect(emqx_messages_received, Metrics) -> - counter_metric(?C('messages.received', Metrics)); -emqx_collect(emqx_messages_sent, Metrics) -> - counter_metric(?C('messages.sent', Metrics)); 
-emqx_collect(emqx_messages_qos0_received, Metrics) -> - counter_metric(?C('messages.qos0.received', Metrics)); -emqx_collect(emqx_messages_qos0_sent, Metrics) -> - counter_metric(?C('messages.qos0.sent', Metrics)); -emqx_collect(emqx_messages_qos1_received, Metrics) -> - counter_metric(?C('messages.qos1.received', Metrics)); -emqx_collect(emqx_messages_qos1_sent, Metrics) -> - counter_metric(?C('messages.qos1.sent', Metrics)); -emqx_collect(emqx_messages_qos2_received, Metrics) -> - counter_metric(?C('messages.qos2.received', Metrics)); -emqx_collect(emqx_messages_qos2_sent, Metrics) -> - counter_metric(?C('messages.qos2.sent', Metrics)); -emqx_collect(emqx_messages_publish, Metrics) -> - counter_metric(?C('messages.publish', Metrics)); -emqx_collect(emqx_messages_dropped, Metrics) -> - counter_metric(?C('messages.dropped', Metrics)); -emqx_collect(emqx_messages_dropped_expired, Metrics) -> - counter_metric(?C('messages.dropped.await_pubrel_timeout', Metrics)); -emqx_collect(emqx_messages_dropped_no_subscribers, Metrics) -> - counter_metric(?C('messages.dropped.no_subscribers', Metrics)); -emqx_collect(emqx_messages_forward, Metrics) -> - counter_metric(?C('messages.forward', Metrics)); -emqx_collect(emqx_messages_retained, Metrics) -> - counter_metric(?C('messages.retained', Metrics)); -emqx_collect(emqx_messages_delayed, Stats) -> - counter_metric(?C('messages.delayed', Stats)); -emqx_collect(emqx_messages_delivered, Stats) -> - counter_metric(?C('messages.delivered', Stats)); -emqx_collect(emqx_messages_acked, Stats) -> - counter_metric(?C('messages.acked', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - delivery - -emqx_collect(emqx_delivery_dropped, Stats) -> - counter_metric(?C('delivery.dropped', Stats)); -emqx_collect(emqx_delivery_dropped_no_local, Stats) -> - counter_metric(?C('delivery.dropped.no_local', Stats)); -emqx_collect(emqx_delivery_dropped_too_large, Stats) -> - 
counter_metric(?C('delivery.dropped.too_large', Stats)); -emqx_collect(emqx_delivery_dropped_qos0_msg, Stats) -> - counter_metric(?C('delivery.dropped.qos0_msg', Stats)); -emqx_collect(emqx_delivery_dropped_queue_full, Stats) -> - counter_metric(?C('delivery.dropped.queue_full', Stats)); -emqx_collect(emqx_delivery_dropped_expired, Stats) -> - counter_metric(?C('delivery.dropped.expired', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - client -emqx_collect(emqx_client_connect, Stats) -> - counter_metric(?C('client.connect', Stats)); -emqx_collect(emqx_client_connack, Stats) -> - counter_metric(?C('client.connack', Stats)); -emqx_collect(emqx_client_connected, Stats) -> - counter_metric(?C('client.connected', Stats)); -emqx_collect(emqx_client_authenticate, Stats) -> - counter_metric(?C('client.authenticate', Stats)); -emqx_collect(emqx_client_auth_anonymous, Stats) -> - counter_metric(?C('client.auth.anonymous', Stats)); -emqx_collect(emqx_client_authorize, Stats) -> - counter_metric(?C('client.authorize', Stats)); -emqx_collect(emqx_client_subscribe, Stats) -> - counter_metric(?C('client.subscribe', Stats)); -emqx_collect(emqx_client_unsubscribe, Stats) -> - counter_metric(?C('client.unsubscribe', Stats)); -emqx_collect(emqx_client_disconnected, Stats) -> - counter_metric(?C('client.disconnected', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - session - -emqx_collect(emqx_session_created, Stats) -> - counter_metric(?C('session.created', Stats)); -emqx_collect(emqx_session_resumed, Stats) -> - counter_metric(?C('session.resumed', Stats)); -emqx_collect(emqx_session_takenover, Stats) -> - counter_metric(?C('session.takenover', Stats)); -emqx_collect(emqx_session_discarded, Stats) -> - counter_metric(?C('session.discarded', Stats)); -emqx_collect(emqx_session_terminated, Stats) -> - counter_metric(?C('session.terminated', Stats)); 
-%%-------------------------------------------------------------------- - -%% Metrics - overload protection -emqx_collect(emqx_overload_protection_delay_ok, Stats) -> - counter_metric(?C('overload_protection.delay.ok', Stats)); -emqx_collect(emqx_overload_protection_delay_timeout, Stats) -> - counter_metric(?C('overload_protection.delay.timeout', Stats)); -emqx_collect(emqx_overload_protection_hibernation, Stats) -> - counter_metric(?C('overload_protection.hibernation', Stats)); -emqx_collect(emqx_overload_protection_gc, Stats) -> - counter_metric(?C('overload_protection.gc', Stats)); -emqx_collect(emqx_overload_protection_new_conn, Stats) -> - counter_metric(?C('overload_protection.new_conn', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - acl -emqx_collect(emqx_authorization_allow, Stats) -> - counter_metric(?C('authorization.allow', Stats)); -emqx_collect(emqx_authorization_deny, Stats) -> - counter_metric(?C('authorization.deny', Stats)); -emqx_collect(emqx_authorization_cache_hit, Stats) -> - counter_metric(?C('authorization.cache_hit', Stats)); -emqx_collect(emqx_authorization_cache_miss, Stats) -> - counter_metric(?C('authorization.cache_miss', Stats)); -emqx_collect(emqx_authorization_superuser, Stats) -> - counter_metric(?C('authorization.superuser', Stats)); -emqx_collect(emqx_authorization_nomatch, Stats) -> - counter_metric(?C('authorization.nomatch', Stats)); -emqx_collect(emqx_authorization_matched_allow, Stats) -> - counter_metric(?C('authorization.matched_allow', Stats)); -emqx_collect(emqx_authorization_matched_deny, Stats) -> - counter_metric(?C('authorization.matched_deny', Stats)); -%%-------------------------------------------------------------------- -%% Metrics - authn -emqx_collect(emqx_authentication_success, Stats) -> - counter_metric(?C('authentication.success', Stats)); -emqx_collect(emqx_authentication_success_anonymous, Stats) -> - counter_metric(?C('authentication.success.anonymous', 
Stats)); -emqx_collect(emqx_authentication_failure, Stats) -> - counter_metric(?C('authentication.failure', Stats)); +emqx_collect(K = emqx_delayed_count, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_delayed_max, D) -> gauge_metrics(?MG(K, D)); %%-------------------------------------------------------------------- %% VM -emqx_collect(emqx_vm_cpu_use, VMData) -> - gauge_metric(?C(cpu_use, VMData)); -emqx_collect(emqx_vm_cpu_idle, VMData) -> - gauge_metric(?C(cpu_idle, VMData)); -emqx_collect(emqx_vm_run_queue, VMData) -> - gauge_metric(?C(run_queue, VMData)); -emqx_collect(emqx_vm_process_messages_in_queues, VMData) -> - gauge_metric(?C(process_total_messages, VMData)); -emqx_collect(emqx_vm_total_memory, VMData) -> - gauge_metric(?C(total_memory, VMData)); -emqx_collect(emqx_vm_used_memory, VMData) -> - gauge_metric(?C(used_memory, VMData)); -emqx_collect(emqx_cluster_nodes_running, ClusterData) -> - gauge_metric(?C(nodes_running, ClusterData)); -emqx_collect(emqx_cluster_nodes_stopped, ClusterData) -> - gauge_metric(?C(nodes_stopped, ClusterData)); +emqx_collect(K = emqx_vm_cpu_use, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_cpu_idle, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_run_queue, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_process_messages_in_queues, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_total_memory, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_vm_used_memory, D) -> gauge_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Cluster Info +emqx_collect(K = emqx_cluster_nodes_running, D) -> gauge_metrics(?MG(K, D)); +emqx_collect(K = emqx_cluster_nodes_stopped, D) -> gauge_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - packets & bytes +%% bytes +emqx_collect(K = emqx_bytes_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_bytes_sent, D) -> 
counter_metrics(?MG(K, D)); +%% received.sent +emqx_collect(K = emqx_packets_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_sent, D) -> counter_metrics(?MG(K, D)); +%% connect +emqx_collect(K = emqx_packets_connect, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_connack_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_connack_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_connack_auth_error, D) -> counter_metrics(?MG(K, D)); +%% sub.unsub +emqx_collect(K = emqx_packets_subscribe_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_subscribe_auth_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_subscribe_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_suback_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_unsubscribe_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_unsubscribe_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_unsuback_sent, D) -> counter_metrics(?MG(K, D)); +%% publish.puback +emqx_collect(K = emqx_packets_publish_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_auth_error, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_publish_dropped, D) -> counter_metrics(?MG(K, D)); +%% puback +emqx_collect(K = emqx_packets_puback_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_puback_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_puback_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_puback_missed, D) -> counter_metrics(?MG(K, D)); +%% pubrec +emqx_collect(K = 
emqx_packets_pubrec_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrec_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrec_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrec_missed, D) -> counter_metrics(?MG(K, D)); +%% pubrel +emqx_collect(K = emqx_packets_pubrel_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrel_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubrel_missed, D) -> counter_metrics(?MG(K, D)); +%% pubcomp +emqx_collect(K = emqx_packets_pubcomp_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubcomp_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubcomp_inuse, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pubcomp_missed, D) -> counter_metrics(?MG(K, D)); +%% pingreq +emqx_collect(K = emqx_packets_pingreq_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_pingresp_sent, D) -> counter_metrics(?MG(K, D)); +%% disconnect +emqx_collect(K = emqx_packets_disconnect_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_disconnect_sent, D) -> counter_metrics(?MG(K, D)); +%% auth +emqx_collect(K = emqx_packets_auth_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_packets_auth_sent, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - messages +%% messages +emqx_collect(K = emqx_messages_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos0_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos0_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos1_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos1_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = 
emqx_messages_qos2_received, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_qos2_sent, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_publish, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_dropped, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_dropped_expired, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_dropped_no_subscribers, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_forward, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_retained, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_delayed, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_delivered, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_messages_acked, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - delivery +emqx_collect(K = emqx_delivery_dropped, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_no_local, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_too_large, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_qos0_msg, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_queue_full, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_delivery_dropped_expired, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - client +emqx_collect(K = emqx_client_connect, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_connack, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_connected, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_authenticate, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_auth_anonymous, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_authorize, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = 
emqx_client_subscribe, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_unsubscribe, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_client_disconnected, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - session +emqx_collect(K = emqx_session_created, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_resumed, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_takenover, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_discarded, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_session_terminated, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - overload protection +emqx_collect(K = emqx_overload_protection_delay_ok, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_delay_timeout, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_hibernation, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_gc, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_overload_protection_new_conn, D) -> counter_metrics(?MG(K, D)); +%%-------------------------------------------------------------------- +%% Metrics - acl +emqx_collect(K = emqx_authorization_allow, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_deny, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_cache_hit, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_cache_miss, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_superuser, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_nomatch, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_matched_allow, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authorization_matched_deny, D) -> counter_metrics(?MG(K, D)); 
+%%-------------------------------------------------------------------- +%% Metrics - authn +emqx_collect(K = emqx_authentication_success, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authentication_success_anonymous, D) -> counter_metrics(?MG(K, D)); +emqx_collect(K = emqx_authentication_failure, D) -> counter_metrics(?MG(K, D)); %%-------------------------------------------------------------------- %% License -emqx_collect(emqx_license_expiry_at, LicenseData) -> - gauge_metric(?C(expiry_at, LicenseData)); +emqx_collect(K = emqx_license_expiry_at, D) -> gauge_metric(?MG(K, D)); %%-------------------------------------------------------------------- %% Certs -emqx_collect(emqx_cert_expiry_at, CertsData) -> - gauge_metrics(CertsData). +emqx_collect(K = emqx_cert_expiry_at, D) -> gauge_metrics(?MG(K, D)). %%-------------------------------------------------------------------- %% Indicators %%-------------------------------------------------------------------- -emqx_metrics_packets() -> +%%======================================== +%% Stats +%%======================================== + +stats_metric_meta() -> [ - emqx_bytes_received, - emqx_bytes_sent, - emqx_packets_received, - emqx_packets_sent, - emqx_packets_connect, - emqx_packets_connack_sent, - emqx_packets_connack_error, - emqx_packets_connack_auth_error, - emqx_packets_publish_received, - emqx_packets_publish_sent, - emqx_packets_publish_inuse, - emqx_packets_publish_error, - emqx_packets_publish_auth_error, - emqx_packets_publish_dropped, - emqx_packets_puback_received, - emqx_packets_puback_sent, - emqx_packets_puback_inuse, - emqx_packets_puback_missed, - emqx_packets_pubrec_received, - emqx_packets_pubrec_sent, - emqx_packets_pubrec_inuse, - emqx_packets_pubrec_missed, - emqx_packets_pubrel_received, - emqx_packets_pubrel_sent, - emqx_packets_pubrel_missed, - emqx_packets_pubcomp_received, - emqx_packets_pubcomp_sent, - emqx_packets_pubcomp_inuse, - emqx_packets_pubcomp_missed, - 
emqx_packets_subscribe_received, - emqx_packets_subscribe_error, - emqx_packets_subscribe_auth_error, - emqx_packets_suback_sent, - emqx_packets_unsubscribe_received, - emqx_packets_unsubscribe_error, - emqx_packets_unsuback_sent, - emqx_packets_pingreq_received, - emqx_packets_pingresp_sent, - emqx_packets_disconnect_received, - emqx_packets_disconnect_sent, - emqx_packets_auth_received, - emqx_packets_auth_sent + %% connections + {emqx_connections_count, counter, 'connections.count'}, + {emqx_connections_max, counter, 'connections.max'}, + {emqx_live_connections_count, counter, 'live_connections.count'}, + {emqx_live_connections_max, counter, 'live_connections.max'}, + %% sessions + {emqx_sessions_count, counter, 'sessions.count'}, + {emqx_sessions_max, counter, 'sessions.max'}, + {emqx_channels_count, counter, 'channels.count'}, + {emqx_channels_max, counter, 'channels.max'}, + %% pub/sub stats + {emqx_topics_count, counter, 'topics.count'}, + {emqx_topics_max, counter, 'topics.max'}, + {emqx_suboptions_count, counter, 'suboptions.count'}, + {emqx_suboptions_max, counter, 'suboptions.max'}, + {emqx_subscribers_count, counter, 'subscribers.count'}, + {emqx_subscribers_max, counter, 'subscribers.max'}, + {emqx_subscriptions_count, counter, 'subscriptions.count'}, + {emqx_subscriptions_max, counter, 'subscriptions.max'}, + {emqx_subscriptions_shared_count, counter, 'subscriptions.shared.count'}, + {emqx_subscriptions_shared_max, counter, 'subscriptions.shared.max'}, + %% retained + {emqx_retained_count, counter, 'retained.count'}, + {emqx_retained_max, counter, 'retained.max'}, + %% delayed + {emqx_delayed_count, counter, 'delayed.count'}, + {emqx_delayed_max, counter, 'delayed.max'} ]. 
-emqx_metrics_olp() -> - case emqx_config_zones:is_olp_enabled() of - true -> - [ - emqx_overload_protection_delay_ok, - emqx_overload_protection_delay_timeout, - emqx_overload_protection_hibernation, - emqx_overload_protection_gc, - emqx_overload_protection_new_conn - ]; - false -> - [] - end. +stats_data() -> + Stats = emqx_stats:getstats(), + lists:foldl( + fun({Name, _Type, MetricKAtom}, AccIn) -> + AccIn#{Name => [{[], ?C(MetricKAtom, Stats)}]} + end, + #{}, + stats_metric_meta() + ). -emqx_metrics_acl() -> +%%======================================== +%% Erlang VM +%%======================================== + +vm_metric_meta() -> [ - emqx_authorization_allow, - emqx_authorization_deny, - emqx_authorization_cache_hit, - emqx_authorization_cache_miss, - emqx_authorization_superuser, - emqx_authorization_nomatch, - emqx_authorization_matched_allow, - emqx_authorization_matched_deny + {emqx_vm_cpu_use, gauge, 'cpu_use'}, + {emqx_vm_cpu_idle, gauge, 'cpu_idle'}, + {emqx_vm_run_queue, gauge, 'run_queue'}, + {emqx_vm_process_messages_in_queues, gauge, 'process_total_messages'}, + {emqx_vm_total_memory, gauge, 'total_memory'}, + {emqx_vm_used_memory, gauge, 'used_memory'} ]. -emqx_metrics_authn() -> +vm_data() -> + VmStats = emqx_mgmt:vm_stats(), + lists:foldl( + fun({Name, _Type, MetricKAtom}, AccIn) -> + AccIn#{Name => [{[], ?C(MetricKAtom, VmStats)}]} + end, + #{}, + vm_metric_meta() + ). + +%%======================================== +%% Cluster +%%======================================== + +cluster_metric_meta() -> [ - emqx_authentication_success, - emqx_authentication_success_anonymous, - emqx_authentication_failure + {emqx_cluster_nodes_running, gauge, undefined}, + {emqx_cluster_nodes_stopped, gauge, undefined} ]. 
-emqx_metrics_messages() -> - [ - emqx_messages_received, - emqx_messages_sent, - emqx_messages_qos0_received, - emqx_messages_qos0_sent, - emqx_messages_qos1_received, - emqx_messages_qos1_sent, - emqx_messages_qos2_received, - emqx_messages_qos2_sent, - emqx_messages_publish, - emqx_messages_dropped, - emqx_messages_dropped_expired, - emqx_messages_dropped_no_subscribers, - emqx_messages_forward, - emqx_messages_retained, - emqx_messages_delayed, - emqx_messages_delivered, - emqx_messages_acked - ]. - -emqx_metrics_delivery() -> - [ - emqx_delivery_dropped, - emqx_delivery_dropped_no_local, - emqx_delivery_dropped_too_large, - emqx_delivery_dropped_qos0_msg, - emqx_delivery_dropped_queue_full, - emqx_delivery_dropped_expired - ]. - -emqx_metrics_client() -> - [ - emqx_client_connect, - emqx_client_connack, - emqx_client_connected, - emqx_client_authenticate, - emqx_client_auth_anonymous, - emqx_client_authorize, - emqx_client_subscribe, - emqx_client_unsubscribe, - emqx_client_disconnected - ]. - -emqx_metrics_session() -> - [ - emqx_session_created, - emqx_session_resumed, - emqx_session_takenover, - emqx_session_discarded, - emqx_session_terminated - ]. - -emqx_vm() -> - [ - emqx_vm_cpu_use, - emqx_vm_cpu_idle, - emqx_vm_run_queue, - emqx_vm_process_messages_in_queues, - emqx_vm_total_memory, - emqx_vm_used_memory - ]. - -emqx_vm_data() -> - emqx_mgmt:vm_stats(). - -emqx_cluster() -> - [ - emqx_cluster_nodes_running, - emqx_cluster_nodes_stopped - ]. - -emqx_cluster_data() -> +cluster_data() -> Running = emqx:cluster_nodes(running), Stopped = emqx:cluster_nodes(stopped), + #{ + emqx_cluster_nodes_running => [{[], length(Running)}], + emqx_cluster_nodes_stopped => [{[], length(Stopped)}] + }. 
+ +%%======================================== +%% Metrics +%%======================================== + +emqx_metric_data(MetricNameTypeKeyL) -> + Metrics = emqx_metrics:all(), + lists:foldl( + fun({Name, _Type, MetricKAtom}, AccIn) -> + AccIn#{Name => [{[], ?C(MetricKAtom, Metrics)}]} + end, + #{}, + MetricNameTypeKeyL + ). + +%%========== +%% Bytes && Packets +emqx_packet_metric_meta() -> [ - {nodes_running, length(Running)}, - {nodes_stopped, length(Stopped)} + {emqx_bytes_received, counter, 'bytes.received'}, + {emqx_bytes_sent, counter, 'bytes.sent'}, + %% received.sent + {emqx_packets_received, counter, 'packets.received'}, + {emqx_packets_sent, counter, 'packets.sent'}, + %% connect + {emqx_packets_connect, counter, 'packets.connect.received'}, + {emqx_packets_connack_sent, counter, 'packets.connack.sent'}, + {emqx_packets_connack_error, counter, 'packets.connack.error'}, + {emqx_packets_connack_auth_error, counter, 'packets.connack.auth_error'}, + %% sub.unsub + {emqx_packets_subscribe_received, counter, 'packets.subscribe.received'}, + {emqx_packets_subscribe_auth_error, counter, 'packets.subscribe.auth_error'}, + {emqx_packets_subscribe_error, counter, 'packets.subscribe.error'}, + {emqx_packets_suback_sent, counter, 'packets.suback.sent'}, + {emqx_packets_unsubscribe_received, counter, 'packets.unsubscribe.received'}, + {emqx_packets_unsubscribe_error, counter, 'packets.unsubscribe.error'}, + {emqx_packets_unsuback_sent, counter, 'packets.unsuback.sent'}, + %% publish.puback + {emqx_packets_publish_received, counter, 'packets.publish.received'}, + {emqx_packets_publish_sent, counter, 'packets.publish.sent'}, + {emqx_packets_publish_inuse, counter, 'packets.publish.inuse'}, + {emqx_packets_publish_error, counter, 'packets.publish.error'}, + {emqx_packets_publish_auth_error, counter, 'packets.publish.auth_error'}, + {emqx_packets_publish_dropped, counter, 'packets.publish.dropped'}, + %% puback + {emqx_packets_puback_received, counter, 
'packets.puback.received'}, + {emqx_packets_puback_sent, counter, 'packets.puback.sent'}, + {emqx_packets_puback_inuse, counter, 'packets.puback.inuse'}, + {emqx_packets_puback_missed, counter, 'packets.puback.missed'}, + %% pubrec + {emqx_packets_pubrec_received, counter, 'packets.pubrec.received'}, + {emqx_packets_pubrec_sent, counter, 'packets.pubrec.sent'}, + {emqx_packets_pubrec_inuse, counter, 'packets.pubrec.inuse'}, + {emqx_packets_pubrec_missed, counter, 'packets.pubrec.missed'}, + %% pubrel + {emqx_packets_pubrel_received, counter, 'packets.pubrel.received'}, + {emqx_packets_pubrel_sent, counter, 'packets.pubrel.sent'}, + {emqx_packets_pubrel_missed, counter, 'packets.pubrel.missed'}, + %% pubcomp + {emqx_packets_pubcomp_received, counter, 'packets.pubcomp.received'}, + {emqx_packets_pubcomp_sent, counter, 'packets.pubcomp.sent'}, + {emqx_packets_pubcomp_inuse, counter, 'packets.pubcomp.inuse'}, + {emqx_packets_pubcomp_missed, counter, 'packets.pubcomp.missed'}, + %% pingreq + {emqx_packets_pingreq_received, counter, 'packets.pingreq.received'}, + {emqx_packets_pingresp_sent, counter, 'packets.pingresp.sent'}, + %% disconnect + {emqx_packets_disconnect_received, counter, 'packets.disconnect.received'}, + {emqx_packets_disconnect_sent, counter, 'packets.disconnect.sent'}, + %% auth + {emqx_packets_auth_received, counter, 'packets.auth.received'}, + {emqx_packets_auth_sent, counter, 'packets.auth.sent'} ]. 
+%%========== +%% Messages +message_metric_meta() -> + [ + {emqx_messages_received, counter, 'messages.received'}, + {emqx_messages_sent, counter, 'messages.sent'}, + {emqx_messages_qos0_received, counter, 'messages.qos0.received'}, + {emqx_messages_qos0_sent, counter, 'messages.qos0.sent'}, + {emqx_messages_qos1_received, counter, 'messages.qos1.received'}, + {emqx_messages_qos1_sent, counter, 'messages.qos1.sent'}, + {emqx_messages_qos2_received, counter, 'messages.qos2.received'}, + {emqx_messages_qos2_sent, counter, 'messages.qos2.sent'}, + {emqx_messages_publish, counter, 'messages.publish'}, + {emqx_messages_dropped, counter, 'messages.dropped'}, + {emqx_messages_dropped_expired, counter, 'messages.dropped.await_pubrel_timeout'}, + {emqx_messages_dropped_no_subscribers, counter, 'messages.dropped.no_subscribers'}, + {emqx_messages_forward, counter, 'messages.forward'}, + {emqx_messages_retained, counter, 'messages.retained'}, + {emqx_messages_delayed, counter, 'messages.delayed'}, + {emqx_messages_delivered, counter, 'messages.delivered'}, + {emqx_messages_acked, counter, 'messages.acked'} + ]. + +%%========== +%% Delivery +delivery_metric_meta() -> + [ + {emqx_delivery_dropped, counter, 'delivery.dropped'}, + {emqx_delivery_dropped_no_local, counter, 'delivery.dropped.no_local'}, + {emqx_delivery_dropped_too_large, counter, 'delivery.dropped.too_large'}, + {emqx_delivery_dropped_qos0_msg, counter, 'delivery.dropped.qos0_msg'}, + {emqx_delivery_dropped_queue_full, counter, 'delivery.dropped.queue_full'}, + {emqx_delivery_dropped_expired, counter, 'delivery.dropped.expired'} + ]. 
+ +%%========== +%% Client +client_metric_meta() -> + [ + {emqx_client_connect, counter, 'client.connect'}, + {emqx_client_connack, counter, 'client.connack'}, + {emqx_client_connected, counter, 'client.connected'}, + {emqx_client_authenticate, counter, 'client.authenticate'}, + {emqx_client_auth_anonymous, counter, 'client.auth.anonymous'}, + {emqx_client_authorize, counter, 'client.authorize'}, + {emqx_client_subscribe, counter, 'client.subscribe'}, + {emqx_client_unsubscribe, counter, 'client.unsubscribe'}, + {emqx_client_disconnected, counter, 'client.disconnected'} + ]. + +%%========== +%% Metrics - session +session_metric_meta() -> + [ + {emqx_session_created, counter, 'session.created'}, + {emqx_session_resumed, counter, 'session.resumed'}, + {emqx_session_takenover, counter, 'session.takenover'}, + {emqx_session_discarded, counter, 'session.discarded'}, + {emqx_session_terminated, counter, 'session.terminated'} + ]. + +%%========== +%% Metrics - acl +acl_metric_meta() -> + [ + {emqx_authorization_allow, counter, 'authorization.allow'}, + {emqx_authorization_deny, counter, 'authorization.deny'}, + {emqx_authorization_cache_hit, counter, 'authorization.cache_hit'}, + {emqx_authorization_cache_miss, counter, 'authorization.cache_miss'}, + {emqx_authorization_superuser, counter, 'authorization.superuser'}, + {emqx_authorization_nomatch, counter, 'authorization.nomatch'}, + {emqx_authorization_matched_allow, counter, 'authorization.matched_allow'}, + {emqx_authorization_matched_deny, counter, 'authorization.matched_deny'} + ]. + +%%========== +%% Metrics - authn +authn_metric_meta() -> + [ + {emqx_authentication_success, counter, 'authentication.success'}, + {emqx_authentication_success_anonymous, counter, 'authentication.success.anonymous'}, + {emqx_authentication_failure, counter, 'authentication.failure'} + ]. + +%%========== +%% Overload Protection +olp_metric_meta() -> + emqx_metrics_olp_meta(emqx_config_zones:is_olp_enabled()). 
+ +emqx_metrics_olp_meta(true) -> + [ + {emqx_overload_protection_delay_ok, counter, 'overload_protection.delay.ok'}, + {emqx_overload_protection_delay_timeout, counter, 'overload_protection.delay.timeout'}, + {emqx_overload_protection_hibernation, counter, 'overload_protection.hibernation'}, + {emqx_overload_protection_gc, counter, 'overload_protection.gc'}, + {emqx_overload_protection_new_conn, counter, 'overload_protection.new_conn'} + ]; +emqx_metrics_olp_meta(false) -> + []. + +%%======================================== +%% License +%%======================================== + -if(?EMQX_RELEASE_EDITION == ee). -emqx_license() -> + +maybe_license_add_collect_family(Callback, RawData) -> + ok = add_collect_family(Callback, license_metric_meta(), ?MG(license_data, RawData)), + ok. + +maybe_license_fetch_data() -> + #{license_data => license_data()}. + +maybe_license_collect_json_data(RawData) -> + #{license => ?MG(license_data, RawData)}. + +%% license +license_metric_meta() -> [ - emqx_license_expiry_at + {emqx_license_expiry_at, gauge, undefined} ]. -emqx_license_data() -> - [ - {expiry_at, emqx_license_checker:expiry_epoch()} - ]. +license_data() -> + #{emqx_license_expiry_at => emqx_license_checker:expiry_epoch()}. + -else. +maybe_license_add_collect_family(_, _) -> + ok. + +maybe_license_fetch_data() -> + #{}. + +maybe_license_collect_json_data(_RawData) -> + #{}. + -endif. -emqx_certs() -> +%%======================================== +%% Certs +%%======================================== + +cert_metric_meta() -> [ - emqx_cert_expiry_at + {emqx_cert_expiry_at, gauge, undefined} ]. -define(LISTENER_TYPES, [ssl, wss, quic]). --spec emqx_certs_data() -> +-spec cert_data() -> [_Point :: {[Label], Epoch}] when Label :: TypeLabel | NameLabel, TypeLabel :: {listener_type, ssl | wss | quic}, NameLabel :: {listener_name, atom()}, Epoch :: non_neg_integer(). 
-emqx_certs_data() -> - case emqx_config:get([listeners], undefined) of - undefined -> - []; - AllListeners when is_map(AllListeners) -> - lists:foldl( - fun(ListenerType, PointsAcc) -> - PointsAcc ++ - points_of_listeners(ListenerType, AllListeners) - end, - _PointsInitAcc = [], - ?LISTENER_TYPES - ) - end. +cert_data() -> + cert_data(emqx_config:get([listeners], undefined)). + +cert_data(undefined) -> + []; +cert_data(AllListeners) -> + Points = lists:foldl( + fun(ListenerType, PointsAcc) -> + PointsAcc ++ + points_of_listeners(ListenerType, AllListeners) + end, + _PointsInitAcc = [], + ?LISTENER_TYPES + ), + #{ + emqx_cert_expiry_at => Points + }. points_of_listeners(Type, AllListeners) -> do_points_of_listeners(Type, maps:get(Type, AllListeners, undefined)). @@ -803,24 +834,7 @@ do_points_of_listeners(ListenerType, TypeOfListeners) -> ). gen_point(Type, Name, Path) -> - { - %% Labels: [{_Labelkey, _LabelValue}] - [ - {listener_type, Type}, - {listener_name, Name} - ], - %% Value - cert_expiry_at_from_path(Path) - }. - -collect_certs_json(CertsData) -> - lists:foldl( - fun({Labels, Data}, AccIn) -> - [(maps:from_list(Labels))#{emqx_cert_expiry_at => Data} | AccIn] - end, - _InitAcc = [], - CertsData - ). + {[{listener_type, Type}, {listener_name, Name}], cert_expiry_at_from_path(Path)}. %% TODO: cert manager for more generic utils functions cert_expiry_at_from_path(Path0) -> @@ -849,6 +863,59 @@ utc_time_to_datetime(Str) -> date_to_expiry_epoch(DateTime) -> calendar:datetime_to_gregorian_seconds(DateTime) - ?EPOCH_START. +%%-------------------------------------------------------------------- +%% Collect functions +%%-------------------------------------------------------------------- + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% merge / zip formatting funcs for type `application/json` + +%% always return json array +collect_cert_json_data(Data) -> + collect_json_data_(Data). 
+ +collect_json_data(Data0) -> + DataListPerNode = collect_json_data_(Data0), + case {?GET_PROM_DATA_MODE(), DataListPerNode} of + %% all nodes results unaggregated, should be a list + {?PROM_DATA_MODE__ALL_NODES_UNAGGREGATED, _} -> + DataListPerNode; + %% only local node result [#{...}] + %% To guaranteed compatibility, return a json object, not array + {?PROM_DATA_MODE__NODE, [NData | _]} -> + NData; + %% All nodes results aggregated + %% return a json object, not array + {?PROM_DATA_MODE__ALL_NODES_AGGREGATED, [NData | _]} -> + NData; + %% olp maybe not enabled, with empty list to empty object + {_, []} -> + #{} + end. + +collect_json_data_(Data) -> + emqx_prometheus_cluster:collect_json_data(Data, fun zip_json_prom_stats_metrics/3). + +zip_json_prom_stats_metrics(Key, Points, [] = _AccIn) -> + lists:foldl( + fun({Lables, Metric}, AccIn2) -> + LablesKVMap = maps:from_list(Lables), + Point = LablesKVMap#{Key => Metric}, + [Point | AccIn2] + end, + [], + Points + ); +zip_json_prom_stats_metrics(Key, Points, AllResultedAcc) -> + ThisKeyResult = lists:foldl(emqx_prometheus_cluster:point_to_map_fun(Key), [], Points), + lists:zipwith(fun maps:merge/2, AllResultedAcc, ThisKeyResult). + +metrics_name(MetricsAll) -> + [Name || {Name, _, _} <- MetricsAll]. + +%%-------------------------------------------------------------------- +%% bpapi + %% deprecated_since 5.0.10, remove this when 5.1.x do_start() -> emqx_prometheus_sup:start_child(?APP). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl index 5fa9057da..0d0607518 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_auth.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_auth.erl @@ -81,43 +81,6 @@ -define(MG0(K, MAP), maps:get(K, MAP, 0)). -define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). 
--define(AUTHNS_WITH_TYPE, [ - {emqx_authn_enable, gauge}, - {emqx_authn_status, gauge}, - {emqx_authn_nomatch, counter}, - {emqx_authn_total, counter}, - {emqx_authn_success, counter}, - {emqx_authn_failed, counter} -]). - --define(AUTHZS_WITH_TYPE, [ - {emqx_authz_enable, gauge}, - {emqx_authz_status, gauge}, - {emqx_authz_nomatch, counter}, - {emqx_authz_total, counter}, - {emqx_authz_success, counter}, - {emqx_authz_failed, counter} -]). - --define(AUTHN_USERS_COUNT_WITH_TYPE, [ - {emqx_authn_users_count, gauge} -]). - --define(AUTHZ_RULES_COUNT_WITH_TYPE, [ - {emqx_authz_rules_count, gauge} -]). - --define(BANNED_WITH_TYPE, [ - {emqx_banned_count, gauge} -]). - --define(LOGICAL_SUM_METRIC_NAMES, [ - emqx_authn_enable, - emqx_authn_status, - emqx_authz_enable, - emqx_authz_status -]). - %%-------------------------------------------------------------------- %% Collector API %%-------------------------------------------------------------------- @@ -132,11 +95,11 @@ deregister_cleanup(_) -> ok. 
%% erlfmt-ignore collect_mf(?PROMETHEUS_AUTH_REGISTRY, Callback) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), - ok = add_collect_family(Callback, ?AUTHNS_WITH_TYPE, ?MG(authn, RawData)), - ok = add_collect_family(Callback, ?AUTHN_USERS_COUNT_WITH_TYPE, ?MG(authn_users_count, RawData)), - ok = add_collect_family(Callback, ?AUTHZS_WITH_TYPE, ?MG(authz, RawData)), - ok = add_collect_family(Callback, ?AUTHZ_RULES_COUNT_WITH_TYPE, ?MG(authz_rules_count, RawData)), - ok = add_collect_family(Callback, ?BANNED_WITH_TYPE, ?MG(banned_count, RawData)), + ok = add_collect_family(Callback, authn_metric_meta(), ?MG(authn_data, RawData)), + ok = add_collect_family(Callback, authn_users_count_metric_meta(), ?MG(authn_users_count_data, RawData)), + ok = add_collect_family(Callback, authz_metric_meta(), ?MG(authz_data, RawData)), + ok = add_collect_family(Callback, authz_rules_count_metric_meta(), ?MG(authz_rules_count_data, RawData)), + ok = add_collect_family(Callback, banned_count_metric_meta(), ?MG(banned_count_data, RawData)), ok; collect_mf(_, _) -> ok. @@ -145,8 +108,8 @@ collect_mf(_, _) -> collect(<<"json">>) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), #{ - emqx_authn => collect_json_data(?MG(authn, RawData)), - emqx_authz => collect_json_data(?MG(authz, RawData)), + emqx_authn => collect_json_data(?MG(authn_data, RawData)), + emqx_authz => collect_json_data(?MG(authz_data, RawData)), emqx_banned => collect_banned_data() }; collect(<<"prometheus">>) -> @@ -165,25 +128,30 @@ collect_metrics(Name, Metrics) -> %% behaviour fetch_data_from_local_node() -> {node(self()), #{ - authn => authn_data(), - authz => authz_data() + authn_data => authn_data(), + authz_data => authz_data() }}. 
fetch_cluster_consistented_data() -> #{ - authn_users_count => authn_users_count_data(), - authz_rules_count => authz_rules_count_data(), - banned_count => banned_count_data() + authn_users_count_data => authn_users_count_data(), + authz_rules_count_data => authz_rules_count_data(), + banned_count_data => banned_count_data() }. aggre_or_zip_init_acc() -> #{ - authn => maps:from_keys(authn_metric_names(), []), - authz => maps:from_keys(authz_metric_names(), []) + authn_data => maps:from_keys(authn_metric(names), []), + authz_data => maps:from_keys(authz_metric(names), []) }. logic_sum_metrics() -> - ?LOGICAL_SUM_METRIC_NAMES. + [ + emqx_authn_enable, + emqx_authn_status, + emqx_authz_enable, + emqx_authz_status + ]. %%-------------------------------------------------------------------- %% Collector @@ -243,6 +211,19 @@ collect_auth(emqx_banned_count, Data) -> %%==================== %% Authn overview +authn_metric_meta() -> + [ + {emqx_authn_enable, gauge}, + {emqx_authn_status, gauge}, + {emqx_authn_nomatch, counter}, + {emqx_authn_total, counter}, + {emqx_authn_success, counter}, + {emqx_authn_failed, counter} + ]. + +authn_metric(names) -> + emqx_prometheus_cluster:metric_names(authn_metric_meta()). + -spec authn_data() -> #{Key => [Point]} when Key :: authn_metric_name(), Point :: {[Label], Metric}, @@ -256,7 +237,7 @@ authn_data() -> AccIn#{Key => authn_backend_to_points(Key, Authns)} end, #{}, - authn_metric_names() + authn_metric(names) ). -spec authn_backend_to_points(Key, list(Authn)) -> list(Point) when @@ -287,15 +268,17 @@ lookup_authn_metrics_local(Id) -> emqx_authn_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authn_metric_names() -- [emqx_authn_enable], 0) + maps:from_keys(authn_metric(names) -- [emqx_authn_enable], 0) end. -authn_metric_names() -> - emqx_prometheus_cluster:metric_names(?AUTHNS_WITH_TYPE). 
- %%==================== %% Authn users count +authn_users_count_metric_meta() -> + [ + {emqx_authn_users_count, gauge} + ]. + -define(AUTHN_MNESIA, emqx_authn_mnesia). -define(AUTHN_SCRAM_MNESIA, emqx_authn_scram_mnesia). @@ -321,6 +304,19 @@ authn_users_count_data() -> %%==================== %% Authz overview +authz_metric_meta() -> + [ + {emqx_authz_enable, gauge}, + {emqx_authz_status, gauge}, + {emqx_authz_nomatch, counter}, + {emqx_authz_total, counter}, + {emqx_authz_success, counter}, + {emqx_authz_failed, counter} + ]. + +authz_metric(names) -> + emqx_prometheus_cluster:metric_names(authz_metric_meta()). + -spec authz_data() -> #{Key => [Point]} when Key :: authz_metric_name(), Point :: {[Label], Metric}, @@ -334,7 +330,7 @@ authz_data() -> AccIn#{Key => authz_backend_to_points(Key, Authzs)} end, #{}, - authz_metric_names() + authz_metric(names) ). -spec authz_backend_to_points(Key, list(Authz)) -> list(Point) when @@ -365,15 +361,17 @@ lookup_authz_metrics_local(Type) -> emqx_authz_failed => ?MG0(failed, Counters) }; {error, _Reason} -> - maps:from_keys(authz_metric_names() -- [emqx_authz_enable], 0) + maps:from_keys(authz_metric(names) -- [emqx_authz_enable], 0) end. -authz_metric_names() -> - emqx_prometheus_cluster:metric_names(?AUTHZS_WITH_TYPE). - %%==================== %% Authz rules count +authz_rules_count_metric_meta() -> + [ + {emqx_authz_rules_count, gauge} + ]. + -define(ACL_TABLE, emqx_acl). authz_rules_count_data() -> @@ -400,7 +398,13 @@ authz_rules_count_data() -> %%==================== %% Banned count --define(BANNED_TABLE, emqx_banned). +banned_count_metric_meta() -> + [ + {emqx_banned_count, gauge} + ]. +-define(BANNED_TABLE, + emqx_banned +). banned_count_data() -> mnesia_size(?BANNED_TABLE). 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl index bfd011eaa..008a029a8 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_data_integration.erl @@ -65,65 +65,6 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(MG0(K, MAP), maps:get(K, MAP, 0)). --define(RULES_WITH_TYPE, [ - {emqx_rules_count, gauge} -]). - --define(CONNECTORS_WITH_TYPE, [ - {emqx_connectors_count, gauge} -]). - --define(RULES_SPECIFIC_WITH_TYPE, [ - {emqx_rule_enable, gauge}, - {emqx_rule_matched, counter}, - {emqx_rule_failed, counter}, - {emqx_rule_passed, counter}, - {emqx_rule_failed_exception, counter}, - {emqx_rule_failed_no_result, counter}, - {emqx_rule_actions_total, counter}, - {emqx_rule_actions_success, counter}, - {emqx_rule_actions_failed, counter}, - {emqx_rule_actions_failed_out_of_service, counter}, - {emqx_rule_actions_failed_unknown, counter} -]). - --define(ACTION_SPECIFIC_WITH_TYPE, [ - {emqx_action_matched, counter}, - {emqx_action_dropped, counter}, - {emqx_action_success, counter}, - {emqx_action_failed, counter}, - {emqx_action_inflight, gauge}, - {emqx_action_received, counter}, - {emqx_action_late_reply, counter}, - {emqx_action_retried, counter}, - {emqx_action_retried_success, counter}, - {emqx_action_retried_failed, counter}, - {emqx_action_dropped_resource_stopped, counter}, - {emqx_action_dropped_resource_not_found, counter}, - {emqx_action_dropped_queue_full, counter}, - {emqx_action_dropped_other, counter}, - {emqx_action_dropped_expired, counter}, - {emqx_action_queuing, gauge} -]). - --define(CONNECTOR_SPECIFIC_WITH_TYPE, [ - {emqx_connector_enable, gauge}, - {emqx_connector_status, gauge} -]). - --if(?EMQX_RELEASE_EDITION == ee). --define(SCHEMA_REGISTRY_WITH_TYPE, [ - emqx_schema_registrys_count -]). --else. --endif. 
- --define(LOGICAL_SUM_METRIC_NAMES, [ - emqx_rule_enable, - emqx_connector_enable, - emqx_connector_status -]). - %%-------------------------------------------------------------------- %% Callback for emqx_prometheus_cluster %%-------------------------------------------------------------------- @@ -132,28 +73,32 @@ fetch_data_from_local_node() -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), {node(self()), #{ - rule_specific_data => rule_specific_data(Rules), - action_specific_data => action_specific_data(Bridges), - connector_specific_data => connector_specific_data(Bridges) + rule_metric_data => rule_metric_data(Rules), + action_metric_data => action_metric_data(Bridges), + connector_metric_data => connector_metric_data(Bridges) }}. fetch_cluster_consistented_data() -> Rules = emqx_rule_engine:get_rules(), Bridges = emqx_bridge:list(), (maybe_collect_schema_registry())#{ - rules_data => rules_data(Rules), - connectors_data => connectors_data(Bridges) + rules_ov_data => rules_ov_data(Rules), + connectors_ov_data => connectors_ov_data(Bridges) }. aggre_or_zip_init_acc() -> #{ - rule_specific_data => maps:from_keys(rule_specific_metric_names(), []), - action_specific_data => maps:from_keys(action_specific_metric_names(), []), - connector_specific_data => maps:from_keys(connectr_specific_metric_names(), []) + rule_metric_data => maps:from_keys(rule_metric(names), []), + action_metric_data => maps:from_keys(action_metric(names), []), + connector_metric_data => maps:from_keys(connectr_metric(names), []) }. logic_sum_metrics() -> - ?LOGICAL_SUM_METRIC_NAMES. + [ + emqx_rule_enable, + emqx_connector_enable, + emqx_connector_status + ]. 
%%-------------------------------------------------------------------- %% Collector API @@ -170,21 +115,23 @@ collect_mf(?PROMETHEUS_DATA_INTEGRATION_REGISTRY, Callback) -> RawData = emqx_prometheus_cluster:raw_data(?MODULE, ?GET_PROM_DATA_MODE()), %% Data Integration Overview - ok = add_collect_family(Callback, ?RULES_WITH_TYPE, ?MG(rules_data, RawData)), - ok = add_collect_family(Callback, ?CONNECTORS_WITH_TYPE, ?MG(connectors_data, RawData)), + ok = add_collect_family(Callback, rules_ov_metric_meta(), ?MG(rules_ov_data, RawData)), + ok = add_collect_family( + Callback, connectors_ov_metric_meta(), ?MG(connectors_ov_data, RawData) + ), ok = maybe_collect_family_schema_registry(Callback), - %% Rule Specific - RuleSpecificDs = ?MG(rule_specific_data, RawData), - ok = add_collect_family(Callback, ?RULES_SPECIFIC_WITH_TYPE, RuleSpecificDs), + %% Rule Metric + RuleMetricDs = ?MG(rule_metric_data, RawData), + ok = add_collect_family(Callback, rule_metric_meta(), RuleMetricDs), - %% Action Specific - ActionSpecificDs = ?MG(action_specific_data, RawData), - ok = add_collect_family(Callback, ?ACTION_SPECIFIC_WITH_TYPE, ActionSpecificDs), + %% Action Metric + ActionMetricDs = ?MG(action_metric_data, RawData), + ok = add_collect_family(Callback, action_metric_meta(), ActionMetricDs), - %% Connector Specific - ConnectorSpecificDs = ?MG(connector_specific_data, RawData), - ok = add_collect_family(Callback, ?CONNECTOR_SPECIFIC_WITH_TYPE, ConnectorSpecificDs), + %% Connector Metric + ConnectorMetricDs = ?MG(connector_metric_data, RawData), + ok = add_collect_family(Callback, connector_metric_meta(), ConnectorMetricDs), ok; collect_mf(_, _) -> @@ -197,9 +144,9 @@ collect(<<"json">>) -> Bridges = emqx_bridge:list(), #{ data_integration_overview => collect_data_integration_overview(Rules, Bridges), - rules => collect_json_data(?MG(rule_specific_data, RawData)), - actions => collect_json_data(?MG(action_specific_data, RawData)), - connectors => 
collect_json_data(?MG(connector_specific_data, RawData)) + rules => collect_json_data(?MG(rule_metric_data, RawData)), + actions => collect_json_data(?MG(action_metric_data, RawData)), + connectors => collect_json_data(?MG(connector_metric_data, RawData)) }; collect(<<"prometheus">>) -> prometheus_text_format:format(?PROMETHEUS_DATA_INTEGRATION_REGISTRY). @@ -218,21 +165,6 @@ add_collect_family(Name, Data, Callback, Type) -> collect_metrics(Name, Metrics) -> collect_di(Name, Metrics). --if(?EMQX_RELEASE_EDITION == ee). -maybe_collect_family_schema_registry(Callback) -> - ok = add_collect_family(Callback, ?SCHEMA_REGISTRY_WITH_TYPE, schema_registry_data()), - ok. - -maybe_collect_schema_registry() -> - schema_registry_data(). --else. -maybe_collect_family_schema_registry(_) -> - ok. - -maybe_collect_schema_registry() -> - #{}. --endif. - %%-------------------------------------------------------------------- %% Collector %%-------------------------------------------------------------------- @@ -244,88 +176,54 @@ maybe_collect_schema_registry() -> %%==================== %% All Rules %% Rules -collect_di(K = emqx_rules_count, Data) -> - gauge_metric(?MG(K, Data)); +collect_di(K = emqx_rules_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Schema Registry -collect_di(K = emqx_schema_registrys_count, Data) -> - gauge_metric(?MG(K, Data)); +collect_di(K = emqx_schema_registrys_count, Data) -> gauge_metric(?MG(K, Data)); %%==================== %% Connectors -collect_di(K = emqx_connectors_count, Data) -> - gauge_metric(?MG(K, Data)); +collect_di(K = emqx_connectors_count, Data) -> gauge_metric(?MG(K, Data)); %%======================================== -%% Data Integration for Specific: Rule && Action && Connector +%% Data Integration Metric for: Rule && Action && Connector %%======================================== %%==================== -%% Specific Rule -collect_di(K = emqx_rule_enable, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = 
emqx_rule_matched, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_passed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_failed_exception, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_failed_no_result, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_total, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_success, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_rule_actions_failed_unknown, Data) -> - counter_metrics(?MG(K, Data)); +%% Rule Metric +collect_di(K = emqx_rule_enable, Data) -> gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_matched, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_passed, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_exception, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_failed_no_result, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_total, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_success, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_out_of_service, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_rule_actions_failed_unknown, Data) -> counter_metrics(?MG(K, Data)); %%==================== -%% Specific Action - -collect_di(K = emqx_action_matched, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_success, Data) -> - 
counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_inflight, Data) -> - %% inflight type: gauge - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_action_received, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_late_reply, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_retried, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_retried_success, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_retried_failed, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_resource_stopped, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_resource_not_found, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_queue_full, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_other, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_dropped_expired, Data) -> - counter_metrics(?MG(K, Data)); -collect_di(K = emqx_action_queuing, Data) -> - %% queuing type: gauge - gauge_metrics(?MG(K, Data)); +%% Action Metric +collect_di(K = emqx_action_matched, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_success, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_failed, Data) -> counter_metrics(?MG(K, Data)); +%% inflight type: gauge +collect_di(K = emqx_action_inflight, Data) -> gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_action_received, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_late_reply, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_success, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_retried_failed, Data) -> 
counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_stopped, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_resource_not_found, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_queue_full, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_other, Data) -> counter_metrics(?MG(K, Data)); +collect_di(K = emqx_action_dropped_expired, Data) -> counter_metrics(?MG(K, Data)); +%% queuing type: gauge +collect_di(K = emqx_action_queuing, Data) -> gauge_metrics(?MG(K, Data)); %%==================== -%% Specific Connector - -collect_di(K = emqx_connector_enable, Data) -> - gauge_metrics(?MG(K, Data)); -collect_di(K = emqx_connector_status, Data) -> - gauge_metrics(?MG(K, Data)). +%% Connector Metric +collect_di(K = emqx_connector_enable, Data) -> gauge_metrics(?MG(K, Data)); +collect_di(K = emqx_connector_status, Data) -> gauge_metrics(?MG(K, Data)). %%-------------------------------------------------------------------- %% Internal functions @@ -338,8 +236,16 @@ collect_di(K = emqx_connector_status, Data) -> %%==================== %% All Rules +rules_ov_metric_meta() -> + [ + {emqx_rules_count, gauge} + ]. + +rules_ov_metric(names) -> + emqx_prometheus_cluster:metric_names(rules_ov_metric_meta()). + -define(RULE_TAB, emqx_rule_engine). -rules_data(_Rules) -> +rules_ov_data(_Rules) -> #{ emqx_rules_count => ets:info(?RULE_TAB, size) }. @@ -348,36 +254,83 @@ rules_data(_Rules) -> %% Schema Registry -if(?EMQX_RELEASE_EDITION == ee). + +maybe_collect_family_schema_registry(Callback) -> + ok = add_collect_family(Callback, schema_registry_metric_meta(), schema_registry_data()), + ok. + +schema_registry_metric_meta() -> + [ + {emqx_schema_registrys_count, gauge} + ]. + schema_registry_data() -> #{ emqx_schema_registrys_count => erlang:map_size(emqx_schema_registry:list_schemas()) }. + +maybe_collect_schema_registry() -> + schema_registry_data(). + -else. 
+ +maybe_collect_family_schema_registry(_) -> + ok. + +maybe_collect_schema_registry() -> + #{}. + -endif. %%==================== %% Connectors -connectors_data(Brdiges) -> +connectors_ov_metric_meta() -> + [ + {emqx_connectors_count, gauge} + ]. + +connectors_ov_metric(names) -> + emqx_prometheus_cluster:metric_names(connectors_ov_metric_meta()). + +connectors_ov_data(Brdiges) -> #{ %% Both Bridge V1 and V2 emqx_connectors_count => erlang:length(Brdiges) }. %%======================================== -%% Data Integration for Specific: Rule && Action && Connector +%% Data Integration Metric for: Rule && Action && Connector %%======================================== %%==================== -%% Specific Rule +%% Rule Metric %% With rule_id as label key: `rule_id` -rule_specific_data(Rules) -> +rule_metric_meta() -> + [ + {emqx_rule_enable, gauge}, + {emqx_rule_matched, counter}, + {emqx_rule_failed, counter}, + {emqx_rule_passed, counter}, + {emqx_rule_failed_exception, counter}, + {emqx_rule_failed_no_result, counter}, + {emqx_rule_actions_total, counter}, + {emqx_rule_actions_success, counter}, + {emqx_rule_actions_failed, counter}, + {emqx_rule_actions_failed_out_of_service, counter}, + {emqx_rule_actions_failed_unknown, counter} + ]. + +rule_metric(names) -> + emqx_prometheus_cluster:metric_names(rule_metric_meta()). + +rule_metric_data(Rules) -> lists:foldl( fun(#{id := Id} = Rule, AccIn) -> merge_acc_with_rules(Id, get_metric(Rule), AccIn) end, - maps:from_keys(rule_specific_metric_names(), []), + maps:from_keys(rule_metric(names), []), Rules ). @@ -413,20 +366,40 @@ get_metric(#{id := Id, enable := Bool} = _Rule) -> } end. -rule_specific_metric_names() -> - emqx_prometheus_cluster:metric_names(?RULES_SPECIFIC_WITH_TYPE). 
- %%==================== -%% Specific Action +%% Action Metric %% With action_id: `{type}:{name}` as label key: `action_id` -action_specific_data(Bridges) -> +action_metric_meta() -> + [ + {emqx_action_matched, counter}, + {emqx_action_dropped, counter}, + {emqx_action_success, counter}, + {emqx_action_failed, counter}, + {emqx_action_inflight, gauge}, + {emqx_action_received, counter}, + {emqx_action_late_reply, counter}, + {emqx_action_retried, counter}, + {emqx_action_retried_success, counter}, + {emqx_action_retried_failed, counter}, + {emqx_action_dropped_resource_stopped, counter}, + {emqx_action_dropped_resource_not_found, counter}, + {emqx_action_dropped_queue_full, counter}, + {emqx_action_dropped_other, counter}, + {emqx_action_dropped_expired, counter}, + {emqx_action_queuing, gauge} + ]. + +action_metric(names) -> + emqx_prometheus_cluster:metric_names(action_metric_meta()). + +action_metric_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = _Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_bridges(Id, get_bridge_metric(Type, Name), AccIn) end, - maps:from_keys(action_specific_metric_names(), []), + maps:from_keys(action_metric(names), []), Bridges ). @@ -467,20 +440,26 @@ get_bridge_metric(Type, Name) -> } end. -action_specific_metric_names() -> - emqx_prometheus_cluster:metric_names(?ACTION_SPECIFIC_WITH_TYPE). - %%==================== -%% Specific Connector +%% Connector Metric %% With connector_id: `{type}:{name}` as label key: `connector_id` -connector_specific_data(Bridges) -> +connector_metric_meta() -> + [ + {emqx_connector_enable, gauge}, + {emqx_connector_status, gauge} + ]. + +connectr_metric(names) -> + emqx_prometheus_cluster:metric_names(connector_metric_meta()). 
+ +connector_metric_data(Bridges) -> lists:foldl( fun(#{type := Type, name := Name} = Bridge, AccIn) -> Id = emqx_bridge_resource:bridge_id(Type, Name), merge_acc_with_connectors(Id, get_connector_status(Bridge), AccIn) end, - maps:from_keys(connectr_specific_metric_names(), []), + maps:from_keys(connectr_metric(names), []), Bridges ). @@ -504,9 +483,6 @@ get_connector_status(#{resource_data := ResourceData} = _Bridge) -> emqx_connector_status => emqx_prometheus_cluster:status_to_number(Status) }. -connectr_specific_metric_names() -> - emqx_prometheus_cluster:metric_names(?CONNECTOR_SPECIFIC_WITH_TYPE). - %%-------------------------------------------------------------------- %% Collect functions %%-------------------------------------------------------------------- @@ -514,18 +490,18 @@ connectr_specific_metric_names() -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% merge / zip formatting funcs for type `application/json` collect_data_integration_overview(Rules, Bridges) -> - RulesD = rules_data(Rules), - ConnectorsD = connectors_data(Bridges), + RulesD = rules_ov_data(Rules), + ConnectorsD = connectors_ov_data(Bridges), M1 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, RulesD)} end, #{}, - emqx_prometheus_cluster:metric_names(?RULES_WITH_TYPE) + rules_ov_metric(names) ), M2 = lists:foldl( fun(K, AccIn) -> AccIn#{K => ?MG(K, ConnectorsD)} end, #{}, - emqx_prometheus_cluster:metric_names(?CONNECTORS_WITH_TYPE) + connectors_ov_metric(names) ), M3 = maybe_collect_schema_registry(), From 2263df0242ac617ecdbdb5a2e05b56f145e54995 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 21 Jan 2024 23:11:30 +0800 Subject: [PATCH 36/38] fix(prom_push_gw): use format mode `node` for prometheus push gateway --- apps/emqx_prometheus/src/emqx_prometheus_cluster.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl index e48df0f8b..2a68c7b3b 100644 --- 
a/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_cluster.erl @@ -41,6 +41,9 @@ -define(MG(K, MAP), maps:get(K, MAP)). -define(PG0(K, PROPLISTS), proplists:get_value(K, PROPLISTS, 0)). +raw_data(Module, undefined) -> + %% TODO: for push gateway, the format mode should be configurable + raw_data(Module, ?PROM_DATA_MODE__NODE); raw_data(Module, ?PROM_DATA_MODE__ALL_NODES_AGGREGATED) -> AllNodesMetrics = aggre_cluster(Module), Cluster = Module:fetch_cluster_consistented_data(), From 2061d75b50471eded6ee932386836440220692de Mon Sep 17 00:00:00 2001 From: JimMoen Date: Sun, 21 Jan 2024 22:46:27 +0800 Subject: [PATCH 37/38] docs: prometheus api `mode` field description --- .../emqx_prometheus/src/emqx_prometheus_api.erl | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 47a5b0299..89bfa6e6a 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -126,6 +126,7 @@ security() -> false -> [] end. +%% erlfmt-ignore fields(mode) -> [ {mode, @@ -133,7 +134,21 @@ fields(mode) -> hoconsc:enum(?PROM_DATA_MODES), #{ default => node, - desc => <<"Metrics format mode.">>, + desc => <<" +Metrics format mode. + +`node`: +Return metrics from local node. And it is the default behaviour if `mode` not specified. + +`all_nodes_aggregated`: +Return metrics for all nodes. +And if possible, calculate the arithmetic sum or logical sum of the indicators of all nodes. + +`all_nodes_unaggregated`: +Return metrics from all nodes, and the metrics are not aggregated. +The node name will be included in the returned results to +indicate that certain metrics were returned on a certain node. 
+">>, in => query, required => false, example => node From c2f26e8e982c06b56f256ae0dce5fca9d1eb0113 Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 18 Jan 2024 17:18:42 +0800 Subject: [PATCH 38/38] docs: bump change log --- changes/ce/feat-12299.en.md | 15 +++++++++++++++ changes/ee/feat-12299.en.md | 17 +++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 changes/ce/feat-12299.en.md create mode 100644 changes/ee/feat-12299.en.md diff --git a/changes/ce/feat-12299.en.md b/changes/ce/feat-12299.en.md new file mode 100644 index 000000000..1721970e4 --- /dev/null +++ b/changes/ce/feat-12299.en.md @@ -0,0 +1,15 @@ +Expose more metrics to improve observability: + +Monitor API: + - Add `retained_msg_count` field to `/api/v5/monitor_current`. + - Add `retained_msg_count` and `node_uptime` fields to `/api/v5/monitor_current/nodes/{node}`. + +Prometheus API: + - Add `emqx_cert_expiry_at` to `/api/v5/prometheus/stats` to display TLS listener certificate expiration time. + - Add `/api/v5/prometheus/auth` endpoint to provide metrics such as execution count and running status for all authenticators and authorizers. + - Add `/api/v5/prometheus/data_integration` endpoint to provide metrics such as execution count and status for all rules, actions, and connectors. + +Limitations: + Prometheus push gateway only supports content in `/api/v5/prometheus/stats?mode=node` for now. + +For more API details and metric type information, please see the Swagger API docs. diff --git a/changes/ee/feat-12299.en.md b/changes/ee/feat-12299.en.md new file mode 100644 index 000000000..629928b90 --- /dev/null +++ b/changes/ee/feat-12299.en.md @@ -0,0 +1,17 @@ +# Expose more metrics to improve observability: + +Monitor API: + - Add `retained_msg_count` field to `/api/v5/monitor_current`. + - Add `license_quota` field to `/api/v5/monitor_current`. + - Add `retained_msg_count` and `node_uptime` fields to `/api/v5/monitor_current/nodes/{node}`.
+ - Add `retained_msg_count`, `license_quota` and `node_uptime` fields to `/api/v5/monitor_current/nodes/{node}`. + +Prometheus API: + - Add `emqx_cert_expiry_at` and `emqx_license_expiry_at` to `/api/v5/prometheus/stats` to display TLS listener certificate expiration time and license expiration time. + - Add `/api/v5/prometheus/auth` endpoint to provide metrics such as execution count and running status for all authenticators and authorizers. + - Add `/api/v5/prometheus/data_integration` endpoint to provide metrics such as execution count and status for all rules, actions, and connectors. + +Limitations: + Prometheus push gateway only supports the content in `/api/v5/prometheus/stats?mode=node`. + +For more API details and metric type information, please see the Swagger API docs.