From e22f8ff2a84d937ded206c5346797c637b4f545b Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 26 Oct 2022 17:06:32 +0800 Subject: [PATCH 1/6] refactor: sync emqx_prometheus via emqx_config_handler --- .../include/emqx_prometheus.hrl | 1 + apps/emqx_prometheus/src/emqx_prometheus.erl | 112 +++++++----------- .../src/emqx_prometheus_api.erl | 10 +- .../src/emqx_prometheus_app.erl | 15 +-- .../src/emqx_prometheus_config.erl | 51 ++++++++ .../src/emqx_prometheus_schema.erl | 9 +- .../src/emqx_prometheus_sup.erl | 21 ++-- .../test/emqx_prometheus_SUITE.erl | 2 +- .../test/emqx_prometheus_api_SUITE.erl | 17 ++- 9 files changed, 138 insertions(+), 100 deletions(-) create mode 100644 apps/emqx_prometheus/src/emqx_prometheus_config.erl diff --git a/apps/emqx_prometheus/include/emqx_prometheus.hrl b/apps/emqx_prometheus/include/emqx_prometheus.hrl index 589bbd024..36066a55d 100644 --- a/apps/emqx_prometheus/include/emqx_prometheus.hrl +++ b/apps/emqx_prometheus/include/emqx_prometheus.hrl @@ -1 +1,2 @@ -define(APP, emqx_prometheus). +-define(PROMETHEUS, [prometheus]). diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index d2c09774c..4edf371cc 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -37,18 +37,8 @@ ] ). --export([ - update/1, - start/0, - stop/0, - restart/0, - % for rpc - do_start/0, - do_stop/0 -]). - %% APIs --export([start_link/1]). +-export([start_link/1, info/0]). %% gen_server callbacks -export([ @@ -73,84 +63,59 @@ -define(TIMER_MSG, '#interval'). --record(state, {push_gateway, timer, interval}). 
- -%%-------------------------------------------------------------------- -%% update new config -update(Config) -> - case - emqx_conf:update( - [prometheus], - Config, - #{rawconf_with_defaults => true, override_to => cluster} - ) - of - {ok, #{raw_config := NewConfigRows}} -> - case maps:get(<<"enable">>, Config, true) of - true -> - ok = restart(); - false -> - ok = stop() - end, - {ok, NewConfigRows}; - {error, Reason} -> - {error, Reason} - end. - -start() -> - {_, []} = emqx_prometheus_proto_v1:start(mria_mnesia:running_nodes()), - ok. - -stop() -> - {_, []} = emqx_prometheus_proto_v1:stop(mria_mnesia:running_nodes()), - ok. - -restart() -> - ok = stop(), - ok = start(). - -do_start() -> - emqx_prometheus_sup:start_child(?APP, emqx_conf:get([prometheus])). - -do_stop() -> - case emqx_prometheus_sup:stop_child(?APP) of - ok -> - ok; - {error, not_found} -> - ok - end. +-define(HTTP_OPTIONS, [{autoredirect, true}, {timeout, 60000}]). %%-------------------------------------------------------------------- %% APIs %%-------------------------------------------------------------------- -start_link(Opts) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [Opts], []). +start_link([]) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +info() -> + gen_server:call(?MODULE, info). %%-------------------------------------------------------------------- %% gen_server callbacks %%-------------------------------------------------------------------- -init([Opts]) -> - Interval = maps:get(interval, Opts), - PushGateway = maps:get(push_gateway_server, Opts), - {ok, ensure_timer(#state{push_gateway = PushGateway, interval = Interval})}. +init([]) -> + #{interval := Interval} = opts(), + {ok, #{timer => ensure_timer(Interval), ok => 0, failed => 0}}. +handle_call(info, _From, State) -> + {reply, State#{opts => opts()}, State}; handle_call(_Msg, _From, State) -> - {noreply, State}. + {reply, ok, State}. handle_cast(_Msg, State) -> {noreply, State}. 
-handle_info({timeout, R, ?TIMER_MSG}, State = #state{timer = R, push_gateway = Uri}) -> +handle_info({timeout, Timer, ?TIMER_MSG}, State = #{timer := Timer}) -> + #{interval := Interval, push_gateway_server := Server} = opts(), + PushRes = push_to_push_gateway(Server), + NewTimer = ensure_timer(Interval), + NewState = maps:update_with(PushRes, fun(C) -> C + 1 end, 1, State#{timer => NewTimer}), + %% Data is too big, hibernate for saving memory and stop system monitor warning. + {noreply, NewState, hibernate}; +handle_info(_Msg, State) -> + {noreply, State}. + +push_to_push_gateway(Uri) -> [Name, Ip] = string:tokens(atom_to_list(node()), "@"), Url = lists:concat([Uri, "/metrics/job/", Name, "/instance/", Name, "~", Ip]), Data = prometheus_text_format:format(), - httpc:request(post, {Url, [], "text/plain", Data}, [{autoredirect, true}], []), - %% Data is too big, hibernate for saving memory and stop system monitor warning. - {noreply, ensure_timer(State), hibernate}; -handle_info(_Msg, State) -> - {noreply, State}. + case httpc:request(post, {Url, [], "text/plain", Data}, ?HTTP_OPTIONS, []) of + {ok, {{"HTTP/1.1", 200, "OK"}, _Headers, _Body}} -> + ok; + Error -> + ?SLOG(error, #{ + msg => "post_to_push_gateway_failed", + error => Error, + url => Url + }), + failed + end. code_change(_OldVsn, State, _Extra) -> {ok, State}. @@ -158,11 +123,14 @@ code_change(_OldVsn, State, _Extra) -> terminate(_Reason, _State) -> ok. -ensure_timer(State = #state{interval = Interval}) -> - State#state{timer = emqx_misc:start_timer(Interval, ?TIMER_MSG)}. +ensure_timer(Interval) -> + emqx_misc:start_timer(Interval, ?TIMER_MSG). + %%-------------------------------------------------------------------- %% prometheus callbacks %%-------------------------------------------------------------------- +opts() -> + emqx_conf:get(?PROMETHEUS). deregister_cleanup(_Registry) -> ok. 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_api.erl b/apps/emqx_prometheus/src/emqx_prometheus_api.erl index 9a81f3ea3..125eed560 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_api.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_api.erl @@ -84,7 +84,7 @@ schema("/prometheus/stats") -> prometheus(get, _Params) -> {200, emqx:get_raw_config([<<"prometheus">>], #{})}; prometheus(put, #{body := Body}) -> - case emqx_prometheus:update(Body) of + case emqx_prometheus_config:update(Body) of {ok, NewConfig} -> {200, NewConfig}; {error, Reason} -> @@ -120,7 +120,13 @@ prometheus_config_example() -> #{ enable => true, interval => "15s", - push_gateway_server => <<"http://127.0.0.1:9091">> + push_gateway_server => <<"http://127.0.0.1:9091">>, + vm_dist_collector => enabled, + mnesia_collector => enabled, + vm_statistics_collector => enabled, + vm_system_info_collector => enabled, + vm_memory_collector => enabled, + vm_msacc_collector => enabled }. prometheus_data_schema() -> diff --git a/apps/emqx_prometheus/src/emqx_prometheus_app.erl b/apps/emqx_prometheus/src/emqx_prometheus_app.erl index b9dd9c466..bdee12d0e 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_app.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_app.erl @@ -27,17 +27,10 @@ ]). start(_StartType, _StartArgs) -> - {ok, Sup} = emqx_prometheus_sup:start_link(), - maybe_enable_prometheus(), - {ok, Sup}. + Res = emqx_prometheus_sup:start_link(), + emqx_prometheus_config:add_handler(), + Res. stop(_State) -> + emqx_prometheus_config:remove_handler(), ok. - -maybe_enable_prometheus() -> - case emqx_conf:get([prometheus, enable], false) of - true -> - emqx_prometheus_sup:start_child(?APP, emqx_conf:get([prometheus], #{})); - false -> - ok - end. 
diff --git a/apps/emqx_prometheus/src/emqx_prometheus_config.erl b/apps/emqx_prometheus/src/emqx_prometheus_config.erl new file mode 100644 index 000000000..83762b7c7 --- /dev/null +++ b/apps/emqx_prometheus/src/emqx_prometheus_config.erl @@ -0,0 +1,51 @@ +%%%------------------------------------------------------------------- +%%% @author zhongwen +%%% @copyright (C) 2022, +%%% @doc +%%% +%%% @end +%%% Created : 26. 10月 2022 11:14 +%%%------------------------------------------------------------------- +-module(emqx_prometheus_config). + +-behaviour(emqx_config_handler). + +-include("emqx_prometheus.hrl"). + +-export([add_handler/0, remove_handler/0]). +-export([post_config_update/5]). +-export([update/1]). + +update(Config) -> + case + emqx_conf:update( + [prometheus], + Config, + #{rawconf_with_defaults => true, override_to => cluster} + ) + of + {ok, #{raw_config := NewConfigRows}} -> + {ok, NewConfigRows}; + {error, Reason} -> + {error, Reason} + end. + +add_handler() -> + ok = emqx_config_handler:add_handler(?PROMETHEUS, ?MODULE), + ok. + +remove_handler() -> + ok = emqx_config_handler:remove_handler(?PROMETHEUS), + ok. + +post_config_update(?PROMETHEUS, _Req, New, _Old, AppEnvs) -> + application:set_env(AppEnvs), + update_prometheus(New), + ok; +post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) -> + ok. + +update_prometheus(#{enable := true}) -> + emqx_prometheus_sup:start_child(?APP); +update_prometheus(#{enable := false}) -> + emqx_prometheus_sup:stop_child(?APP). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_schema.erl b/apps/emqx_prometheus/src/emqx_prometheus_schema.erl index 4149485ff..09908167c 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_schema.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_schema.erl @@ -24,12 +24,13 @@ namespace/0, roots/0, fields/1, - desc/1 + desc/1, + translation/1 ]). namespace() -> "prometheus". -roots() -> ["prometheus"]. 
+roots() -> [{"prometheus", ?HOCON(?R_REF("prometheus"), #{translate_to => ["prometheus"]})}]. fields("prometheus") -> [ @@ -124,3 +125,7 @@ fields("prometheus") -> desc("prometheus") -> ?DESC(prometheus); desc(_) -> undefined. + +%% For CI tests: CI doesn't load the whole emqx_conf_schema. +translation(Name) -> + emqx_conf_schema:translation(Name). diff --git a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl index 65023da14..eaf96af43 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl @@ -21,7 +21,6 @@ -export([ start_link/0, start_child/1, - start_child/2, stop_child/1 ]). @@ -40,23 +39,27 @@ start_link() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). --spec start_child(supervisor:child_spec()) -> ok. +-spec start_child(supervisor:child_spec() | atom()) -> ok. start_child(ChildSpec) when is_map(ChildSpec) -> - assert_started(supervisor:start_child(?MODULE, ChildSpec)). - --spec start_child(atom(), map()) -> ok. -start_child(Mod, Opts) when is_atom(Mod) andalso is_map(Opts) -> - assert_started(supervisor:start_child(?MODULE, ?CHILD(Mod, Opts))). + assert_started(supervisor:start_child(?MODULE, ChildSpec)); +start_child(Mod) when is_atom(Mod) -> + assert_started(supervisor:start_child(?MODULE, ?CHILD(Mod, []))). -spec stop_child(any()) -> ok | {error, term()}. stop_child(ChildId) -> case supervisor:terminate_child(?MODULE, ChildId) of ok -> supervisor:delete_child(?MODULE, ChildId); + {error, not_found} -> ok; Error -> Error end. init([]) -> - {ok, {{one_for_one, 10, 3600}, []}}. + Children = + case emqx_conf:get([prometheus, enable], false) of + false -> []; + true -> [?CHILD(emqx_prometheus, [])] + end, + {ok, {{one_for_one, 10, 3600}, Children}}. 
%%-------------------------------------------------------------------- %% Internal functions @@ -64,5 +67,5 @@ init([]) -> assert_started({ok, _Pid}) -> ok; assert_started({ok, _Pid, _Info}) -> ok; -assert_started({error, {already_tarted, _Pid}}) -> ok; +assert_started({error, {already_started, _Pid}}) -> ok; assert_started({error, Reason}) -> erlang:error(Reason). diff --git a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl index 3ae21511d..1a72b8952 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl @@ -74,7 +74,7 @@ t_start_stop(_) -> ?assertMatch(ok, emqx_prometheus:start()), ?assertMatch(ok, emqx_prometheus:stop()), ?assertMatch(ok, emqx_prometheus:restart()), - %% wait the interval timer tigger + %% wait the interval timer trigger timer:sleep(2000). t_collector_no_crash_test(_) -> diff --git a/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl index e72d7865a..59b3b9a17 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_api_SUITE.erl @@ -71,16 +71,27 @@ t_prometheus_api(_) -> #{ <<"push_gateway_server">> := _, <<"interval">> := _, - <<"enable">> := _ + <<"enable">> := _, + <<"vm_statistics_collector">> := _, + <<"vm_system_info_collector">> := _, + <<"vm_memory_collector">> := _, + <<"vm_msacc_collector">> := _ }, Conf ), - - NewConf = Conf#{<<"interval">> := <<"2s">>}, + #{<<"enable">> := Enable} = Conf, + ?assertEqual(Enable, undefined =/= erlang:whereis(emqx_prometheus)), + NewConf = Conf#{<<"interval">> => <<"2s">>, <<"vm_statistics_collector">> => <<"disabled">>}, {ok, Response2} = emqx_mgmt_api_test_util:request_api(put, Path, "", Auth, NewConf), Conf2 = emqx_json:decode(Response2, [return_maps]), ?assertMatch(NewConf, Conf2), + ?assertEqual({ok, []}, application:get_env(prometheus, 
vm_statistics_collector_metrics)), + ?assertEqual({ok, all}, application:get_env(prometheus, vm_memory_collector_metrics)), + + NewConf1 = Conf#{<<"enable">> => (not Enable)}, + {ok, _Response3} = emqx_mgmt_api_test_util:request_api(put, Path, "", Auth, NewConf1), + ?assertEqual((not Enable), undefined =/= erlang:whereis(emqx_prometheus)), ok. t_stats_api(_) -> From a3f877656e23a29a13d229c6a09447a906c4472b Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 26 Oct 2022 17:37:40 +0800 Subject: [PATCH 2/6] chore: add changelog for emqx_prometheus's change --- changes/v5.0.10-en.md | 4 ++++ changes/v5.0.10-zh.md | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/changes/v5.0.10-en.md b/changes/v5.0.10-en.md index c70ec9977..2b49aebf0 100644 --- a/changes/v5.0.10-en.md +++ b/changes/v5.0.10-en.md @@ -8,6 +8,10 @@ - Improve memory usage on core nodes when bootstrapping a replicant [#9236](https://github.com/emqx/emqx/pull/9236). +- Improve the stability of prometheus push_gateway and print error logs when POST fails [#9235](http://github.com/emqx/emqx/pull/9235). + +- Now we can disable some internal prometheus metrics. Closing some metrics that are not cared about when the prometheus/stats API timeout ( the machine load is too high ). [#9222](https://github.com/emqx/emqx/pull/9222). + ## Bug fixes - Fix error log message when `mechanism` is missing in authentication config [#8924](https://github.com/emqx/emqx/pull/8924). 
diff --git a/changes/v5.0.10-zh.md b/changes/v5.0.10-zh.md index 26b2737dd..636e7fbd6 100644 --- a/changes/v5.0.10-zh.md +++ b/changes/v5.0.10-zh.md @@ -8,6 +8,10 @@ - 在引导 `replicant` 节点时,改善 `core` 节点的内存使用量 [#9236](https://github.com/emqx/emqx/pull/9236)。 +- 增加 prometheus push_gateway 的稳定性, 并在 POST 失败时打印错误日志 [#9235](http://github.com/emqx/emqx/pull/9235)。 + +- 可通过配置关闭 prometheus 中的部分内部指标,如果遇到机器负载过高 prometheus 接口返回超时可考虑关闭部分不关心指标,以提高响应速度 [#9222](https://github.com/emqx/emqx/pull/9222)。 + ## Bug fixes - 优化认认证配置中 `mechanism` 字段缺失情况下的错误日志 [#8924](https://github.com/emqx/emqx/pull/8924)。 From 1757342e5ba5c896359eeff4b27539640d99967b Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 26 Oct 2022 17:51:40 +0800 Subject: [PATCH 3/6] chore: make dialyzer happy --- .../src/emqx_prometheus_config.erl | 26 ++++++++++++------- .../src/emqx_prometheus_sup.erl | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus_config.erl b/apps/emqx_prometheus/src/emqx_prometheus_config.erl index 83762b7c7..b4914f216 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_config.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_config.erl @@ -1,11 +1,18 @@ -%%%------------------------------------------------------------------- -%%% @author zhongwen -%%% @copyright (C) 2022, -%%% @doc -%%% -%%% @end -%%% Created : 26. 10月 2022 11:14 -%%%------------------------------------------------------------------- +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- -module(emqx_prometheus_config). -behaviour(emqx_config_handler). @@ -40,8 +47,7 @@ remove_handler() -> post_config_update(?PROMETHEUS, _Req, New, _Old, AppEnvs) -> application:set_env(AppEnvs), - update_prometheus(New), - ok; + update_prometheus(New); post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) -> ok. diff --git a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl index eaf96af43..a70fda322 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus_sup.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus_sup.erl @@ -68,4 +68,4 @@ init([]) -> assert_started({ok, _Pid}) -> ok; assert_started({ok, _Pid, _Info}) -> ok; assert_started({error, {already_started, _Pid}}) -> ok; -assert_started({error, Reason}) -> erlang:error(Reason). +assert_started({error, Reason}) -> {error, Reason}. 
From 97124be63afe361287714486c938e8eba926a503 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 26 Oct 2022 18:03:01 +0800 Subject: [PATCH 4/6] chore: add next_push_ms to emqx_prometheus:info/0 --- apps/emqx_prometheus/src/emqx_prometheus.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 4edf371cc..32f9928fb 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -83,8 +83,8 @@ init([]) -> #{interval := Interval} = opts(), {ok, #{timer => ensure_timer(Interval), ok => 0, failed => 0}}. -handle_call(info, _From, State) -> - {reply, State#{opts => opts()}, State}; +handle_call(info, _From, State = #{timer := Timer}) -> + {reply, State#{opts => opts(), next_push_ms => erlang:read_timer(Timer)}, State}; handle_call(_Msg, _From, State) -> {reply, ok, State}. From 3233a40af3289c4ce031984d072f2d39370c538c Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 26 Oct 2022 21:58:51 +0800 Subject: [PATCH 5/6] chore: don't delete bpapi when 5.0.x --- apps/emqx_prometheus/src/emqx_prometheus.erl | 14 ++++++++++++++ .../src/proto/emqx_prometheus_proto_v1.erl | 4 +++- changes/v5.0.10-en.md | 5 +++-- changes/v5.0.10-zh.md | 2 +- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/apps/emqx_prometheus/src/emqx_prometheus.erl b/apps/emqx_prometheus/src/emqx_prometheus.erl index 32f9928fb..de9349b97 100644 --- a/apps/emqx_prometheus/src/emqx_prometheus.erl +++ b/apps/emqx_prometheus/src/emqx_prometheus.erl @@ -59,6 +59,12 @@ -export([collect/1]). +-export([ + %% For bpapi, deprecated_since 5.0.10, remove this when 5.1.x + do_start/0, + do_stop/0 +]). + -define(C(K, L), proplists:get_value(K, L, 0)). -define(TIMER_MSG, '#interval'). @@ -591,3 +597,11 @@ emqx_cluster_data() -> {nodes_running, length(Running)}, {nodes_stopped, length(Stopped)} ]. 
+ +%% deprecated_since 5.0.10, remove this when 5.1.x +do_start() -> + emqx_prometheus_sup:start_child(?APP). + +%% deprecated_since 5.0.10, remove this when 5.1.x +do_stop() -> + emqx_prometheus_sup:stop_child(?APP). diff --git a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v1.erl b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v1.erl index c0529cabd..e11f8e3ad 100644 --- a/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v1.erl +++ b/apps/emqx_prometheus/src/proto/emqx_prometheus_proto_v1.erl @@ -20,13 +20,15 @@ -export([ introduced_in/0, - + deprecated_since/0, start/1, stop/1 ]). -include_lib("emqx/include/bpapi.hrl"). +deprecated_since() -> "5.0.10". + introduced_in() -> "5.0.0". diff --git a/changes/v5.0.10-en.md b/changes/v5.0.10-en.md index 2b49aebf0..4bb7332f9 100644 --- a/changes/v5.0.10-en.md +++ b/changes/v5.0.10-en.md @@ -8,9 +8,10 @@ - Improve memory usage on core nodes when bootstrapping a replicant [#9236](https://github.com/emqx/emqx/pull/9236). -- Improve the stability of prometheus push_gateway and print error logs when POST fails [#9235](http://github.com/emqx/emqx/pull/9235). +- Improve stability of Prometheus Push Gateway and log errors when POST fails [#9235](http://github.com/emqx/emqx/pull/9235). -- Now we can disable some internal prometheus metrics. Closing some metrics that are not cared about when the prometheus/stats API timeout ( the machine load is too high ). [#9222](https://github.com/emqx/emqx/pull/9222). +- Now it is possible to opt out of VM internal metrics in Prometheus stats [#9222](https://github.com/emqx/emqx/pull/9222). + When system load is high, reporting too much metrics data may cause the Prometheus stats API to time out. 
## Bug fixes diff --git a/changes/v5.0.10-zh.md b/changes/v5.0.10-zh.md index 636e7fbd6..df5a16eed 100644 --- a/changes/v5.0.10-zh.md +++ b/changes/v5.0.10-zh.md @@ -8,7 +8,7 @@ - 在引导 `replicant` 节点时,改善 `core` 节点的内存使用量 [#9236](https://github.com/emqx/emqx/pull/9236)。 -- 增加 prometheus push_gateway 的稳定性, 并在 POST 失败时打印错误日志 [#9235](http://github.com/emqx/emqx/pull/9235)。 +- 增加 Prometheus Push Gateway 的稳定性, 并在 POST 失败时打印错误日志 [#9235](http://github.com/emqx/emqx/pull/9235)。 - 可通过配置关闭 prometheus 中的部分内部指标,如果遇到机器负载过高 prometheus 接口返回超时可考虑关闭部分不关心指标,以提高响应速度 [#9222](https://github.com/emqx/emqx/pull/9222)。 From d6dfead31d663bee61ea07b00c48b2d617041ac3 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Wed, 26 Oct 2022 22:05:20 +0800 Subject: [PATCH 6/6] fix: prometheus ct failed --- apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl index 1a72b8952..e26bcfeb4 100644 --- a/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl +++ b/apps/emqx_prometheus/test/emqx_prometheus_SUITE.erl @@ -71,9 +71,13 @@ load_config() -> %%-------------------------------------------------------------------- t_start_stop(_) -> - ?assertMatch(ok, emqx_prometheus:start()), - ?assertMatch(ok, emqx_prometheus:stop()), - ?assertMatch(ok, emqx_prometheus:restart()), + App = emqx_prometheus, + ?assertMatch(ok, emqx_prometheus_sup:start_child(App)), + %% start twice return ok. + ?assertMatch(ok, emqx_prometheus_sup:start_child(App)), + ?assertMatch(ok, emqx_prometheus_sup:stop_child(App)), + %% stop twice return ok. + ?assertMatch(ok, emqx_prometheus_sup:stop_child(App)), %% wait the interval timer trigger timer:sleep(2000).