Merge pull request #9974 from zmstone/0630-fix-ram-metrics-data

fix(metrics): use the same data source for RAM usage info
This commit is contained in:
Zaiming (Stone) Shi 2023-02-16 08:35:47 +01:00 committed by GitHub
commit 41c7e8a1c3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 34 additions and 77 deletions

View File

@ -1,33 +1,5 @@
%% -*- mode: erlang -*-
%% Unless you know what you are doing, DO NOT edit manually!!
{VSN,
[{"5.0.0",
[{load_module,emqx_quic_connection,brutal_purge,soft_purge,[]},
{load_module,emqx_config,brutal_purge,soft_purge,[]},
{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_schema,brutal_purge,soft_purge,[]},
{load_module,emqx_release,brutal_purge,soft_purge,[]},
{load_module,emqx_authentication,brutal_purge,soft_purge,[]},
{load_module,emqx_metrics,brutal_purge,soft_purge,[]},
{add_module,emqx_exclusive_subscription},
{apply,{emqx_exclusive_subscription,on_add_module,[]}},
{load_module,emqx_broker,brutal_purge,soft_purge,[]},
{load_module,emqx_mqtt_caps,brutal_purge,soft_purge,[]},
{load_module,emqx_topic,brutal_purge,soft_purge,[]},
{load_module,emqx_relup}]},
{<<".*">>,[]}],
[{"5.0.0",
[{load_module,emqx_quic_connection,brutal_purge,soft_purge,[]},
{load_module,emqx_config,brutal_purge,soft_purge,[]},
{load_module,emqx_channel,brutal_purge,soft_purge,[]},
{load_module,emqx_schema,brutal_purge,soft_purge,[]},
{load_module,emqx_release,brutal_purge,soft_purge,[]},
{load_module,emqx_authentication,brutal_purge,soft_purge,[]},
{load_module,emqx_metrics,brutal_purge,soft_purge,[]},
{load_module,emqx_broker,brutal_purge,soft_purge,[]},
{load_module,emqx_mqtt_caps,brutal_purge,soft_purge,[]},
{load_module,emqx_topic,brutal_purge,soft_purge,[]},
{apply,{emqx_exclusive_subscription,on_delete_module,[]}},
{delete_module,emqx_exclusive_subscription},
{load_module,emqx_relup}]},
{<<".*">>,[]}]}.
[{<<".*">>,[]}],
[{<<".*">>,[]}]}.

View File

@ -24,7 +24,6 @@
get_system_info/1,
get_memory/0,
get_memory/2,
mem_info/0,
loads/0
]).
@ -226,12 +225,6 @@ convert_allocated_areas({Key, Value1, Value2}) ->
convert_allocated_areas({Key, Value}) ->
{Key, Value}.
%% @doc Summarise system memory as reported by memsup:get_system_memory_data/0.
%% Returns a two-element proplist: `total_memory' is taken as-is from memsup,
%% and `used_memory' is derived as total minus free.
mem_info() ->
    MemData = memsup:get_system_memory_data(),
    TotalMem = proplists:get_value(total_memory, MemData),
    FreeMem = proplists:get_value(free_memory, MemData),
    UsedMem = TotalMem - FreeMem,
    [{total_memory, TotalMem}, {used_memory, UsedMem}].
%%%% erlang vm scheduler_usage fun copied from recon
scheduler_usage(Interval) when is_integer(Interval) ->
%% We start and stop the scheduler_wall_time system flag

View File

@ -50,12 +50,6 @@ t_systeminfo(_Config) ->
),
?assertEqual(undefined, emqx_vm:get_system_info(undefined)).
%% Starts os_mon (which provides memsup), then checks that emqx_vm:mem_info/0
%% yields exactly the total_memory and used_memory pairs, in that order.
%% os_mon is stopped again afterwards.
t_mem_info(_Config) ->
    application:ensure_all_started(os_mon),
    [{total_memory, _}, {used_memory, _}] = emqx_vm:mem_info(),
    application:stop(os_mon).
%% Checks that the keys of the pairs returned by emqx_vm:get_process_info/0
%% are, in order, exactly the list returned by emqx_vm:process_info_keys/0.
t_process_info(_Config) ->
ProcessInfo = emqx_vm:get_process_info(),
?assertEqual(emqx_vm:process_info_keys(), [K || {K, _V} <- ProcessInfo]).

View File

@ -104,7 +104,10 @@
]).
%% Common Table API
-export([max_row_limit/0]).
-export([
max_row_limit/0,
vm_stats/0
]).
-define(APP, emqx_management).
@ -161,6 +164,23 @@ node_info(Nodes) ->
stopped_node_info(Node) ->
#{name => Node, node_status => 'stopped'}.
%% @doc Collect node-level VM gauges as a proplist with the keys
%% `run_queue', `cpu_idle', `cpu_use', `total_memory' and `used_memory'.
%%
%% CPU idle comes from cpu_sup:util/1; `cpu_use' is derived as 100 - idle.
%% Memory figures come from get_sys_memory/0, which yields a used ratio and a
%% total; `used_memory' is the rounded product of the two.
vm_stats() ->
    Idle =
        case cpu_sup:util([detailed]) of
            %% Not supported on Windows
            {_, 0, 0, _} -> 0;
            {_Num, _Use, IdleList, _} -> proplists:get_value(idle, IdleList, 0);
            %% cpu_sup:util/1 may also return an error tuple; fall back to 0
            %% instead of crashing the metrics reporter with a case_clause.
            {error, _Reason} -> 0
        end,
    RunQueue = erlang:statistics(run_queue),
    {MemUsedRatio, MemTotal} = get_sys_memory(),
    [
        {run_queue, RunQueue},
        {cpu_idle, Idle},
        {cpu_use, 100 - Idle},
        {total_memory, MemTotal},
        {used_memory, erlang:round(MemTotal * MemUsedRatio)}
    ].
%%--------------------------------------------------------------------
%% Brokers
%%--------------------------------------------------------------------

View File

@ -2,10 +2,10 @@
{application, emqx_prometheus, [
{description, "Prometheus for EMQX"},
% strict semver, bump manually!
{vsn, "5.0.5"},
{vsn, "5.0.6"},
{modules, []},
{registered, [emqx_prometheus_sup]},
{applications, [kernel, stdlib, prometheus, emqx]},
{applications, [kernel, stdlib, prometheus, emqx, emqx_management]},
{mod, {emqx_prometheus_app, []}},
{env, []},
{licenses, ["Apache-2.0"]},

View File

@ -590,20 +590,7 @@ emqx_vm() ->
].
emqx_vm_data() ->
Idle =
case cpu_sup:util([detailed]) of
%% Not support for Windows
{_, 0, 0, _} -> 0;
{_Num, _Use, IdleList, _} -> ?C(idle, IdleList)
end,
RunQueue = erlang:statistics(run_queue),
[
{run_queue, RunQueue},
%% XXX: Plan removed at v5.0
{process_total_messages, 0},
{cpu_idle, Idle},
{cpu_use, 100 - Idle}
] ++ emqx_vm:mem_info().
emqx_mgmt:vm_stats().
emqx_cluster() ->
[

View File

@ -1,14 +1,15 @@
%% -*- mode: erlang -*-
{application, emqx_statsd, [
{description, "EMQX Statsd"},
{vsn, "5.0.4"},
{vsn, "5.0.5"},
{registered, []},
{mod, {emqx_statsd_app, []}},
{applications, [
kernel,
stdlib,
estatsd,
emqx
emqx,
emqx_management
]},
{env, []},
{modules, []},

View File

@ -105,7 +105,7 @@ handle_info(
timer := Ref
}
) ->
Metrics = emqx_metrics:all() ++ emqx_stats:getstats() ++ emqx_vm_data(),
Metrics = emqx_metrics:all() ++ emqx_stats:getstats() ++ emqx_mgmt:vm_stats(),
SampleRate = SampleTimeInterval / FlushTimeInterval,
StatsdMetrics = [
{gauge, Name, Value, SampleRate, []}
@ -129,20 +129,6 @@ terminate(_Reason, #{estatsd_pid := Pid}) ->
%% Internal function
%%------------------------------------------------------------------------------
%% Collect VM gauges for the statsd sample: run queue length, CPU idle/use
%% (from cpu_sup:util/1, with use derived as 100 - idle), followed by the
%% memory pairs returned by emqx_vm:mem_info/0.
emqx_vm_data() ->
    Idle =
        case cpu_sup:util([detailed]) of
            %% Not supported on Windows
            {_, 0, 0, _} -> 0;
            {_Num, _Use, IdleList, _} -> proplists:get_value(idle, IdleList, 0);
            %% cpu_sup:util/1 may also return an error tuple; fall back to 0
            %% instead of crashing the reporter process with a case_clause.
            {error, _Reason} -> 0
        end,
    RunQueue = erlang:statistics(run_queue),
    [
        {run_queue, RunQueue},
        {cpu_idle, Idle},
        {cpu_use, 100 - Idle}
    ] ++ emqx_vm:mem_info().
%% Starts a timer via emqx_misc:start_timer/2 using the configured sample
%% interval and the ?SAMPLE_TIMEOUT message, and stores the result under the
%% `timer' key of the state map.
ensure_timer(#{sample_time_interval := Interval} = State) ->
    TimerRef = emqx_misc:start_timer(Interval, ?SAMPLE_TIMEOUT),
    State#{timer => TimerRef}.

View File

@ -0,0 +1,2 @@
Report memory usage to statsd and prometheus using the same data source as the dashboard.
Prior to this fix, memory usage was collected from an outdated source that did not work well in containers.

View File

@ -0,0 +1,2 @@
Statsd 和 prometheus 使用跟 Dashboard 相同的内存用量数据源。
在此修复前,内存的总量和用量统计使用了过时的(在容器环境中不准确的)数据源。