feat: change loads from string to float

This commit is contained in:
Zhongwen Deng 2023-01-30 11:53:49 +08:00
parent 0b19be074c
commit b6e6315b50
10 changed files with 199 additions and 55 deletions

View File

@ -325,19 +325,20 @@ deactivate_alarm(
false -> false ->
ok ok
end, end,
Now = erlang:system_time(microsecond),
HistoryAlarm = make_deactivated_alarm( HistoryAlarm = make_deactivated_alarm(
ActivateAt, ActivateAt,
Name, Name,
Details0, Details0,
Msg0, Msg0,
erlang:system_time(microsecond) Now
), ),
DeActAlarm = make_deactivated_alarm( DeActAlarm = make_deactivated_alarm(
ActivateAt, ActivateAt,
Name, Name,
Details, Details,
normalize_message(Name, iolist_to_binary(Message)), normalize_message(Name, iolist_to_binary(Message)),
erlang:system_time(microsecond) Now
), ),
mria:dirty_write(?DEACTIVATED_ALARM, HistoryAlarm), mria:dirty_write(?DEACTIVATED_ALARM, HistoryAlarm),
mria:dirty_delete(?ACTIVATED_ALARM, Name), mria:dirty_delete(?ACTIVATED_ALARM, Name),

View File

@ -93,9 +93,9 @@ init([]) ->
%% memsup is not reliable, ignore %% memsup is not reliable, ignore
memsup:set_sysmem_high_watermark(1.0), memsup:set_sysmem_high_watermark(1.0),
SysHW = init_os_monitor(), SysHW = init_os_monitor(),
_ = start_mem_check_timer(), MemRef = start_mem_check_timer(),
_ = start_cpu_check_timer(), CpuRef = start_cpu_check_timer(),
{ok, #{sysmem_high_watermark => SysHW}}. {ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
init_os_monitor() -> init_os_monitor() ->
init_os_monitor(emqx:get_config([sysmon, os])). init_os_monitor(emqx:get_config([sysmon, os])).
@ -125,8 +125,8 @@ handle_cast(Msg, State) ->
handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = State) -> handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = State) ->
ok = update_mem_alarm_status(HWM), ok = update_mem_alarm_status(HWM),
ok = start_mem_check_timer(), Ref = start_mem_check_timer(),
{noreply, State}; {noreply, State#{mem_time_ref => Ref}};
handle_info({timeout, _Timer, cpu_check}, State) -> handle_info({timeout, _Timer, cpu_check}, State) ->
CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100, CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100, CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
@ -158,11 +158,14 @@ handle_info({timeout, _Timer, cpu_check}, State) ->
_Busy -> _Busy ->
ok ok
end, end,
ok = start_cpu_check_timer(), Ref = start_cpu_check_timer(),
{noreply, State}; {noreply, State#{cpu_time_ref => Ref}};
handle_info({monitor_conf_update, OS}, _State) -> handle_info({monitor_conf_update, OS}, State) ->
cancel_outdated_timer(State),
SysHW = init_os_monitor(OS), SysHW = init_os_monitor(OS),
{noreply, #{sysmem_high_watermark => SysHW}}; MemRef = start_mem_check_timer(),
CpuRef = start_cpu_check_timer(),
{noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}};
handle_info(Info, State) -> handle_info(Info, State) ->
?SLOG(error, #{msg => "unexpected_info", info => Info}), ?SLOG(error, #{msg => "unexpected_info", info => Info}),
{noreply, State}. {noreply, State}.
@ -176,11 +179,15 @@ code_change(_OldVsn, State, _Extra) ->
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
%% Internal functions %% Internal functions
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
%% Cancel the memory-check and CPU-check timers whose references are kept
%% in the server state under `mem_time_ref' and `cpu_time_ref'.
%% Always returns `ok'.
%% NOTE(review): a stored reference may be `undefined' when the matching
%% timer was never started; presumably emqx_misc:cancel_timer/1 tolerates
%% that -- confirm.
cancel_outdated_timer(#{mem_time_ref := MemTimer, cpu_time_ref := CpuTimer}) ->
    lists:foreach(fun emqx_misc:cancel_timer/1, [MemTimer, CpuTimer]).
start_cpu_check_timer() -> start_cpu_check_timer() ->
Interval = emqx:get_config([sysmon, os, cpu_check_interval]), Interval = emqx:get_config([sysmon, os, cpu_check_interval]),
case erlang:system_info(system_architecture) of case erlang:system_info(system_architecture) of
"x86_64-pc-linux-musl" -> ok; "x86_64-pc-linux-musl" -> undefined;
_ -> start_timer(Interval, cpu_check) _ -> start_timer(Interval, cpu_check)
end. end.
@ -193,12 +200,11 @@ start_mem_check_timer() ->
true -> true ->
start_timer(Interval, mem_check); start_timer(Interval, mem_check);
false -> false ->
ok undefined
end. end.
start_timer(Interval, Msg) -> start_timer(Interval, Msg) ->
_ = emqx_misc:start_timer(Interval, Msg), emqx_misc:start_timer(Interval, Msg).
ok.
update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 -> update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 ->
?SLOG(warning, #{msg => "discarded_out_of_range_mem_alarm_threshold", value => HWM}), ?SLOG(warning, #{msg => "discarded_out_of_range_mem_alarm_threshold", value => HWM}),
@ -225,7 +231,7 @@ do_update_mem_alarm_status(HWM0) ->
}, },
usage_msg(Usage, mem) usage_msg(Usage, mem)
); );
_ -> false ->
ok = emqx_alarm:ensure_deactivated( ok = emqx_alarm:ensure_deactivated(
high_system_memory_usage, high_system_memory_usage,
#{ #{

View File

@ -175,9 +175,9 @@ schedulers() ->
loads() -> loads() ->
[ [
{load1, ftos(avg1() / 256)}, {load1, load(avg1())},
{load5, ftos(avg5() / 256)}, {load5, load(avg5())},
{load15, ftos(avg15() / 256)} {load15, load(avg15())}
]. ].
system_info_keys() -> ?SYSTEM_INFO_KEYS. system_info_keys() -> ?SYSTEM_INFO_KEYS.
@ -232,9 +232,6 @@ mem_info() ->
Free = proplists:get_value(free_memory, Dataset), Free = proplists:get_value(free_memory, Dataset),
[{total_memory, Total}, {used_memory, Total - Free}]. [{total_memory, Total}, {used_memory, Total - Free}].
%% Render a float as a binary with exactly two decimal places.
%% Only floats are accepted; any other term fails with function_clause.
ftos(F) when is_float(F) ->
    Opts = [{decimals, 2}],
    float_to_binary(F, Opts).
%%%% erlang vm scheduler_usage fun copied from recon %%%% erlang vm scheduler_usage fun copied from recon
scheduler_usage(Interval) when is_integer(Interval) -> scheduler_usage(Interval) when is_integer(Interval) ->
%% We start and stop the scheduler_wall_time system flag %% We start and stop the scheduler_wall_time system flag
@ -400,6 +397,9 @@ compat_windows(Fun) ->
end end
end. end.
%% Scale a raw load-average reading down by 256 and truncate the result
%% to two decimal places. The result is always a float.
load(Avg) ->
    Hundredths = (Avg / 256) * 100,
    floor(Hundredths) / 100.
%% @doc Return on which Erlang/OTP the current vm is running. %% @doc Return on which Erlang/OTP the current vm is running.
%% The dashboard's /api/nodes endpoint will call this function frequently. %% The dashboard's /api/nodes endpoint will call this function frequently.
%% we should avoid reading file every time. %% we should avoid reading file every time.

View File

@ -25,25 +25,44 @@ all() -> emqx_common_test_helpers:all(?MODULE).
init_per_suite(Config) -> init_per_suite(Config) ->
emqx_common_test_helpers:boot_modules(all), emqx_common_test_helpers:boot_modules(all),
emqx_common_test_helpers:start_apps( emqx_common_test_helpers:start_apps([]),
[],
fun
(emqx) ->
application:set_env(emqx, os_mon, [
{cpu_check_interval, 1},
{cpu_high_watermark, 5},
{cpu_low_watermark, 80},
{procmem_high_watermark, 5}
]);
(_) ->
ok
end
),
Config. Config.
end_per_suite(_Config) -> end_per_suite(_Config) ->
emqx_common_test_helpers:stop_apps([]). emqx_common_test_helpers:stop_apps([]).
%% Per-testcase setup.
%% Every clause boots the modules and starts the applications. The two
%% alarm testcases additionally override part of the [sysmon, os] config
%% and restart emqx_os_mon under emqx_sys_sup so the overrides take effect.
init_per_testcase(t_cpu_check_alarm, Config) ->
    emqx_common_test_helpers:boot_modules(all),
    emqx_common_test_helpers:start_apps([]),
    Overrides = #{
        cpu_high_watermark => 0.9,
        cpu_low_watermark => 0,
        %% 200ms
        cpu_check_interval => 200
    },
    OsConf = emqx_config:get([sysmon, os], #{}),
    emqx_config:put([sysmon, os], maps:merge(OsConf, Overrides)),
    ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
    {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
    Config;
init_per_testcase(t_sys_mem_check_alarm, Config) ->
    emqx_common_test_helpers:boot_modules(all),
    emqx_common_test_helpers:start_apps([]),
    Overrides = #{
        sysmem_high_watermark => 0.51,
        %% 200ms
        mem_check_interval => 200
    },
    OsConf = emqx_config:get([sysmon, os], #{}),
    emqx_config:put([sysmon, os], maps:merge(OsConf, Overrides)),
    %% Make os:type/0 report {unix, linux} before emqx_os_mon is restarted.
    %% NOTE(review): this mock of `os' is not unloaded in this clause;
    %% confirm that a matching end_per_testcase unloads it.
    ok = meck:new(os, [non_strict, no_link, no_history, passthrough, unstick]),
    ok = meck:expect(os, type, fun() -> {unix, linux} end),
    ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
    {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
    Config;
init_per_testcase(_TestCase, Config) ->
    emqx_common_test_helpers:boot_modules(all),
    emqx_common_test_helpers:start_apps([]),
    Config.
t_api(_) -> t_api(_) ->
?assertEqual(60000, emqx_os_mon:get_mem_check_interval()), ?assertEqual(60000, emqx_os_mon:get_mem_check_interval()),
?assertEqual(ok, emqx_os_mon:set_mem_check_interval(30000)), ?assertEqual(ok, emqx_os_mon:set_mem_check_interval(30000)),
@ -67,3 +86,98 @@ t_api(_) ->
emqx_os_mon ! ignored, emqx_os_mon ! ignored,
gen_server:stop(emqx_os_mon), gen_server:stop(emqx_os_mon),
ok. ok.
%% Check the `high_system_memory_usage' alarm end to end:
%%   1. with load_ctl:get_memory_usage/0 mocked to return 0.52345, the alarm
%%      must be activated and carry the expected details;
%%   2. after the watermark is raised to 0.99999 and emqx_os_mon is
%%      restarted, the alarm must no longer be among the activated alarms.
t_sys_mem_check_alarm(_) ->
    emqx_config:put([sysmon, os, mem_check_interval], 200),
    emqx_os_mon:update(emqx_config:get([sysmon, os])),
    Mem = 0.52345,
    %% Mem as a percentage, truncated to two decimals.
    Usage = floor(Mem * 10000) / 100,
    MockedUsage = fun() -> Mem end,
    Scenario = fun() ->
        %% Give emqx_os_mon time to run its periodic memory check.
        timer:sleep(500),
        Alarms = emqx_alarm:get_alarms(activated),
        %% The second argument is only evaluated when the assertion fails.
        ?assert(
            emqx_vm_mon_SUITE:is_existing(
                high_system_memory_usage, emqx_alarm:get_alarms(activated)
            ),
            #{
                load_ctl_memory => load_ctl:get_memory_usage(),
                config => emqx_config:get([sysmon, os]),
                process => sys:get_state(emqx_os_mon),
                alarms => Alarms
            }
        ),
        MemAlarms = [A || #{name := high_system_memory_usage} = A <- Alarms],
        %% Exactly one such alarm is expected, with these fields.
        [
            #{
                activate_at := _,
                activated := true,
                deactivate_at := infinity,
                details := #{high_watermark := 51.0, usage := RealUsage},
                message := Msg,
                name := high_system_memory_usage
            }
        ] = MemAlarms,
        ?assert(RealUsage >= Usage, {RealUsage, Usage}),
        ?assert(is_binary(Msg)),
        %% Raise the watermark above the mocked usage and restart the monitor.
        emqx_config:put([sysmon, os, sysmem_high_watermark], 0.99999),
        ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
        {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
        timer:sleep(600),
        StillActive = emqx_alarm:get_alarms(activated),
        ?assertNot(
            emqx_vm_mon_SUITE:is_existing(high_system_memory_usage, StillActive),
            #{activated => StillActive, process_state => sys:get_state(emqx_os_mon)}
        )
    end,
    emqx_common_test_helpers:with_mock(
        load_ctl,
        get_memory_usage,
        MockedUsage,
        Scenario
    ).
%% Check the `high_cpu_usage' alarm end to end:
%%   1. with cpu_sup:util/0 mocked to return 90.12345, the alarm must be
%%      activated and carry the expected details;
%%   2. after the high/low watermarks are changed to 1 and 0.96, the alarm
%%      must no longer be among the activated alarms.
t_cpu_check_alarm(_) ->
    CpuUtil = 90.12345,
    %% CpuUtil truncated to two decimals.
    Usage = floor(CpuUtil * 100) / 100,
    MockedUtil = fun() -> CpuUtil end,
    Scenario = fun() ->
        %% Give emqx_os_mon time to run its periodic CPU check.
        timer:sleep(500),
        Alarms = emqx_alarm:get_alarms(activated),
        ?assert(
            emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
        ),
        CpuAlarms = [A || #{name := high_cpu_usage} = A <- Alarms],
        %% Exactly one such alarm is expected, with these fields.
        [
            #{
                activate_at := _,
                activated := true,
                deactivate_at := infinity,
                details := #{high_watermark := 90.0, low_watermark := 0, usage := RealUsage},
                message := Msg,
                name := high_cpu_usage
            }
        ] = CpuAlarms,
        ?assert(RealUsage >= Usage, {RealUsage, Usage}),
        ?assert(is_binary(Msg)),
        %% Move both watermarks above the mocked utilisation and wait for
        %% the next check.
        emqx_config:put([sysmon, os, cpu_high_watermark], 1),
        emqx_config:put([sysmon, os, cpu_low_watermark], 0.96),
        timer:sleep(500),
        ?assertNot(
            emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
        )
    end,
    emqx_common_test_helpers:with_mock(
        cpu_sup,
        util,
        MockedUtil,
        Scenario
    ).

View File

@ -25,19 +25,22 @@ all() -> emqx_common_test_helpers:all(?MODULE).
t_load(_Config) -> t_load(_Config) ->
lists:foreach( lists:foreach(
fun(Avg, Int) -> fun({Avg, LoadKey, Int}) ->
emqx_common_test_helpers:with_mock( emqx_common_test_helpers:with_mock(
cpu_sup, cpu_sup,
Avg, Avg,
fun() -> Int end, fun() -> Int end,
fun() -> fun() ->
Load = proplists:get_value(Avg, emqx_vm:loads()), Load = proplists:get_value(LoadKey, emqx_vm:loads()),
?assertEqual(Int / 1.0, Load) ?assertEqual(Int / 256, Load)
end end
), )
?assertMatch([{load1, _}, {load5, _}, {load15, _}], emqx_vm:loads())
end, end,
[{load1, 1}, {load5, 5}, {load15, 15}] [{avg1, load1, 0}, {avg5, load5, 128}, {avg15, load15, 256}]
),
?assertMatch(
[{load1, _}, {load5, _}, {load15, _}],
emqx_vm:loads()
). ).
t_systeminfo(_Config) -> t_systeminfo(_Config) ->

View File

@ -23,13 +23,13 @@
all() -> emqx_common_test_helpers:all(?MODULE). all() -> emqx_common_test_helpers:all(?MODULE).
init_per_testcase(t_alarms, Config) -> init_per_testcase(t_too_many_processes_alarm, Config) ->
emqx_common_test_helpers:boot_modules(all), emqx_common_test_helpers:boot_modules(all),
emqx_common_test_helpers:start_apps([]), emqx_common_test_helpers:start_apps([]),
emqx_config:put([sysmon, vm], #{ emqx_config:put([sysmon, vm], #{
process_high_watermark => 0, process_high_watermark => 0,
process_low_watermark => 0, process_low_watermark => 0,
%% 1s %% 100ms
process_check_interval => 100 process_check_interval => 100
}), }),
ok = supervisor:terminate_child(emqx_sys_sup, emqx_vm_mon), ok = supervisor:terminate_child(emqx_sys_sup, emqx_vm_mon),
@ -43,9 +43,29 @@ init_per_testcase(_, Config) ->
end_per_testcase(_, _Config) -> end_per_testcase(_, _Config) ->
emqx_common_test_helpers:stop_apps([]). emqx_common_test_helpers:stop_apps([]).
t_alarms(_) -> t_too_many_processes_alarm(_) ->
timer:sleep(500), timer:sleep(500),
Alarms = emqx_alarm:get_alarms(activated),
?assert(is_existing(too_many_processes, emqx_alarm:get_alarms(activated))), ?assert(is_existing(too_many_processes, emqx_alarm:get_alarms(activated))),
?assertMatch(
[
#{
activate_at := _,
activated := true,
deactivate_at := infinity,
details := #{high_watermark := 0, low_watermark := 0, usage := "0%"},
message := <<"0% process usage">>,
name := too_many_processes
}
],
lists:filter(
fun
(#{name := too_many_processes}) -> true;
(_) -> false
end,
Alarms
)
),
emqx_config:put([sysmon, vm, process_high_watermark], 70), emqx_config:put([sysmon, vm, process_high_watermark], 70),
emqx_config:put([sysmon, vm, process_low_watermark], 60), emqx_config:put([sysmon, vm, process_low_watermark], 60),
timer:sleep(500), timer:sleep(500),

View File

@ -126,7 +126,7 @@ lookup_node(Node) ->
node_info() -> node_info() ->
{UsedRatio, Total} = get_sys_memory(), {UsedRatio, Total} = get_sys_memory(),
Info = maps:from_list([{K, list_to_binary(V)} || {K, V} <- emqx_vm:loads()]), Info = maps:from_list(emqx_vm:loads()),
BrokerInfo = emqx_sys:info(), BrokerInfo = emqx_sys:info(),
Info#{ Info#{
node => node(), node => node(),

View File

@ -159,18 +159,18 @@ fields(node_info) ->
)}, )},
{load1, {load1,
mk( mk(
string(), float(),
#{desc => <<"CPU average load in 1 minute">>, example => "2.66"} #{desc => <<"CPU average load in 1 minute">>, example => 2.66}
)}, )},
{load5, {load5,
mk( mk(
string(), float(),
#{desc => <<"CPU average load in 5 minute">>, example => "2.66"} #{desc => <<"CPU average load in 5 minute">>, example => 2.66}
)}, )},
{load15, {load15,
mk( mk(
string(), float(),
#{desc => <<"CPU average load in 15 minute">>, example => "2.66"} #{desc => <<"CPU average load in 15 minute">>, example => 2.66}
)}, )},
{max_fds, {max_fds,
mk( mk(

View File

@ -17,7 +17,7 @@
-behaviour(gen_server). -behaviour(gen_server).
-define(SYS_MEMORY_CACHE_KEY, ?MODULE). -define(SYS_MEMORY_CACHE_KEY, ?MODULE).
-define(TIMEOUT, 3000). -define(TIMEOUT, 2200).
-export([start_link/0, get_sys_memory/0, get_sys_memory/1]). -export([start_link/0, get_sys_memory/0, get_sys_memory/1]).
-export([ -export([

View File

@ -24,11 +24,11 @@ all() ->
emqx_common_test_helpers:all(?MODULE). emqx_common_test_helpers:all(?MODULE).
init_per_suite(Config) -> init_per_suite(Config) ->
emqx_mgmt_api_test_util:init_suite([emqx_conf]), emqx_mgmt_api_test_util:init_suite([emqx_conf, emqx_management]),
Config. Config.
end_per_suite(_) -> end_per_suite(_) ->
emqx_mgmt_api_test_util:end_suite([emqx_conf]). emqx_mgmt_api_test_util:end_suite([emqx_management, emqx_conf]).
init_per_testcase(t_log_path, Config) -> init_per_testcase(t_log_path, Config) ->
emqx_config_logger:add_handler(), emqx_config_logger:add_handler(),