From b6e6315b5076c39a2c394623651fdfc44dc3b1a9 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Mon, 30 Jan 2023 11:53:49 +0800 Subject: [PATCH] feat: change loads from string to float --- apps/emqx/src/emqx_alarm.erl | 5 +- apps/emqx/src/emqx_os_mon.erl | 34 +++-- apps/emqx/src/emqx_vm.erl | 12 +- apps/emqx/test/emqx_os_mon_SUITE.erl | 142 ++++++++++++++++-- apps/emqx/test/emqx_vm_SUITE.erl | 15 +- apps/emqx/test/emqx_vm_mon_SUITE.erl | 26 +++- apps/emqx_management/src/emqx_mgmt.erl | 2 +- .../src/emqx_mgmt_api_nodes.erl | 12 +- .../src/emqx_mgmt_sys_memory.erl | 2 +- .../test/emqx_mgmt_api_nodes_SUITE.erl | 4 +- 10 files changed, 199 insertions(+), 55 deletions(-) diff --git a/apps/emqx/src/emqx_alarm.erl b/apps/emqx/src/emqx_alarm.erl index 209715a85..84c40ef2a 100644 --- a/apps/emqx/src/emqx_alarm.erl +++ b/apps/emqx/src/emqx_alarm.erl @@ -325,19 +325,20 @@ deactivate_alarm( false -> ok end, + Now = erlang:system_time(microsecond), HistoryAlarm = make_deactivated_alarm( ActivateAt, Name, Details0, Msg0, - erlang:system_time(microsecond) + Now ), DeActAlarm = make_deactivated_alarm( ActivateAt, Name, Details, normalize_message(Name, iolist_to_binary(Message)), - erlang:system_time(microsecond) + Now ), mria:dirty_write(?DEACTIVATED_ALARM, HistoryAlarm), mria:dirty_delete(?ACTIVATED_ALARM, Name), diff --git a/apps/emqx/src/emqx_os_mon.erl b/apps/emqx/src/emqx_os_mon.erl index 5c6987ea0..c5ce35bf9 100644 --- a/apps/emqx/src/emqx_os_mon.erl +++ b/apps/emqx/src/emqx_os_mon.erl @@ -93,9 +93,9 @@ init([]) -> %% memsup is not reliable, ignore memsup:set_sysmem_high_watermark(1.0), SysHW = init_os_monitor(), - _ = start_mem_check_timer(), - _ = start_cpu_check_timer(), - {ok, #{sysmem_high_watermark => SysHW}}. + MemRef = start_mem_check_timer(), + CpuRef = start_cpu_check_timer(), + {ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}. init_os_monitor() -> init_os_monitor(emqx:get_config([sysmon, os])). @@ -125,8 +125,8 @@ handle_cast(Msg, State) -> handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = State) -> ok = update_mem_alarm_status(HWM), - ok = start_mem_check_timer(), - {noreply, State}; + Ref = start_mem_check_timer(), + {noreply, State#{mem_time_ref => Ref}}; handle_info({timeout, _Timer, cpu_check}, State) -> CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100, CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100, @@ -158,11 +158,14 @@ handle_info({timeout, _Timer, cpu_check}, State) -> _Busy -> ok end, - ok = start_cpu_check_timer(), - {noreply, State}; -handle_info({monitor_conf_update, OS}, _State) -> + Ref = start_cpu_check_timer(), + {noreply, State#{cpu_time_ref => Ref}}; +handle_info({monitor_conf_update, OS}, State) -> + cancel_outdated_timer(State), SysHW = init_os_monitor(OS), - {noreply, #{sysmem_high_watermark => SysHW}}; + MemRef = start_mem_check_timer(), + CpuRef = start_cpu_check_timer(), + {noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}; handle_info(Info, State) -> ?SLOG(error, #{msg => "unexpected_info", info => Info}), {noreply, State}. @@ -176,11 +179,15 @@ code_change(_OldVsn, State, _Extra) -> %%-------------------------------------------------------------------- %% Internal functions %%-------------------------------------------------------------------- +cancel_outdated_timer(#{mem_time_ref := MemRef, cpu_time_ref := CpuRef}) -> + emqx_misc:cancel_timer(MemRef), + emqx_misc:cancel_timer(CpuRef), + ok. start_cpu_check_timer() -> Interval = emqx:get_config([sysmon, os, cpu_check_interval]), case erlang:system_info(system_architecture) of - "x86_64-pc-linux-musl" -> ok; + "x86_64-pc-linux-musl" -> undefined; _ -> start_timer(Interval, cpu_check) end. @@ -193,12 +200,11 @@ start_mem_check_timer() -> true -> start_timer(Interval, mem_check); false -> - ok + undefined end. start_timer(Interval, Msg) -> - _ = emqx_misc:start_timer(Interval, Msg), - ok. + emqx_misc:start_timer(Interval, Msg). update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 -> ?SLOG(warning, #{msg => "discarded_out_of_range_mem_alarm_threshold", value => HWM}), @@ -225,7 +231,7 @@ do_update_mem_alarm_status(HWM0) -> }, usage_msg(Usage, mem) ); - _ -> + false -> ok = emqx_alarm:ensure_deactivated( high_system_memory_usage, #{ diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index 7da49016d..f80d18a3a 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -175,9 +175,9 @@ schedulers() -> loads() -> [ - {load1, ftos(avg1() / 256)}, - {load5, ftos(avg5() / 256)}, - {load15, ftos(avg15() / 256)} + {load1, load(avg1())}, + {load5, load(avg5())}, + {load15, load(avg15())} ]. system_info_keys() -> ?SYSTEM_INFO_KEYS. @@ -232,9 +232,6 @@ mem_info() -> Free = proplists:get_value(free_memory, Dataset), [{total_memory, Total}, {used_memory, Total - Free}]. -ftos(F) when is_float(F) -> - float_to_binary(F, [{decimals, 2}]). - %%%% erlang vm scheduler_usage fun copied from recon scheduler_usage(Interval) when is_integer(Interval) -> %% We start and stop the scheduler_wall_time system flag @@ -400,6 +397,9 @@ compat_windows(Fun) -> end end. +load(Avg) -> + floor((Avg / 256) * 100) / 100. + %% @doc Return on which Erlang/OTP the current vm is running. %% The dashboard's /api/nodes endpoint will call this function frequently. %% we should avoid reading file every time. diff --git a/apps/emqx/test/emqx_os_mon_SUITE.erl b/apps/emqx/test/emqx_os_mon_SUITE.erl index 8729bbdb6..0c5a1f261 100644 --- a/apps/emqx/test/emqx_os_mon_SUITE.erl +++ b/apps/emqx/test/emqx_os_mon_SUITE.erl @@ -25,25 +25,44 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps( - [], - fun - (emqx) -> - application:set_env(emqx, os_mon, [ - {cpu_check_interval, 1}, - {cpu_high_watermark, 5}, - {cpu_low_watermark, 80}, - {procmem_high_watermark, 5} - ]); - (_) -> - ok - end - ), + emqx_common_test_helpers:start_apps([]), Config. end_per_suite(_Config) -> emqx_common_test_helpers:stop_apps([]). +init_per_testcase(t_cpu_check_alarm, Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + SysMon = emqx_config:get([sysmon, os], #{}), + emqx_config:put([sysmon, os], SysMon#{ + cpu_high_watermark => 0.9, + cpu_low_watermark => 0, + %% 200ms + cpu_check_interval => 200 + }), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + Config; +init_per_testcase(t_sys_mem_check_alarm, Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + SysMon = emqx_config:get([sysmon, os], #{}), + emqx_config:put([sysmon, os], SysMon#{ + sysmem_high_watermark => 0.51, + %% 200ms + mem_check_interval => 200 + }), + ok = meck:new(os, [non_strict, no_link, no_history, passthrough, unstick]), + ok = meck:expect(os, type, fun() -> {unix, linux} end), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + Config; +init_per_testcase(_, Config) -> + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([]), + Config. + t_api(_) -> ?assertEqual(60000, emqx_os_mon:get_mem_check_interval()), ?assertEqual(ok, emqx_os_mon:set_mem_check_interval(30000)), @@ -67,3 +86,98 @@ t_api(_) -> emqx_os_mon ! ignored, gen_server:stop(emqx_os_mon), ok. + +t_sys_mem_check_alarm(_) -> + emqx_config:put([sysmon, os, mem_check_interval], 200), + emqx_os_mon:update(emqx_config:get([sysmon, os])), + Mem = 0.52345, + Usage = floor(Mem * 10000) / 100, + emqx_common_test_helpers:with_mock( + load_ctl, + get_memory_usage, + fun() -> Mem end, + fun() -> + timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), + ?assert( + emqx_vm_mon_SUITE:is_existing( + high_system_memory_usage, emqx_alarm:get_alarms(activated) + ), + #{ + load_ctl_memory => load_ctl:get_memory_usage(), + config => emqx_config:get([sysmon, os]), + process => sys:get_state(emqx_os_mon), + alarms => Alarms + } + ), + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 51.0, usage := RealUsage}, + message := Msg, + name := high_system_memory_usage + } + ] = + lists:filter( + fun + (#{name := high_system_memory_usage}) -> true; + (_) -> false + end, + Alarms + ), + ?assert(RealUsage >= Usage, {RealUsage, Usage}), + ?assert(is_binary(Msg)), + emqx_config:put([sysmon, os, sysmem_high_watermark], 0.99999), + ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), + {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon), + timer:sleep(600), + Activated = emqx_alarm:get_alarms(activated), + ?assertNot( + emqx_vm_mon_SUITE:is_existing(high_system_memory_usage, Activated), + #{activated => Activated, process_state => sys:get_state(emqx_os_mon)} + ) + end + ). + +t_cpu_check_alarm(_) -> + CpuUtil = 90.12345, + Usage = floor(CpuUtil * 100) / 100, + emqx_common_test_helpers:with_mock( + cpu_sup, + util, + fun() -> CpuUtil end, + fun() -> + timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), + ?assert( + emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) + ), + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 90.0, low_watermark := 0, usage := RealUsage}, + message := Msg, + name := high_cpu_usage + } + ] = + lists:filter( + fun + (#{name := high_cpu_usage}) -> true; + (_) -> false + end, + Alarms + ), + ?assert(RealUsage >= Usage, {RealUsage, Usage}), + ?assert(is_binary(Msg)), + emqx_config:put([sysmon, os, cpu_high_watermark], 1), + emqx_config:put([sysmon, os, cpu_low_watermark], 0.96), + timer:sleep(500), + ?assertNot( + emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) + ) + end + ). diff --git a/apps/emqx/test/emqx_vm_SUITE.erl b/apps/emqx/test/emqx_vm_SUITE.erl index 9115c5ab4..35f37a41e 100644 --- a/apps/emqx/test/emqx_vm_SUITE.erl +++ b/apps/emqx/test/emqx_vm_SUITE.erl @@ -25,19 +25,22 @@ all() -> emqx_common_test_helpers:all(?MODULE). t_load(_Config) -> lists:foreach( - fun(Avg, Int) -> + fun({Avg, LoadKey, Int}) -> emqx_common_test_helpers:with_mock( cpu_sup, Avg, fun() -> Int end, fun() -> - Load = proplists:get_value(Avg, emqx_vm:loads()), - ?assertEqual(Int / 1.0, Load) + Load = proplists:get_value(LoadKey, emqx_vm:loads()), + ?assertEqual(Int / 256, Load) end - ), - ?assertMatch([{load1, _}, {load5, _}, {load15, _}], emqx_vm:loads()) + ) end, - [{load1, 1}, {load5, 5}, {load15, 15}] + [{avg1, load1, 0}, {avg5, load5, 128}, {avg15, load15, 256}] + ), + ?assertMatch( + [{load1, _}, {load5, _}, {load15, _}], + emqx_vm:loads() ). t_systeminfo(_Config) -> diff --git a/apps/emqx/test/emqx_vm_mon_SUITE.erl b/apps/emqx/test/emqx_vm_mon_SUITE.erl index 140a00010..ceeffafb5 100644 --- a/apps/emqx/test/emqx_vm_mon_SUITE.erl +++ b/apps/emqx/test/emqx_vm_mon_SUITE.erl @@ -23,13 +23,13 @@ all() -> emqx_common_test_helpers:all(?MODULE). -init_per_testcase(t_alarms, Config) -> +init_per_testcase(t_too_many_processes_alarm, Config) -> emqx_common_test_helpers:boot_modules(all), emqx_common_test_helpers:start_apps([]), emqx_config:put([sysmon, vm], #{ process_high_watermark => 0, process_low_watermark => 0, - %% 1s + %% 100ms process_check_interval => 100 }), ok = supervisor:terminate_child(emqx_sys_sup, emqx_vm_mon), @@ -43,9 +43,29 @@ init_per_testcase(_, Config) -> end_per_testcase(_, _Config) -> emqx_common_test_helpers:stop_apps([]). -t_alarms(_) -> +t_too_many_processes_alarm(_) -> timer:sleep(500), + Alarms = emqx_alarm:get_alarms(activated), ?assert(is_existing(too_many_processes, emqx_alarm:get_alarms(activated))), + ?assertMatch( + [ + #{ + activate_at := _, + activated := true, + deactivate_at := infinity, + details := #{high_watermark := 0, low_watermark := 0, usage := "0%"}, + message := <<"0% process usage">>, + name := too_many_processes + } + ], + lists:filter( + fun + (#{name := too_many_processes}) -> true; + (_) -> false + end, + Alarms + ) + ), emqx_config:put([sysmon, vm, process_high_watermark], 70), emqx_config:put([sysmon, vm, process_low_watermark], 60), timer:sleep(500), diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 09adde8bd..f794ef01d 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -126,7 +126,7 @@ lookup_node(Node) -> node_info() -> {UsedRatio, Total} = get_sys_memory(), - Info = maps:from_list([{K, list_to_binary(V)} || {K, V} <- emqx_vm:loads()]), + Info = maps:from_list(emqx_vm:loads()), BrokerInfo = emqx_sys:info(), Info#{ node => node(), diff --git a/apps/emqx_management/src/emqx_mgmt_api_nodes.erl b/apps/emqx_management/src/emqx_mgmt_api_nodes.erl index 64ef3c1ef..cb8d37609 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_nodes.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_nodes.erl @@ -159,18 +159,18 @@ fields(node_info) -> )}, {load1, mk( - string(), - #{desc => <<"CPU average load in 1 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 1 minute">>, example => 2.66} )}, {load5, mk( - string(), - #{desc => <<"CPU average load in 5 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 5 minute">>, example => 2.66} )}, {load15, mk( - string(), - #{desc => <<"CPU average load in 15 minute">>, example => "2.66"} + float(), + #{desc => <<"CPU average load in 15 minute">>, example => 2.66} )}, {max_fds, mk( diff --git a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl index d393caabe..cc4f987b5 100644 --- a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl +++ b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl @@ -17,7 +17,7 @@ -behaviour(gen_server). -define(SYS_MEMORY_CACHE_KEY, ?MODULE). --define(TIMEOUT, 3000). +-define(TIMEOUT, 2200). -export([start_link/0, get_sys_memory/0, get_sys_memory/1]). -export([ diff --git a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl index 2bbdf938d..a0dbb9314 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_nodes_SUITE.erl @@ -24,11 +24,11 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - emqx_mgmt_api_test_util:init_suite([emqx_conf]), + emqx_mgmt_api_test_util:init_suite([emqx_conf, emqx_management]), Config. end_per_suite(_) -> - emqx_mgmt_api_test_util:end_suite([emqx_conf]). + emqx_mgmt_api_test_util:end_suite([emqx_management, emqx_conf]). init_per_testcase(t_log_path, Config) -> emqx_config_logger:add_handler(),