diff --git a/apps/emqx/src/emqx_os_mon.erl b/apps/emqx/src/emqx_os_mon.erl index e90866c71..b76b204f6 100644 --- a/apps/emqx/src/emqx_os_mon.erl +++ b/apps/emqx/src/emqx_os_mon.erl @@ -76,10 +76,16 @@ set_procmem_high_watermark(Float) -> %%-------------------------------------------------------------------- init([]) -> - Opts = emqx:get_config([sysmon, os]), - set_mem_check_interval(maps:get(mem_check_interval, Opts)), - set_sysmem_high_watermark(maps:get(sysmem_high_watermark, Opts)), - set_procmem_high_watermark(maps:get(procmem_high_watermark, Opts)), + #{ + sysmem_high_watermark := SysHW, + procmem_high_watermark := PHW, + mem_check_interval := MCI + } = emqx:get_config([sysmon, os]), + + set_sysmem_high_watermark(SysHW), + set_procmem_high_watermark(PHW), + set_mem_check_interval(MCI), + ensure_system_memory_alarm(SysHW), _ = start_check_timer(), {ok, #{}}. @@ -95,7 +101,7 @@ handle_info({timeout, _Timer, check}, State) -> CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100, _ = case emqx_vm:cpu_util() of %% TODO: should be improved? 0 -> ok; - Busy when Busy >= CPUHighWatermark -> + Busy when Busy > CPUHighWatermark -> Usage = list_to_binary(io_lib:format("~.2f%", [Busy])), Message = <>, emqx_alarm:activate(high_cpu_usage, @@ -106,7 +112,7 @@ handle_info({timeout, _Timer, check}, State) -> }, Message), start_check_timer(); - Busy when Busy =< CPULowWatermark -> + Busy when Busy < CPULowWatermark -> Usage = list_to_binary(io_lib:format("~.2f%", [Busy])), Message = <>, emqx_alarm:deactivate(high_cpu_usage, @@ -142,3 +148,20 @@ start_check_timer() -> "x86_64-pc-linux-musl" -> ok; _ -> emqx_misc:start_timer(Interval, check) end. + +%% At startup, memsup starts first and checks for memory alarms, +%% but emqx_alarm_handler is not yet used instead of alarm_handler, +%% so alarm_handler is used directly for notification (normally emqx_alarm_handler should be used). 
+%% memsup will not re-raise an alarm that it has already reported, +%% and there is no exported function to clear that reported flag, +%% so usage is re-checked here at startup; get_memory_data/0 returns {Total, Allocated, Worst}. +ensure_system_memory_alarm(HW) -> + case erlang:whereis(memsup) of + undefined -> ok; + _Pid -> + {Total, Allocated, _Worst} = memsup:get_memory_data(), + case Total =/= 0 andalso Allocated/Total * 100 > HW of + true -> emqx_alarm:activate(high_system_memory_usage, #{high_watermark => HW}); + false -> ok + end + end. diff --git a/apps/emqx/src/emqx_vm_mon.erl b/apps/emqx/src/emqx_vm_mon.erl index e1b9e9037..18005dce5 100644 --- a/apps/emqx/src/emqx_vm_mon.erl +++ b/apps/emqx/src/emqx_vm_mon.erl @@ -61,7 +61,7 @@ handle_info({timeout, _Timer, check}, State) -> ProcLowWatermark = emqx:get_config([sysmon, vm, process_low_watermark]), ProcessCount = erlang:system_info(process_count), case ProcessCount / erlang:system_info(process_limit) of - Percent when Percent >= ProcHighWatermark -> + Percent when Percent > ProcHighWatermark -> Usage = io_lib:format("~p%", [Percent*100]), Message = [Usage, " process usage"], emqx_alarm:activate(too_many_processes, diff --git a/apps/emqx/test/emqx_os_mon_SUITE.erl b/apps/emqx/test/emqx_os_mon_SUITE.erl index 163af9c4a..4e443b4ca 100644 --- a/apps/emqx/test/emqx_os_mon_SUITE.erl +++ b/apps/emqx/test/emqx_os_mon_SUITE.erl @@ -24,20 +24,24 @@ all() -> emqx_common_test_helpers:all(?MODULE). 
init_per_suite(Config) -> - emqx_config:put([sysmon, os], #{ - cpu_check_interval => 60000,cpu_high_watermark => 0.8, - cpu_low_watermark => 0.6,mem_check_interval => 60000, - procmem_high_watermark => 0.05,sysmem_high_watermark => 0.7}), - application:ensure_all_started(os_mon), + emqx_common_test_helpers:boot_modules(all), + emqx_common_test_helpers:start_apps([], + fun(emqx) -> + application:set_env(emqx, os_mon, [ + {cpu_check_interval, 1}, + {cpu_high_watermark, 5}, + {cpu_low_watermark, 80}, + {mem_check_interval, 60}, + {sysmem_high_watermark, 70}, + {procmem_high_watermark, 5}]); + (_) -> ok + end), Config. end_per_suite(_Config) -> - application:stop(os_mon). + emqx_common_test_helpers:stop_apps([]). t_api(_) -> - gen_event:swap_handler(alarm_handler, {emqx_alarm_handler, swap}, {alarm_handler, []}), - {ok, _} = emqx_os_mon:start_link(), - ?assertEqual(60000, emqx_os_mon:get_mem_check_interval()), ?assertEqual(ok, emqx_os_mon:set_mem_check_interval(30000)), ?assertEqual(60000, emqx_os_mon:get_mem_check_interval()), @@ -58,4 +62,3 @@ t_api(_) -> emqx_os_mon ! ignored, gen_server:stop(emqx_os_mon), ok. -