fix: start os_mon application temporary

This commit is contained in:
zhongwencool 2023-08-15 17:48:59 +08:00
parent 848eb7e3c3
commit b817e03c08
9 changed files with 86 additions and 47 deletions

View File

@ -38,15 +38,14 @@
%% gen_server callbacks
-export([
init/1,
handle_continue/2,
handle_call/3,
handle_cast/2,
handle_info/2,
terminate/2,
code_change/3
]).
-ifdef(TEST).
-export([is_sysmem_check_supported/0]).
-endif.
-export([is_os_check_supported/0]).
-include("emqx.hrl").
@ -83,12 +82,17 @@ current_sysmem_percent() ->
%%--------------------------------------------------------------------
init([]) ->
%% start os_mon temporarily
{ok, _} = application:ensure_all_started(os_mon),
{ok, undefined, {continue, setup}}.
handle_continue(setup, undefined) ->
%% memsup is not reliable, ignore
memsup:set_sysmem_high_watermark(1.0),
SysHW = init_os_monitor(),
MemRef = start_mem_check_timer(),
CpuRef = start_cpu_check_timer(),
{ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
{noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
%% Reads the [sysmon, os] configuration and hands it to init_os_monitor/1,
%% returning whatever that call returns.
init_os_monitor() ->
    OsConf = emqx:get_config([sysmon, os]),
    init_os_monitor(OsConf).
@ -182,12 +186,12 @@ start_cpu_check_timer() ->
_ -> start_timer(Interval, cpu_check)
end.
is_sysmem_check_supported() ->
is_os_check_supported() ->
{unix, linux} =:= os:type().
start_mem_check_timer() ->
Interval = emqx:get_config([sysmon, os, mem_check_interval]),
case is_integer(Interval) andalso is_sysmem_check_supported() of
case is_integer(Interval) andalso is_os_check_supported() of
true ->
start_timer(Interval, mem_check);
false ->
@ -205,7 +209,7 @@ update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 ->
<<"Deactivated mem usage alarm due to out of range threshold">>
);
update_mem_alarm_status(HWM) ->
is_sysmem_check_supported() andalso
is_os_check_supported() andalso
do_update_mem_alarm_status(HWM),
ok.

View File

@ -3659,7 +3659,7 @@ shared_subscription_strategy() ->
)}.
default_mem_check_interval() ->
case emqx_sys_sup:is_os_mon_supported() of
case emqx_os_mon:is_os_check_supported() of
true -> <<"60s">>;
false -> disabled
end.

View File

@ -29,6 +29,7 @@
%% gen_server callbacks
-export([
init/1,
handle_continue/2,
handle_call/3,
handle_cast/2,
handle_info/2,
@ -70,11 +71,14 @@ update(VM) ->
init([]) ->
emqx_logger:set_proc_metadata(#{sysmon => true}),
init_system_monitor(),
{ok, undefined, {continue, setup}}.
handle_continue(setup, undefined) ->
init_system_monitor(),
%% Monitor cluster partition event
ekka:monitor(partition, fun handle_partition_event/1),
{ok, start_timer(#{timer => undefined, events => []})}.
NewState = start_timer(#{timer => undefined, events => []}),
{noreply, NewState, hibernate}.
%% Arms a timer through emqx_utils:start_timer/2 (duration timer:seconds(2),
%% message term `reset`) and stores its result under the `timer` key.
%% The `:=` update requires State to already contain a `timer` key.
start_timer(State) ->
    Timeout = timer:seconds(2),
    TRef = emqx_utils:start_timer(Timeout, reset),
    State#{timer := TRef}.

View File

@ -19,8 +19,6 @@
-behaviour(supervisor).
-export([start_link/0]).
-export([is_os_mon_supported/0]).
-export([init/1]).
start_link() ->
@ -28,7 +26,7 @@ start_link() ->
init([]) ->
OsMon =
case is_os_mon_supported() of
case emqx_os_mon:is_os_check_supported() of
true -> [child_spec(emqx_os_mon)];
false -> []
end,
@ -45,9 +43,6 @@ init([]) ->
%% Internal functions
%%--------------------------------------------------------------------
%% Reports whether the memsup module exports get_procmem_high_watermark/0.
%% NOTE(review): erlang:function_exported/3 returns false for a module that
%% has not been loaded yet, so the answer depends on memsup already being
%% loaded when this is called -- confirm that holds for every caller.
is_os_mon_supported() ->
erlang:function_exported(memsup, get_procmem_high_watermark, 0).
%% Shorthand for child_spec/2 with an empty list as the second argument.
child_spec(Mod) ->
child_spec(Mod, []).

View File

@ -44,7 +44,7 @@
get_otp_version/0
]).
-export([cpu_util/0]).
-export([cpu_util/0, cpu_util/1]).
-ifdef(TEST).
-compile(export_all).
@ -378,18 +378,30 @@ avg15() ->
%% Passes cpu_sup:util/0 to compat_windows/1 and returns its result.
cpu_util() ->
    Probe = fun cpu_sup:util/0,
    compat_windows(Probe).
%% Passes cpu_sup:util/1 together with Args to compat_windows/2 and returns
%% its result unchanged.
cpu_util(Args) ->
    Probe = fun cpu_sup:util/1,
    compat_windows(Probe, Args).
compat_windows(Fun) ->
case os:type() of
{win32, nt} ->
0.0;
_Type ->
case catch Fun() of
Val when is_float(Val) -> floor(Val * 100) / 100;
Val when is_number(Val) -> Val;
_Error -> 0.0
end
case compat_windows(Fun, []) of
Val when is_float(Val) -> floor(Val * 100) / 100;
Val when is_number(Val) -> Val;
_ -> 0.0
end.
%% Runs a probe fun while shielding the caller from environments where the
%% probe cannot work.
%%
%% Fun  - a fun of arity 0 or arity 1.
%% Args - the single argument handed to an arity-1 Fun; ignored for arity 0.
%%
%% Returns 0.0 on Windows without calling Fun, and 0.0 when the call raises
%% an exception of any class. Otherwise the probe's own result is returned
%% unchanged.
%%
%% The call form is chosen from the fun's arity, not from Args being empty:
%% an arity-1 probe invoked with an empty option list must be called as
%% Fun([]), otherwise the resulting badarity would be swallowed into 0.0.
compat_windows(Fun, Args) ->
    try
        case is_windows() of
            true -> 0.0;
            false when is_function(Fun, 0) -> Fun();
            false -> Fun(Args)
        end
    catch
        %% Deliberately best-effort: any failure of the probe is reported as 0.0.
        _:_ -> 0.0
    end.

%% True when os:type() reports {win32, nt}.
is_windows() ->
    os:type() =:= {win32, nt}.
%% Divides Avg by 256 and truncates the quotient down to two decimal places.
%% NOTE(review): 256 presumably matches the fixed-point scale of the load
%% values produced by cpu_sup -- confirm against the callers.
load(Avg) ->
    Hundredths = floor(Avg / 256 * 100),
    Hundredths / 100.

View File

@ -39,29 +39,47 @@ init_per_testcase(t_cpu_check_alarm, Config) ->
%% 200ms
cpu_check_interval => 200
}),
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
restart_os_mon(),
Config;
init_per_testcase(t_sys_mem_check_alarm, Config) ->
case emqx_os_mon:is_sysmem_check_supported() of
case emqx_os_mon:is_os_check_supported() of
true ->
SysMon = emqx_config:get([sysmon, os], #{}),
emqx_config:put([sysmon, os], SysMon#{
sysmem_high_watermark => 0.51,
%% 200ms
mem_check_interval => 200
}),
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
Config;
});
false ->
Config
end;
ok
end,
restart_os_mon(),
Config;
init_per_testcase(_, Config) ->
emqx_common_test_helpers:boot_modules(all),
emqx_common_test_helpers:start_apps([]),
restart_os_mon(),
Config.
%% Brings up a fresh emqx_os_mon child under emqx_sys_sup.
%% The strategy depends on emqx_os_mon:is_os_check_supported/0; see
%% restart_os_mon/1 for the two cases.
restart_os_mon() ->
    restart_os_mon(emqx_os_mon:is_os_check_supported()).

%% true:  the child must already be known to the supervisor, so both the
%%        terminate and the restart are asserted to succeed.
%% false: the outcomes of terminate/delete are ignored, then the child is
%%        started from an explicit child spec.
restart_os_mon(true) ->
    ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
    {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon);
restart_os_mon(false) ->
    _ = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
    _ = supervisor:delete_child(emqx_sys_sup, emqx_os_mon),
    %% run test on mac/windows.
    Mod = emqx_os_mon,
    ChildSpec = #{
        id => Mod,
        start => {Mod, start_link, []},
        restart => permanent,
        shutdown => 5000,
        type => worker,
        modules => [Mod]
    },
    {ok, _} = supervisor:start_child(emqx_sys_sup, ChildSpec).
t_api(_) ->
?assertEqual(0.7, emqx_os_mon:get_sysmem_high_watermark()),
?assertEqual(ok, emqx_os_mon:set_sysmem_high_watermark(0.8)),
@ -81,7 +99,7 @@ t_api(_) ->
ok.
t_sys_mem_check_disable(Config) ->
case emqx_os_mon:is_sysmem_check_supported() of
case emqx_os_mon:is_os_check_supported() of
true -> do_sys_mem_check_disable(Config);
false -> skip
end.
@ -100,7 +118,7 @@ do_sys_mem_check_disable(_Config) ->
ok.
t_sys_mem_check_alarm(Config) ->
case emqx_os_mon:is_sysmem_check_supported() of
case emqx_os_mon:is_os_check_supported() of
true -> do_sys_mem_check_alarm(Config);
false -> skip
end.
@ -167,7 +185,7 @@ t_cpu_check_alarm(_) ->
util,
fun() -> CpuUtil end,
fun() ->
timer:sleep(500),
timer:sleep(1000),
Alarms = emqx_alarm:get_alarms(activated),
?assert(
emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
@ -193,7 +211,7 @@ t_cpu_check_alarm(_) ->
?assert(is_binary(Msg)),
emqx_config:put([sysmon, os, cpu_high_watermark], 1),
emqx_config:put([sysmon, os, cpu_low_watermark], 0.96),
timer:sleep(500),
timer:sleep(800),
?assertNot(
emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
)

View File

@ -17,7 +17,8 @@
asn1,
syntax_tools,
ssl,
os_mon,
%% Only loaded here: emqx starts os_mon itself as a temporary application, because a permanent os_mon would bring the VM down if it crashed.
{os_mon, load},
inets,
compiler,
runtime_tools,

View File

@ -197,13 +197,16 @@ vm_stats() ->
].
vm_stats('cpu.idle') ->
case cpu_sup:util([detailed]) of
%% Not support for Windows
{_, 0, 0, _} -> 0;
{_Num, _Use, IdleList, _} -> proplists:get_value(idle, IdleList, 0)
case emqx_vm:cpu_util([detailed]) of
{_Num, _Use, List, _} when is_list(List) -> proplists:get_value(idle, List, 0);
%% return {all, 0, 0, []} when cpu_sup is not started
_ -> 0
end;
vm_stats('cpu.use') ->
100 - vm_stats('cpu.idle');
case vm_stats('cpu.idle') of
0 -> 0;
Idle -> 100 - Idle
end;
vm_stats('total.memory') ->
{_, MemTotal} = get_sys_memory(),
MemTotal;

View File

@ -0,0 +1,2 @@
Removed os_mon application monitor support on Windows platforms to prevent VM crashes.
Functionality remains on non-Windows platforms.