fix: start os_mon application temporarily

This commit is contained in:
zhongwencool 2023-08-15 17:48:59 +08:00
parent 848eb7e3c3
commit b817e03c08
9 changed files with 86 additions and 47 deletions

View File

@ -38,15 +38,14 @@
%% gen_server callbacks %% gen_server callbacks
-export([ -export([
init/1, init/1,
handle_continue/2,
handle_call/3, handle_call/3,
handle_cast/2, handle_cast/2,
handle_info/2, handle_info/2,
terminate/2, terminate/2,
code_change/3 code_change/3
]). ]).
-ifdef(TEST). -export([is_os_check_supported/0]).
-export([is_sysmem_check_supported/0]).
-endif.
-include("emqx.hrl"). -include("emqx.hrl").
@ -83,12 +82,17 @@ current_sysmem_percent() ->
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
init([]) -> init([]) ->
%% start os_mon temporarily
{ok, _} = application:ensure_all_started(os_mon),
{ok, undefined, {continue, setup}}.
handle_continue(setup, undefined) ->
%% memsup is not reliable, ignore %% memsup is not reliable, ignore
memsup:set_sysmem_high_watermark(1.0), memsup:set_sysmem_high_watermark(1.0),
SysHW = init_os_monitor(), SysHW = init_os_monitor(),
MemRef = start_mem_check_timer(), MemRef = start_mem_check_timer(),
CpuRef = start_cpu_check_timer(), CpuRef = start_cpu_check_timer(),
{ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}. {noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
init_os_monitor() -> init_os_monitor() ->
init_os_monitor(emqx:get_config([sysmon, os])). init_os_monitor(emqx:get_config([sysmon, os])).
@ -182,12 +186,12 @@ start_cpu_check_timer() ->
_ -> start_timer(Interval, cpu_check) _ -> start_timer(Interval, cpu_check)
end. end.
is_sysmem_check_supported() -> is_os_check_supported() ->
{unix, linux} =:= os:type(). {unix, linux} =:= os:type().
start_mem_check_timer() -> start_mem_check_timer() ->
Interval = emqx:get_config([sysmon, os, mem_check_interval]), Interval = emqx:get_config([sysmon, os, mem_check_interval]),
case is_integer(Interval) andalso is_sysmem_check_supported() of case is_integer(Interval) andalso is_os_check_supported() of
true -> true ->
start_timer(Interval, mem_check); start_timer(Interval, mem_check);
false -> false ->
@ -205,7 +209,7 @@ update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 ->
<<"Deactivated mem usage alarm due to out of range threshold">> <<"Deactivated mem usage alarm due to out of range threshold">>
); );
update_mem_alarm_status(HWM) -> update_mem_alarm_status(HWM) ->
is_sysmem_check_supported() andalso is_os_check_supported() andalso
do_update_mem_alarm_status(HWM), do_update_mem_alarm_status(HWM),
ok. ok.

View File

@ -3659,7 +3659,7 @@ shared_subscription_strategy() ->
)}. )}.
default_mem_check_interval() -> default_mem_check_interval() ->
case emqx_sys_sup:is_os_mon_supported() of case emqx_os_mon:is_os_check_supported() of
true -> <<"60s">>; true -> <<"60s">>;
false -> disabled false -> disabled
end. end.

View File

@ -29,6 +29,7 @@
%% gen_server callbacks %% gen_server callbacks
-export([ -export([
init/1, init/1,
handle_continue/2,
handle_call/3, handle_call/3,
handle_cast/2, handle_cast/2,
handle_info/2, handle_info/2,
@ -70,11 +71,14 @@ update(VM) ->
init([]) -> init([]) ->
emqx_logger:set_proc_metadata(#{sysmon => true}), emqx_logger:set_proc_metadata(#{sysmon => true}),
init_system_monitor(), {ok, undefined, {continue, setup}}.
handle_continue(setup, undefined) ->
init_system_monitor(),
%% Monitor cluster partition event %% Monitor cluster partition event
ekka:monitor(partition, fun handle_partition_event/1), ekka:monitor(partition, fun handle_partition_event/1),
{ok, start_timer(#{timer => undefined, events => []})}. NewState = start_timer(#{timer => undefined, events => []}),
{noreply, NewState, hibernate}.
start_timer(State) -> start_timer(State) ->
State#{timer := emqx_utils:start_timer(timer:seconds(2), reset)}. State#{timer := emqx_utils:start_timer(timer:seconds(2), reset)}.

View File

@ -19,8 +19,6 @@
-behaviour(supervisor). -behaviour(supervisor).
-export([start_link/0]). -export([start_link/0]).
-export([is_os_mon_supported/0]).
-export([init/1]). -export([init/1]).
start_link() -> start_link() ->
@ -28,7 +26,7 @@ start_link() ->
init([]) -> init([]) ->
OsMon = OsMon =
case is_os_mon_supported() of case emqx_os_mon:is_os_check_supported() of
true -> [child_spec(emqx_os_mon)]; true -> [child_spec(emqx_os_mon)];
false -> [] false -> []
end, end,
@ -45,9 +43,6 @@ init([]) ->
%% Internal functions %% Internal functions
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
is_os_mon_supported() ->
erlang:function_exported(memsup, get_procmem_high_watermark, 0).
child_spec(Mod) -> child_spec(Mod) ->
child_spec(Mod, []). child_spec(Mod, []).

View File

@ -44,7 +44,7 @@
get_otp_version/0 get_otp_version/0
]). ]).
-export([cpu_util/0]). -export([cpu_util/0, cpu_util/1]).
-ifdef(TEST). -ifdef(TEST).
-compile(export_all). -compile(export_all).
@ -378,18 +378,30 @@ avg15() ->
cpu_util() -> cpu_util() ->
compat_windows(fun cpu_sup:util/0). compat_windows(fun cpu_sup:util/0).
cpu_util(Args) ->
compat_windows(fun cpu_sup:util/1, Args).
compat_windows(Fun) -> compat_windows(Fun) ->
case os:type() of case compat_windows(Fun, []) of
{win32, nt} -> Val when is_float(Val) -> floor(Val * 100) / 100;
0.0; Val when is_number(Val) -> Val;
_Type -> _ -> 0.0
case catch Fun() of
Val when is_float(Val) -> floor(Val * 100) / 100;
Val when is_number(Val) -> Val;
_Error -> 0.0
end
end. end.
compat_windows(Fun, Args) ->
try
case is_windows() of
true -> 0.0;
false when Args =:= [] -> Fun();
false -> Fun(Args)
end
catch
_:_ -> 0.0
end.
is_windows() ->
os:type() =:= {win32, nt}.
load(Avg) -> load(Avg) ->
floor((Avg / 256) * 100) / 100. floor((Avg / 256) * 100) / 100.

View File

@ -39,29 +39,47 @@ init_per_testcase(t_cpu_check_alarm, Config) ->
%% 200ms %% 200ms
cpu_check_interval => 200 cpu_check_interval => 200
}), }),
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon), restart_os_mon(),
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
Config; Config;
init_per_testcase(t_sys_mem_check_alarm, Config) -> init_per_testcase(t_sys_mem_check_alarm, Config) ->
case emqx_os_mon:is_sysmem_check_supported() of case emqx_os_mon:is_os_check_supported() of
true -> true ->
SysMon = emqx_config:get([sysmon, os], #{}), SysMon = emqx_config:get([sysmon, os], #{}),
emqx_config:put([sysmon, os], SysMon#{ emqx_config:put([sysmon, os], SysMon#{
sysmem_high_watermark => 0.51, sysmem_high_watermark => 0.51,
%% 200ms %% 200ms
mem_check_interval => 200 mem_check_interval => 200
}), });
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
Config;
false -> false ->
Config ok
end; end,
restart_os_mon(),
Config;
init_per_testcase(_, Config) -> init_per_testcase(_, Config) ->
emqx_common_test_helpers:boot_modules(all), restart_os_mon(),
emqx_common_test_helpers:start_apps([]),
Config. Config.
restart_os_mon() ->
case emqx_os_mon:is_os_check_supported() of
true ->
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon);
false ->
_ = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
_ = supervisor:delete_child(emqx_sys_sup, emqx_os_mon),
%% run test on mac/windows.
Mod = emqx_os_mon,
OsMon = #{
id => Mod,
start => {Mod, start_link, []},
restart => permanent,
shutdown => 5000,
type => worker,
modules => [Mod]
},
{ok, _} = supervisor:start_child(emqx_sys_sup, OsMon)
end.
t_api(_) -> t_api(_) ->
?assertEqual(0.7, emqx_os_mon:get_sysmem_high_watermark()), ?assertEqual(0.7, emqx_os_mon:get_sysmem_high_watermark()),
?assertEqual(ok, emqx_os_mon:set_sysmem_high_watermark(0.8)), ?assertEqual(ok, emqx_os_mon:set_sysmem_high_watermark(0.8)),
@ -81,7 +99,7 @@ t_api(_) ->
ok. ok.
t_sys_mem_check_disable(Config) -> t_sys_mem_check_disable(Config) ->
case emqx_os_mon:is_sysmem_check_supported() of case emqx_os_mon:is_os_check_supported() of
true -> do_sys_mem_check_disable(Config); true -> do_sys_mem_check_disable(Config);
false -> skip false -> skip
end. end.
@ -100,7 +118,7 @@ do_sys_mem_check_disable(_Config) ->
ok. ok.
t_sys_mem_check_alarm(Config) -> t_sys_mem_check_alarm(Config) ->
case emqx_os_mon:is_sysmem_check_supported() of case emqx_os_mon:is_os_check_supported() of
true -> do_sys_mem_check_alarm(Config); true -> do_sys_mem_check_alarm(Config);
false -> skip false -> skip
end. end.
@ -167,7 +185,7 @@ t_cpu_check_alarm(_) ->
util, util,
fun() -> CpuUtil end, fun() -> CpuUtil end,
fun() -> fun() ->
timer:sleep(500), timer:sleep(1000),
Alarms = emqx_alarm:get_alarms(activated), Alarms = emqx_alarm:get_alarms(activated),
?assert( ?assert(
emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
@ -193,7 +211,7 @@ t_cpu_check_alarm(_) ->
?assert(is_binary(Msg)), ?assert(is_binary(Msg)),
emqx_config:put([sysmon, os, cpu_high_watermark], 1), emqx_config:put([sysmon, os, cpu_high_watermark], 1),
emqx_config:put([sysmon, os, cpu_low_watermark], 0.96), emqx_config:put([sysmon, os, cpu_low_watermark], 0.96),
timer:sleep(500), timer:sleep(800),
?assertNot( ?assertNot(
emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated)) emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
) )

View File

@ -17,7 +17,8 @@
asn1, asn1,
syntax_tools, syntax_tools,
ssl, ssl,
os_mon, %% started temporary in emqx to prevent crash vm when permanent.
{os_mon, load},
inets, inets,
compiler, compiler,
runtime_tools, runtime_tools,

View File

@ -197,13 +197,16 @@ vm_stats() ->
]. ].
vm_stats('cpu.idle') -> vm_stats('cpu.idle') ->
case cpu_sup:util([detailed]) of case emqx_vm:cpu_util([detailed]) of
%% Not support for Windows {_Num, _Use, List, _} when is_list(List) -> proplists:get_value(idle, List, 0);
{_, 0, 0, _} -> 0; %% return {all, 0, 0, []} when cpu_sup is not started
{_Num, _Use, IdleList, _} -> proplists:get_value(idle, IdleList, 0) _ -> 0
end; end;
vm_stats('cpu.use') -> vm_stats('cpu.use') ->
100 - vm_stats('cpu.idle'); case vm_stats('cpu.idle') of
0 -> 0;
Idle -> 100 - Idle
end;
vm_stats('total.memory') -> vm_stats('total.memory') ->
{_, MemTotal} = get_sys_memory(), {_, MemTotal} = get_sys_memory(),
MemTotal; MemTotal;

View File

@ -0,0 +1,2 @@
Removed support for the os_mon application monitor on Windows platforms to prevent VM crashes.
The functionality remains available on non-Windows platforms.