fix: start os_mon application temporarily
This commit is contained in:
parent
848eb7e3c3
commit
b817e03c08
|
@ -38,15 +38,14 @@
|
|||
%% gen_server callbacks
|
||||
-export([
|
||||
init/1,
|
||||
handle_continue/2,
|
||||
handle_call/3,
|
||||
handle_cast/2,
|
||||
handle_info/2,
|
||||
terminate/2,
|
||||
code_change/3
|
||||
]).
|
||||
-ifdef(TEST).
|
||||
-export([is_sysmem_check_supported/0]).
|
||||
-endif.
|
||||
-export([is_os_check_supported/0]).
|
||||
|
||||
-include("emqx.hrl").
|
||||
|
||||
|
@ -83,12 +82,17 @@ current_sysmem_percent() ->
|
|||
%%--------------------------------------------------------------------
|
||||
|
||||
init([]) ->
|
||||
%% start os_mon temporarily
|
||||
{ok, _} = application:ensure_all_started(os_mon),
|
||||
{ok, undefined, {continue, setup}}.
|
||||
|
||||
handle_continue(setup, undefined) ->
|
||||
%% memsup's own memory check is not reliable; set its high watermark to 1.0 to ignore it
|
||||
memsup:set_sysmem_high_watermark(1.0),
|
||||
SysHW = init_os_monitor(),
|
||||
MemRef = start_mem_check_timer(),
|
||||
CpuRef = start_cpu_check_timer(),
|
||||
{ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
|
||||
{noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
|
||||
|
||||
init_os_monitor() ->
|
||||
init_os_monitor(emqx:get_config([sysmon, os])).
|
||||
|
@ -182,12 +186,12 @@ start_cpu_check_timer() ->
|
|||
_ -> start_timer(Interval, cpu_check)
|
||||
end.
|
||||
|
||||
is_sysmem_check_supported() ->
|
||||
is_os_check_supported() ->
|
||||
{unix, linux} =:= os:type().
|
||||
|
||||
start_mem_check_timer() ->
|
||||
Interval = emqx:get_config([sysmon, os, mem_check_interval]),
|
||||
case is_integer(Interval) andalso is_sysmem_check_supported() of
|
||||
case is_integer(Interval) andalso is_os_check_supported() of
|
||||
true ->
|
||||
start_timer(Interval, mem_check);
|
||||
false ->
|
||||
|
@ -205,7 +209,7 @@ update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 ->
|
|||
<<"Deactivated mem usage alarm due to out of range threshold">>
|
||||
);
|
||||
update_mem_alarm_status(HWM) ->
|
||||
is_sysmem_check_supported() andalso
|
||||
is_os_check_supported() andalso
|
||||
do_update_mem_alarm_status(HWM),
|
||||
ok.
|
||||
|
||||
|
|
|
@ -3659,7 +3659,7 @@ shared_subscription_strategy() ->
|
|||
)}.
|
||||
|
||||
default_mem_check_interval() ->
|
||||
case emqx_sys_sup:is_os_mon_supported() of
|
||||
case emqx_os_mon:is_os_check_supported() of
|
||||
true -> <<"60s">>;
|
||||
false -> disabled
|
||||
end.
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
%% gen_server callbacks
|
||||
-export([
|
||||
init/1,
|
||||
handle_continue/2,
|
||||
handle_call/3,
|
||||
handle_cast/2,
|
||||
handle_info/2,
|
||||
|
@ -70,11 +71,14 @@ update(VM) ->
|
|||
|
||||
init([]) ->
|
||||
emqx_logger:set_proc_metadata(#{sysmon => true}),
|
||||
init_system_monitor(),
|
||||
{ok, undefined, {continue, setup}}.
|
||||
|
||||
handle_continue(setup, undefined) ->
|
||||
init_system_monitor(),
|
||||
%% Monitor cluster partition event
|
||||
ekka:monitor(partition, fun handle_partition_event/1),
|
||||
{ok, start_timer(#{timer => undefined, events => []})}.
|
||||
NewState = start_timer(#{timer => undefined, events => []}),
|
||||
{noreply, NewState, hibernate}.
|
||||
|
||||
%% Returns State with its `timer` key replaced by the result of
%% emqx_utils:start_timer/2, called with a 2 second interval (converted to
%% milliseconds by timer:seconds/1) and the message `reset`.
%% The `:=` update requires State to already contain a `timer` key.
start_timer(State) ->
|
||||
State#{timer := emqx_utils:start_timer(timer:seconds(2), reset)}.
|
||||
|
|
|
@ -19,8 +19,6 @@
|
|||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
-export([is_os_mon_supported/0]).
|
||||
|
||||
-export([init/1]).
|
||||
|
||||
start_link() ->
|
||||
|
@ -28,7 +26,7 @@ start_link() ->
|
|||
|
||||
init([]) ->
|
||||
OsMon =
|
||||
case is_os_mon_supported() of
|
||||
case emqx_os_mon:is_os_check_supported() of
|
||||
true -> [child_spec(emqx_os_mon)];
|
||||
false -> []
|
||||
end,
|
||||
|
@ -45,9 +43,6 @@ init([]) ->
|
|||
%% Internal functions
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
is_os_mon_supported() ->
|
||||
erlang:function_exported(memsup, get_procmem_high_watermark, 0).
|
||||
|
||||
child_spec(Mod) ->
|
||||
child_spec(Mod, []).
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
get_otp_version/0
|
||||
]).
|
||||
|
||||
-export([cpu_util/0]).
|
||||
-export([cpu_util/0, cpu_util/1]).
|
||||
|
||||
-ifdef(TEST).
|
||||
-compile(export_all).
|
||||
|
@ -378,18 +378,30 @@ avg15() ->
|
|||
%% Calls cpu_sup:util/0 through compat_windows/1 rather than directly.
cpu_util() ->
|
||||
compat_windows(fun cpu_sup:util/0).
|
||||
|
||||
%% Calls cpu_sup:util/1 with Args through compat_windows/2, which returns 0.0
%% on Windows or when the call raises.
%% NOTE(review): compat_windows/2 calls Fun() with no arguments when
%% Args =:= []; the fun passed here has arity 1, so cpu_util([]) appears to
%% always end up as 0.0 via the catch clause -- confirm this is intended.
cpu_util(Args) ->
|
||||
compat_windows(fun cpu_sup:util/1, Args).
|
||||
|
||||
compat_windows(Fun) ->
|
||||
case os:type() of
|
||||
{win32, nt} ->
|
||||
0.0;
|
||||
_Type ->
|
||||
case catch Fun() of
|
||||
Val when is_float(Val) -> floor(Val * 100) / 100;
|
||||
Val when is_number(Val) -> Val;
|
||||
_Error -> 0.0
|
||||
end
|
||||
case compat_windows(Fun, []) of
|
||||
Val when is_float(Val) -> floor(Val * 100) / 100;
|
||||
Val when is_number(Val) -> Val;
|
||||
_ -> 0.0
|
||||
end.
|
||||
|
||||
%% Runs Fun unless is_windows() is true, in which case 0.0 is returned
%% without calling Fun at all.
%% On other platforms Fun is called with no arguments when Args =:= [],
%% otherwise with Args passed as one single argument (Args is not applied as
%% an argument list).
%% Any exception of any class raised inside the try body is caught and turned
%% into 0.0, so callers never see a crash from Fun.
%% NOTE(review): the Args =:= [] test selects the zero-argument call
%% regardless of the arity of Fun -- confirm callers never pass [] together
%% with an arity-1 fun.
compat_windows(Fun, Args) ->
|
||||
try
|
||||
case is_windows() of
|
||||
true -> 0.0;
|
||||
false when Args =:= [] -> Fun();
|
||||
false -> Fun(Args)
|
||||
end
|
||||
catch
|
||||
_:_ -> 0.0
|
||||
end.
|
||||
|
||||
%% Returns true when os:type() is exactly {win32, nt}, false otherwise.
is_windows() ->
|
||||
os:type() =:= {win32, nt}.
|
||||
|
||||
%% Divides Avg by 256 and rounds the result down (floor) to two decimal places.
%% NOTE(review): presumably Avg is a cpu_sup load average, which is reported
%% scaled by 256 -- confirm against the callers.
load(Avg) ->
|
||||
floor((Avg / 256) * 100) / 100.
|
||||
|
||||
|
|
|
@ -39,29 +39,47 @@ init_per_testcase(t_cpu_check_alarm, Config) ->
|
|||
%% 200ms
|
||||
cpu_check_interval => 200
|
||||
}),
|
||||
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
|
||||
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
|
||||
restart_os_mon(),
|
||||
Config;
|
||||
init_per_testcase(t_sys_mem_check_alarm, Config) ->
|
||||
case emqx_os_mon:is_sysmem_check_supported() of
|
||||
case emqx_os_mon:is_os_check_supported() of
|
||||
true ->
|
||||
SysMon = emqx_config:get([sysmon, os], #{}),
|
||||
emqx_config:put([sysmon, os], SysMon#{
|
||||
sysmem_high_watermark => 0.51,
|
||||
%% 200ms
|
||||
mem_check_interval => 200
|
||||
}),
|
||||
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
|
||||
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
|
||||
Config;
|
||||
});
|
||||
false ->
|
||||
Config
|
||||
end;
|
||||
ok
|
||||
end,
|
||||
restart_os_mon(),
|
||||
Config;
|
||||
init_per_testcase(_, Config) ->
|
||||
emqx_common_test_helpers:boot_modules(all),
|
||||
emqx_common_test_helpers:start_apps([]),
|
||||
restart_os_mon(),
|
||||
Config.
|
||||
|
||||
%% Test helper: makes sure a fresh emqx_os_mon process is running under
%% emqx_sys_sup.
%% When emqx_os_mon:is_os_check_supported() is true, the existing child is
%% terminated and restarted; both calls are asserted to succeed.
%% Otherwise any child with that id is terminated and deleted (results are
%% deliberately ignored, since the child may not exist) and a new child is
%% started from the child spec built below.
restart_os_mon() ->
|
||||
case emqx_os_mon:is_os_check_supported() of
|
||||
true ->
|
||||
ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
|
||||
{ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon);
|
||||
false ->
|
||||
_ = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
|
||||
_ = supervisor:delete_child(emqx_sys_sup, emqx_os_mon),
|
||||
%% Start emqx_os_mon explicitly so the tests can also run on platforms
|
||||
%% where the OS check is not supported (e.g. mac/windows).
|
||||
Mod = emqx_os_mon,
|
||||
OsMon = #{
|
||||
id => Mod,
|
||||
start => {Mod, start_link, []},
|
||||
restart => permanent,
|
||||
shutdown => 5000,
|
||||
type => worker,
|
||||
modules => [Mod]
|
||||
},
|
||||
{ok, _} = supervisor:start_child(emqx_sys_sup, OsMon)
|
||||
end.
|
||||
|
||||
t_api(_) ->
|
||||
?assertEqual(0.7, emqx_os_mon:get_sysmem_high_watermark()),
|
||||
?assertEqual(ok, emqx_os_mon:set_sysmem_high_watermark(0.8)),
|
||||
|
@ -81,7 +99,7 @@ t_api(_) ->
|
|||
ok.
|
||||
|
||||
t_sys_mem_check_disable(Config) ->
|
||||
case emqx_os_mon:is_sysmem_check_supported() of
|
||||
case emqx_os_mon:is_os_check_supported() of
|
||||
true -> do_sys_mem_check_disable(Config);
|
||||
false -> skip
|
||||
end.
|
||||
|
@ -100,7 +118,7 @@ do_sys_mem_check_disable(_Config) ->
|
|||
ok.
|
||||
|
||||
t_sys_mem_check_alarm(Config) ->
|
||||
case emqx_os_mon:is_sysmem_check_supported() of
|
||||
case emqx_os_mon:is_os_check_supported() of
|
||||
true -> do_sys_mem_check_alarm(Config);
|
||||
false -> skip
|
||||
end.
|
||||
|
@ -167,7 +185,7 @@ t_cpu_check_alarm(_) ->
|
|||
util,
|
||||
fun() -> CpuUtil end,
|
||||
fun() ->
|
||||
timer:sleep(500),
|
||||
timer:sleep(1000),
|
||||
Alarms = emqx_alarm:get_alarms(activated),
|
||||
?assert(
|
||||
emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
|
||||
|
@ -193,7 +211,7 @@ t_cpu_check_alarm(_) ->
|
|||
?assert(is_binary(Msg)),
|
||||
emqx_config:put([sysmon, os, cpu_high_watermark], 1),
|
||||
emqx_config:put([sysmon, os, cpu_low_watermark], 0.96),
|
||||
timer:sleep(500),
|
||||
timer:sleep(800),
|
||||
?assertNot(
|
||||
emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
|
||||
)
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
asn1,
|
||||
syntax_tools,
|
||||
ssl,
|
||||
os_mon,
|
||||
%% Only loaded here: emqx starts os_mon itself as a temporary application, because starting it as permanent can crash the VM.
|
||||
{os_mon, load},
|
||||
inets,
|
||||
compiler,
|
||||
runtime_tools,
|
||||
|
|
|
@ -197,13 +197,16 @@ vm_stats() ->
|
|||
].
|
||||
|
||||
vm_stats('cpu.idle') ->
|
||||
case cpu_sup:util([detailed]) of
|
||||
%% Not support for Windows
|
||||
{_, 0, 0, _} -> 0;
|
||||
{_Num, _Use, IdleList, _} -> proplists:get_value(idle, IdleList, 0)
|
||||
case emqx_vm:cpu_util([detailed]) of
|
||||
{_Num, _Use, List, _} when is_list(List) -> proplists:get_value(idle, List, 0);
|
||||
%% cpu_sup:util/1 returns {all, 0, 0, []} when cpu_sup is not started; treat that and any other unexpected value as 0
|
||||
_ -> 0
|
||||
end;
|
||||
vm_stats('cpu.use') ->
|
||||
100 - vm_stats('cpu.idle');
|
||||
case vm_stats('cpu.idle') of
|
||||
0 -> 0;
|
||||
Idle -> 100 - Idle
|
||||
end;
|
||||
vm_stats('total.memory') ->
|
||||
{_, MemTotal} = get_sys_memory(),
|
||||
MemTotal;
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Removed os_mon application monitor support on Windows platforms to prevent VM crashes.
|
||||
Functionality remains on non-Windows platforms.
|
Loading…
Reference in New Issue