fix: /api/nodes times out when emqx is under high load
This commit is contained in:
parent
7e8253e3af
commit
2d67bb3fb6
|
@ -130,8 +130,10 @@ handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = Stat
|
||||||
handle_info({timeout, _Timer, cpu_check}, State) ->
|
handle_info({timeout, _Timer, cpu_check}, State) ->
|
||||||
CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
|
CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
|
||||||
CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
|
CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
|
||||||
case emqx_vm:cpu_util() of
|
CPUVal = emqx_vm:cpu_util(),
|
||||||
0 ->
|
case CPUVal of
|
||||||
|
%% 0 or 0.0
|
||||||
|
Busy when Busy == 0 ->
|
||||||
ok;
|
ok;
|
||||||
Busy when Busy > CPUHighWatermark ->
|
Busy when Busy > CPUHighWatermark ->
|
||||||
_ = emqx_alarm:activate(
|
_ = emqx_alarm:activate(
|
||||||
|
@ -236,5 +238,5 @@ do_update_mem_alarm_status(HWM0) ->
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
usage_msg(Usage, What) ->
|
usage_msg(Usage, What) ->
|
||||||
%% devide by 1.0 to ensure float point number
|
%% divide by 1.0 to ensure float point number
|
||||||
iolist_to_binary(io_lib:format("~.2f% ~p usage", [Usage / 1.0, What])).
|
iolist_to_binary(io_lib:format("~.2f% ~p usage", [Usage / 1.0, What])).
|
||||||
|
|
|
@ -232,8 +232,10 @@ mem_info() ->
|
||||||
Free = proplists:get_value(free_memory, Dataset),
|
Free = proplists:get_value(free_memory, Dataset),
|
||||||
[{total_memory, Total}, {used_memory, Total - Free}].
|
[{total_memory, Total}, {used_memory, Total - Free}].
|
||||||
|
|
||||||
ftos(F) ->
|
ftos(F) when is_float(F) ->
|
||||||
io_lib:format("~.2f", [F / 1.0]).
|
float_to_binary(F, [{decimals, 2}]);
|
||||||
|
ftos(F) when is_integer(F) ->
|
||||||
|
ftos(F / 1.0).
|
||||||
|
|
||||||
%%%% erlang vm scheduler_usage fun copied from recon
|
%%%% erlang vm scheduler_usage fun copied from recon
|
||||||
scheduler_usage(Interval) when is_integer(Interval) ->
|
scheduler_usage(Interval) when is_integer(Interval) ->
|
||||||
|
@ -391,11 +393,12 @@ cpu_util() ->
|
||||||
compat_windows(Fun) ->
|
compat_windows(Fun) ->
|
||||||
case os:type() of
|
case os:type() of
|
||||||
{win32, nt} ->
|
{win32, nt} ->
|
||||||
0;
|
0.0;
|
||||||
_Type ->
|
_Type ->
|
||||||
case catch Fun() of
|
case catch Fun() of
|
||||||
|
Val when is_float(Val) -> floor(Val * 100) / 100;
|
||||||
Val when is_number(Val) -> Val;
|
Val when is_number(Val) -> Val;
|
||||||
_Error -> 0
|
_Error -> 0.0
|
||||||
end
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|
|
@ -63,7 +63,7 @@ handle_info({timeout, _Timer, check}, State) ->
|
||||||
ProcessCount = erlang:system_info(process_count),
|
ProcessCount = erlang:system_info(process_count),
|
||||||
case ProcessCount / erlang:system_info(process_limit) of
|
case ProcessCount / erlang:system_info(process_limit) of
|
||||||
Percent when Percent > ProcHighWatermark ->
|
Percent when Percent > ProcHighWatermark ->
|
||||||
Usage = io_lib:format("~p%", [Percent * 100]),
|
Usage = usage(Percent),
|
||||||
Message = [Usage, " process usage"],
|
Message = [Usage, " process usage"],
|
||||||
emqx_alarm:activate(
|
emqx_alarm:activate(
|
||||||
too_many_processes,
|
too_many_processes,
|
||||||
|
@ -75,7 +75,7 @@ handle_info({timeout, _Timer, check}, State) ->
|
||||||
Message
|
Message
|
||||||
);
|
);
|
||||||
Percent when Percent < ProcLowWatermark ->
|
Percent when Percent < ProcLowWatermark ->
|
||||||
Usage = io_lib:format("~p%", [Percent * 100]),
|
Usage = usage(Percent),
|
||||||
Message = [Usage, " process usage"],
|
Message = [Usage, " process usage"],
|
||||||
emqx_alarm:ensure_deactivated(
|
emqx_alarm:ensure_deactivated(
|
||||||
too_many_processes,
|
too_many_processes,
|
||||||
|
@ -108,3 +108,6 @@ code_change(_OldVsn, State, _Extra) ->
|
||||||
start_check_timer() ->
|
start_check_timer() ->
|
||||||
Interval = emqx:get_config([sysmon, vm, process_check_interval]),
|
Interval = emqx:get_config([sysmon, vm, process_check_interval]),
|
||||||
emqx_misc:start_timer(Interval, check).
|
emqx_misc:start_timer(Interval, check).
|
||||||
|
|
||||||
|
usage(Percent) ->
|
||||||
|
integer_to_list(floor(Percent * 100)) ++ "%".
|
||||||
|
|
|
@ -150,7 +150,7 @@ node_info() ->
|
||||||
get_sys_memory() ->
|
get_sys_memory() ->
|
||||||
case os:type() of
|
case os:type() of
|
||||||
{unix, linux} ->
|
{unix, linux} ->
|
||||||
load_ctl:get_sys_memory();
|
emqx_mgmt_sys_memory:get_sys_memory();
|
||||||
_ ->
|
_ ->
|
||||||
{0, 0}
|
{0, 0}
|
||||||
end.
|
end.
|
||||||
|
|
|
@ -26,4 +26,15 @@ start_link() ->
|
||||||
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
|
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
|
||||||
|
|
||||||
init([]) ->
|
init([]) ->
|
||||||
{ok, {{one_for_one, 1, 5}, []}}.
|
LC = child_spec(emqx_mgmt_sys_memory, 5000, worker),
|
||||||
|
{ok, {{one_for_one, 1, 5}, [LC]}}.
|
||||||
|
|
||||||
|
child_spec(Mod, Shutdown, Type) ->
|
||||||
|
#{
|
||||||
|
id => Mod,
|
||||||
|
start => {Mod, start_link, []},
|
||||||
|
restart => permanent,
|
||||||
|
shutdown => Shutdown,
|
||||||
|
type => Type,
|
||||||
|
modules => [Mod]
|
||||||
|
}.
|
||||||
|
|
|
@ -0,0 +1,79 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
-module(emqx_mgmt_sys_memory).
|
||||||
|
|
||||||
|
-behaviour(gen_server).
|
||||||
|
-define(SYS_MEMORY_CACHE_KEY, ?MODULE).
|
||||||
|
-define(TIMEOUT, 3000).
|
||||||
|
|
||||||
|
-export([start_link/0, get_sys_memory/0, get_sys_memory/1]).
|
||||||
|
-export([
|
||||||
|
init/1,
|
||||||
|
handle_call/3,
|
||||||
|
handle_cast/2,
|
||||||
|
handle_info/2,
|
||||||
|
terminate/2,
|
||||||
|
code_change/3
|
||||||
|
]).
|
||||||
|
|
||||||
|
get_sys_memory() ->
|
||||||
|
get_sys_memory(?TIMEOUT).
|
||||||
|
|
||||||
|
get_sys_memory(Timeout) ->
|
||||||
|
try
|
||||||
|
gen_server:call(?MODULE, get_sys_memory, Timeout)
|
||||||
|
catch
|
||||||
|
exit:{timeout, _} ->
|
||||||
|
get_memory_from_cache()
|
||||||
|
end.
|
||||||
|
|
||||||
|
start_link() ->
|
||||||
|
gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
|
||||||
|
|
||||||
|
init([]) ->
|
||||||
|
{ok, #{last_time => 0}}.
|
||||||
|
|
||||||
|
handle_call(get_sys_memory, _From, State = #{last_time := LastTime}) ->
|
||||||
|
Now = erlang:system_time(millisecond),
|
||||||
|
case Now - LastTime >= ?TIMEOUT of
|
||||||
|
true ->
|
||||||
|
Memory = load_ctl:get_sys_memory(),
|
||||||
|
persistent_term:put(?SYS_MEMORY_CACHE_KEY, Memory),
|
||||||
|
{reply, Memory, State#{last_time => Now}};
|
||||||
|
false ->
|
||||||
|
{reply, get_memory_from_cache(), State}
|
||||||
|
end;
|
||||||
|
handle_call(_Request, _From, State = #{}) ->
|
||||||
|
{reply, ok, State}.
|
||||||
|
|
||||||
|
handle_cast(_Request, State = #{}) ->
|
||||||
|
{noreply, State}.
|
||||||
|
|
||||||
|
handle_info(_Info, State = #{}) ->
|
||||||
|
{noreply, State}.
|
||||||
|
|
||||||
|
terminate(_Reason, _State = #{}) ->
|
||||||
|
ok.
|
||||||
|
|
||||||
|
code_change(_OldVsn, State = #{}, _Extra) ->
|
||||||
|
{ok, State}.
|
||||||
|
|
||||||
|
%%%===================================================================
|
||||||
|
%%% Internal functions
|
||||||
|
%%%===================================================================
|
||||||
|
|
||||||
|
get_memory_from_cache() ->
|
||||||
|
persistent_term:get(?SYS_MEMORY_CACHE_KEY, {0, 0}).
|
Loading…
Reference in New Issue