fix: /api/nodes times out when emqx is under high load

This commit is contained in:
Zhongwen Deng 2023-01-29 10:25:28 +08:00
parent 7e8253e3af
commit 2d67bb3fb6
6 changed files with 109 additions and 11 deletions

View File

@ -130,8 +130,10 @@ handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = Stat
handle_info({timeout, _Timer, cpu_check}, State) ->
CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
case emqx_vm:cpu_util() of
0 ->
CPUVal = emqx_vm:cpu_util(),
case CPUVal of
%% 0 or 0.0
Busy when Busy == 0 ->
ok;
Busy when Busy > CPUHighWatermark ->
_ = emqx_alarm:activate(
@ -236,5 +238,5 @@ do_update_mem_alarm_status(HWM0) ->
ok.
%% Formats a usage reading as a binary such as <<"85.30% cpu usage">>.
%% `Usage' may be an integer or a float; `What' is rendered with ~p.
usage_msg(Usage, What) ->
    %% ~.2f only accepts floats, so divide by 1.0 to coerce integers first.
    AsFloat = Usage / 1.0,
    Chars = io_lib:format("~.2f% ~p usage", [AsFloat, What]),
    iolist_to_binary(Chars).

View File

@ -232,8 +232,10 @@ mem_info() ->
Free = proplists:get_value(free_memory, Dataset),
[{total_memory, Total}, {used_memory, Total - Free}].
%% Formats a number as a binary with exactly two decimals,
%% e.g. 1.5 -> <<"1.50">> and 2 -> <<"2.00">>.
%% Only the guarded clauses are kept: leaving the older unguarded
%% io_lib:format/2 clause next to them would define ftos/1 twice.
ftos(F) when is_float(F) ->
    float_to_binary(F, [{decimals, 2}]);
ftos(F) when is_integer(F) ->
    %% float_to_binary/2 rejects integers, so promote to a float first.
    ftos(F / 1.0).
%%%% erlang vm scheduler_usage fun copied from recon
scheduler_usage(Interval) when is_integer(Interval) ->
@ -391,11 +393,12 @@ cpu_util() ->
%% Runs `Fun' to obtain a numeric reading, with two fallbacks to 0.0:
%%   * on {win32, nt} `Fun' is never called;
%%   * if `Fun' raises (error, exit or throw) or returns a non-number.
%% Float results are truncated to two decimals with floor/1; integer
%% results are returned unchanged.
compat_windows(Fun) ->
    case os:type() of
        {win32, nt} ->
            0.0;
        _Type ->
            %% try ... of keeps only the call itself protected. Unlike the
            %% old-style `catch Fun()', a thrown number is not mistaken for
            %% a successful reading.
            try Fun() of
                Val when is_float(Val) -> floor(Val * 100) / 100;
                Val when is_number(Val) -> Val;
                _Other -> 0.0
            catch
                %% Deliberate best effort: a failed probe reads as 0.0.
                _:_ -> 0.0
            end
    end.

View File

@ -63,7 +63,7 @@ handle_info({timeout, _Timer, check}, State) ->
ProcessCount = erlang:system_info(process_count),
case ProcessCount / erlang:system_info(process_limit) of
Percent when Percent > ProcHighWatermark ->
Usage = io_lib:format("~p%", [Percent * 100]),
Usage = usage(Percent),
Message = [Usage, " process usage"],
emqx_alarm:activate(
too_many_processes,
@ -75,7 +75,7 @@ handle_info({timeout, _Timer, check}, State) ->
Message
);
Percent when Percent < ProcLowWatermark ->
Usage = io_lib:format("~p%", [Percent * 100]),
Usage = usage(Percent),
Message = [Usage, " process usage"],
emqx_alarm:ensure_deactivated(
too_many_processes,
@ -108,3 +108,6 @@ code_change(_OldVsn, State, _Extra) ->
%% Starts a timer through emqx_misc:start_timer/2 carrying the message
%% `check', using the interval configured at
%% [sysmon, vm, process_check_interval].
%% NOTE(review): presumably delivered to handle_info/2 as
%% {timeout, _Timer, check} - confirm against emqx_misc.
start_check_timer() ->
    emqx_misc:start_timer(
        emqx:get_config([sysmon, vm, process_check_interval]),
        check
    ).
%% Renders a ratio as a whole-number percentage string, rounded down,
%% e.g. 0.25 -> "25%". `Percent' is the raw ratio, not yet scaled by 100.
usage(Percent) ->
    Whole = floor(Percent * 100),
    lists:concat([Whole, "%"]).

View File

@ -150,7 +150,7 @@ node_info() ->
%% Returns the system memory reading as a 2-tuple.
%% On Linux the value comes from emqx_mgmt_sys_memory:get_sys_memory/0
%% rather than from load_ctl directly; on every other OS it is {0, 0}.
%% Only one call is kept in the Linux clause: leaving the direct load_ctl
%% call beside it makes the clause syntactically invalid.
get_sys_memory() ->
    case os:type() of
        {unix, linux} ->
            emqx_mgmt_sys_memory:get_sys_memory();
        _ ->
            {0, 0}
    end.

View File

@ -26,4 +26,15 @@ start_link() ->
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
%% Supervisor callback. Supervises a single emqx_mgmt_sys_memory worker
%% (5000 ms shutdown) under one_for_one, tolerating at most 1 restart
%% within 5 seconds.
%% The earlier return with an empty child list is dropped; keeping it
%% would end the function before the worker spec is built.
init([]) ->
    MemWorker = child_spec(emqx_mgmt_sys_memory, 5000, worker),
    {ok, {{one_for_one, 1, 5}, [MemWorker]}}.
%% Builds a map-style child spec for `Mod', started via Mod:start_link/0
%% and always restarted (permanent). `Shutdown' and `Type' are passed
%% through unchanged.
child_spec(Mod, Shutdown, Type) ->
    Identity = #{
        id => Mod,
        start => {Mod, start_link, []},
        modules => [Mod]
    },
    Identity#{
        restart => permanent,
        shutdown => Shutdown,
        type => Type
    }.

View File

@ -0,0 +1,79 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_mgmt_sys_memory).
-behaviour(gen_server).
-define(SYS_MEMORY_CACHE_KEY, ?MODULE).
-define(TIMEOUT, 3000).
-export([start_link/0, get_sys_memory/0, get_sys_memory/1]).
-export([
init/1,
handle_call/3,
handle_cast/2,
handle_info/2,
terminate/2,
code_change/3
]).
%% Returns the system memory reading, waiting at most ?TIMEOUT ms for the
%% sampling server before falling back to the last cached value.
get_sys_memory() ->
    get_sys_memory(?TIMEOUT).

%% Same as get_sys_memory/0 with a caller-supplied call timeout (ms).
%% Falls back to the cached reading ({0, 0} if nothing was cached yet) when
%%   * the server does not answer within `Timeout', or
%%   * the server is not registered at the moment (`noproc'), e.g. while
%%     its supervisor is restarting it.
%% Any other exit from the call is left to propagate.
get_sys_memory(Timeout) ->
    try
        gen_server:call(?MODULE, get_sys_memory, Timeout)
    catch
        exit:{timeout, _} ->
            get_memory_from_cache();
        exit:{noproc, _} ->
            get_memory_from_cache()
    end.
%% Starts the server linked to the caller and registers it locally under
%% the module name, which is the name get_sys_memory/1 calls.
start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).
%% gen_server callback. `last_time' is the monotonic timestamp (ms) at which
%% the last sample finished; `undefined' means nothing has been sampled yet.
%% A numeric 0 cannot be the sentinel because monotonic time may be negative.
init([]) ->
    {ok, #{last_time => undefined}}.

%% get_sys_memory: replies with a fresh load_ctl sample when the cached one
%% is missing or at least ?TIMEOUT ms old, otherwise with the cached value.
%% Any other request is answered with `ok'.
handle_call(get_sys_memory, _From, State = #{last_time := LastTime}) ->
    case is_sample_stale(LastTime) of
        true ->
            Memory = load_ctl:get_sys_memory(),
            %% NOTE(review): replacing a persistent_term value schedules a
            %% global GC scan of all processes; an ETS table may be cheaper
            %% on nodes with many processes - confirm before changing.
            persistent_term:put(?SYS_MEMORY_CACHE_KEY, Memory),
            %% Stamp after sampling. If the sample itself was slow, requests
            %% that queued up meanwhile are served from the cache instead of
            %% each triggering another slow sample.
            SampledAt = erlang:monotonic_time(millisecond),
            {reply, Memory, State#{last_time => SampledAt}};
        false ->
            {reply, get_memory_from_cache(), State}
    end;
handle_call(_Request, _From, State = #{}) ->
    {reply, ok, State}.

%% True when no sample exists yet or the last one is at least ?TIMEOUT ms
%% old. Uses monotonic time so wall-clock adjustments cannot distort the age.
is_sample_stale(undefined) ->
    true;
is_sample_stale(LastTime) when is_integer(LastTime) ->
    erlang:monotonic_time(millisecond) - LastTime >= ?TIMEOUT.
%% gen_server callback. Every cast is ignored and the state is kept as is.
handle_cast(_Msg, #{} = State) ->
    {noreply, State}.
%% gen_server callback. Every out-of-band message is dropped and the state
%% is kept as is.
handle_info(_Msg, #{} = State) ->
    {noreply, State}.
%% gen_server callback. Performs no cleanup; only checks the state is a map.
terminate(_Reason, #{}) ->
    ok.
%% gen_server callback. The state is carried over unchanged across upgrades.
code_change(_OldVsn, #{} = State, _Extra) ->
    {ok, State}.
%%%===================================================================
%%% Internal functions
%%%===================================================================
%% Reads the last reading stored under ?SYS_MEMORY_CACHE_KEY in
%% persistent_term, or {0, 0} when nothing has been stored yet.
get_memory_from_cache() ->
    Default = {0, 0},
    persistent_term:get(?SYS_MEMORY_CACHE_KEY, Default).