From 2d67bb3fb67cb8544dd401699d93ad01d67f59b2 Mon Sep 17 00:00:00 2001 From: Zhongwen Deng Date: Sun, 29 Jan 2023 10:25:28 +0800 Subject: [PATCH] fix: /api/nodes is timeout if emqx in high load --- apps/emqx/src/emqx_os_mon.erl | 8 +- apps/emqx/src/emqx_vm.erl | 11 ++- apps/emqx/src/emqx_vm_mon.erl | 7 +- apps/emqx_management/src/emqx_mgmt.erl | 2 +- apps/emqx_management/src/emqx_mgmt_sup.erl | 13 ++- .../src/emqx_mgmt_sys_memory.erl | 79 +++++++++++++++++++ 6 files changed, 109 insertions(+), 11 deletions(-) create mode 100644 apps/emqx_management/src/emqx_mgmt_sys_memory.erl diff --git a/apps/emqx/src/emqx_os_mon.erl b/apps/emqx/src/emqx_os_mon.erl index a06f56a4c..5c6987ea0 100644 --- a/apps/emqx/src/emqx_os_mon.erl +++ b/apps/emqx/src/emqx_os_mon.erl @@ -130,8 +130,10 @@ handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = Stat handle_info({timeout, _Timer, cpu_check}, State) -> CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100, CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100, - case emqx_vm:cpu_util() of - 0 -> + CPUVal = emqx_vm:cpu_util(), + case CPUVal of + %% 0 or 0.0 + Busy when Busy == 0 -> ok; Busy when Busy > CPUHighWatermark -> _ = emqx_alarm:activate( @@ -236,5 +238,5 @@ do_update_mem_alarm_status(HWM0) -> ok. usage_msg(Usage, What) -> - %% devide by 1.0 to ensure float point number + %% divide by 1.0 to ensure float point number iolist_to_binary(io_lib:format("~.2f% ~p usage", [Usage / 1.0, What])). diff --git a/apps/emqx/src/emqx_vm.erl b/apps/emqx/src/emqx_vm.erl index cf1a9dc08..fc94a461a 100644 --- a/apps/emqx/src/emqx_vm.erl +++ b/apps/emqx/src/emqx_vm.erl @@ -232,8 +232,10 @@ mem_info() -> Free = proplists:get_value(free_memory, Dataset), [{total_memory, Total}, {used_memory, Total - Free}]. -ftos(F) -> - io_lib:format("~.2f", [F / 1.0]). +ftos(F) when is_float(F) -> + float_to_binary(F, [{decimals, 2}]); +ftos(F) when is_integer(F) -> + ftos(F / 1.0). %%%% erlang vm scheduler_usage fun copied from recon scheduler_usage(Interval) when is_integer(Interval) -> @@ -391,11 +393,12 @@ cpu_util() -> compat_windows(Fun) -> case os:type() of {win32, nt} -> - 0; + 0.0; _Type -> case catch Fun() of + Val when is_float(Val) -> floor(Val * 100) / 100; Val when is_number(Val) -> Val; - _Error -> 0 + _Error -> 0.0 end end. diff --git a/apps/emqx/src/emqx_vm_mon.erl b/apps/emqx/src/emqx_vm_mon.erl index 5447e94e9..1327a1bb0 100644 --- a/apps/emqx/src/emqx_vm_mon.erl +++ b/apps/emqx/src/emqx_vm_mon.erl @@ -63,7 +63,7 @@ handle_info({timeout, _Timer, check}, State) -> ProcessCount = erlang:system_info(process_count), case ProcessCount / erlang:system_info(process_limit) of Percent when Percent > ProcHighWatermark -> - Usage = io_lib:format("~p%", [Percent * 100]), + Usage = usage(Percent), Message = [Usage, " process usage"], emqx_alarm:activate( too_many_processes, @@ -75,7 +75,7 @@ handle_info({timeout, _Timer, check}, State) -> Message ); Percent when Percent < ProcLowWatermark -> - Usage = io_lib:format("~p%", [Percent * 100]), + Usage = usage(Percent), Message = [Usage, " process usage"], emqx_alarm:ensure_deactivated( too_many_processes, @@ -108,3 +108,6 @@ code_change(_OldVsn, State, _Extra) -> start_check_timer() -> Interval = emqx:get_config([sysmon, vm, process_check_interval]), emqx_misc:start_timer(Interval, check). + +usage(Percent) -> + integer_to_list(floor(Percent * 100)) ++ "%". diff --git a/apps/emqx_management/src/emqx_mgmt.erl b/apps/emqx_management/src/emqx_mgmt.erl index 6b38e8ca0..09adde8bd 100644 --- a/apps/emqx_management/src/emqx_mgmt.erl +++ b/apps/emqx_management/src/emqx_mgmt.erl @@ -150,7 +150,7 @@ node_info() -> get_sys_memory() -> case os:type() of {unix, linux} -> - load_ctl:get_sys_memory(); + emqx_mgmt_sys_memory:get_sys_memory(); _ -> {0, 0} end. diff --git a/apps/emqx_management/src/emqx_mgmt_sup.erl b/apps/emqx_management/src/emqx_mgmt_sup.erl index 329532fa1..fa49c02a6 100644 --- a/apps/emqx_management/src/emqx_mgmt_sup.erl +++ b/apps/emqx_management/src/emqx_mgmt_sup.erl @@ -26,4 +26,15 @@ start_link() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). init([]) -> - {ok, {{one_for_one, 1, 5}, []}}. + LC = child_spec(emqx_mgmt_sys_memory, 5000, worker), + {ok, {{one_for_one, 1, 5}, [LC]}}. + +child_spec(Mod, Shutdown, Type) -> + #{ + id => Mod, + start => {Mod, start_link, []}, + restart => permanent, + shutdown => Shutdown, + type => Type, + modules => [Mod] + }. diff --git a/apps/emqx_management/src/emqx_mgmt_sys_memory.erl b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl new file mode 100644 index 000000000..d393caabe --- /dev/null +++ b/apps/emqx_management/src/emqx_mgmt_sys_memory.erl @@ -0,0 +1,79 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_mgmt_sys_memory). + +-behaviour(gen_server). +-define(SYS_MEMORY_CACHE_KEY, ?MODULE). +-define(TIMEOUT, 3000). + +-export([start_link/0, get_sys_memory/0, get_sys_memory/1]). +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). + +get_sys_memory() -> + get_sys_memory(?TIMEOUT). + +get_sys_memory(Timeout) -> + try + gen_server:call(?MODULE, get_sys_memory, Timeout) + catch + exit:{timeout, _} -> + get_memory_from_cache() + end. + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + {ok, #{last_time => 0}}. + +handle_call(get_sys_memory, _From, State = #{last_time := LastTime}) -> + Now = erlang:system_time(millisecond), + case Now - LastTime >= ?TIMEOUT of + true -> + Memory = load_ctl:get_sys_memory(), + persistent_term:put(?SYS_MEMORY_CACHE_KEY, Memory), + {reply, Memory, State#{last_time => Now}}; + false -> + {reply, get_memory_from_cache(), State} + end; +handle_call(_Request, _From, State = #{}) -> + {reply, ok, State}. + +handle_cast(_Request, State = #{}) -> + {noreply, State}. + +handle_info(_Info, State = #{}) -> + {noreply, State}. + +terminate(_Reason, _State = #{}) -> + ok. + +code_change(_OldVsn, State = #{}, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +get_memory_from_cache() -> + persistent_term:get(?SYS_MEMORY_CACHE_KEY, {0, 0}).