feat(emqx_resource): add a health_check process that periodically performs health checks, and prints error logs and generates alarms when the checks fail.

This commit is contained in:
EMQ-YangM 2021-12-28 21:38:10 -08:00
parent 888e7fd727
commit c7693246fe
4 changed files with 94 additions and 2 deletions

View File

@ -0,0 +1,43 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2021 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_resource_health_check).
-export([child_spec/2]).
-export([start_link/2]).
-export([health_check/2]).
%% Build the supervisor child specification for the health-check worker of
%% resource `Name'. The child id is `{health_check, Name}' and the worker is
%% started through `start_link/2' with the given `Sleep' interval.
child_spec(Name, Sleep) ->
    ChildId = {health_check, Name},
    StartMFA = {?MODULE, start_link, [Name, Sleep]},
    #{
        id => ChildId,
        start => StartMFA,
        restart => transient,
        shutdown => 5000,
        type => worker,
        modules => [?MODULE]
    }.
%% Spawn a linked process (via proc_lib) that runs `health_check/2' for
%% `Name' with the given `Sleep' interval, and return `{ok, Pid}'.
start_link(Name, Sleep) ->
    LoopArgs = [Name, Sleep],
    {ok, proc_lib:spawn_link(?MODULE, health_check, LoopArgs)}.
%% Endless health-check loop for resource `Name'.
%%
%% Each iteration sleeps for `SleepTime' (milliseconds, as taken by
%% timer:sleep/1), then calls emqx_resource:health_check/1:
%%   - `ok'         -> the alarm named `Name' is deactivated;
%%   - `{error, _}' -> an alarm named `Name' is activated with a
%%                     "<Name> health check failed" message.
%% Any other result raises `case_clause'. The function never returns.
health_check(Name, SleepTime) ->
    timer:sleep(SleepTime),
    CheckResult = emqx_resource:health_check(Name),
    case CheckResult of
        {error, _} ->
            %% The message is built only on failure; Name must be a binary here.
            AlarmMsg = <<Name/binary, " health check failed">>,
            emqx_alarm:activate(Name, #{name => Name}, AlarmMsg);
        ok ->
            emqx_alarm:deactivate(Name)
    end,
    health_check(Name, SleepTime).

View File

@ -0,0 +1,40 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2020-2021 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_resource_health_check_sup).
-behaviour(supervisor).
-export([start_link/0]).
-export([init/1,
create_health_check_process/2,
delete_health_check_process/1]).
%% Start the health-check supervisor, registered locally under this
%% module's name, with an empty argument list passed to init/1.
start_link() ->
    RegName = {local, ?MODULE},
    supervisor:start_link(RegName, ?MODULE, []).
%% Supervisor callback: one_for_one strategy, at most 10 restarts within a
%% period of 10, and no statically defined children.
init([]) ->
    NoChildren = [],
    {ok, {#{strategy => one_for_one, intensity => 10, period => 10}, NoChildren}}.
%% Start a health-check worker for `Name' under the
%% emqx_resource_health_check_sup supervisor, using the child spec built by
%% emqx_resource_health_check:child_spec/2. Returns whatever
%% supervisor:start_child/2 returns.
create_health_check_process(Name, Sleep) ->
    ChildSpec = emqx_resource_health_check:child_spec(Name, Sleep),
    supervisor:start_child(emqx_resource_health_check_sup, ChildSpec).
%% Terminate the health-check worker identified by `{health_check, Name}'
%% and remove its child spec from emqx_resource_health_check_sup.
%% The results of both supervisor calls are discarded; always returns `ok'.
delete_health_check_process(Name) ->
    Sup = emqx_resource_health_check_sup,
    ChildId = {health_check, Name},
    _ = supervisor:terminate_child(Sup, ChildId),
    _ = supervisor:delete_child(Sup, ChildId),
    ok.

View File

@ -173,6 +173,8 @@ do_create(InstId, ResourceType, Config, Opts) ->
%% this is the first time we do health check, this will update the
%% status and then do ets:insert/2
_ = do_health_check(Res0#{state => ResourceState}),
HealthCheckInterval = maps:get(health_check_interval, Opts, 15000),
emqx_resource_health_check_sup:create_health_check_process(InstId, HealthCheckInterval),
{ok, force_lookup(InstId)};
{error, Reason} when ForceCreate == true ->
logger:error("start ~ts resource ~ts failed: ~p, "
@ -216,7 +218,9 @@ do_remove(Mod, InstId, ResourceState, ClearMetrics) ->
case ClearMetrics of
true -> ok = emqx_plugin_libs_metrics:clear_metrics(resource_metrics, InstId);
false -> ok
end.
end,
_ = emqx_resource_health_check_sup:delete_health_check_process(InstId),
ok.
do_restart(InstId) ->
case lookup(InstId) of

View File

@ -45,7 +45,12 @@ init([]) ->
restart => transient,
shutdown => 5000, type => worker, modules => [Mod]}
end || Idx <- lists:seq(1, ?POOL_SIZE)],
{ok, {SupFlags, [Metrics | ResourceInsts]}}.
HealthCheck =
#{id => emqx_resource_health_check_sup,
start => {emqx_resource_health_check_sup, start_link, []},
restart => transient,
shutdown => 5000, type => supervisor, modules => [emqx_resource_health_check_sup]},
{ok, {SupFlags, [HealthCheck, Metrics | ResourceInsts]}}.
%% internal functions
ensure_pool(Pool, Type, Opts) ->