feat(emqx_resource): add a health_check process that periodically performs health checks, printing error logs and raising alarms when a check fails.
This commit is contained in:
parent
888e7fd727
commit
c7693246fe
|
@ -0,0 +1,43 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2020-2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_resource_health_check).
|
||||
|
||||
-export([child_spec/2]).
|
||||
|
||||
-export([start_link/2]).
|
||||
|
||||
-export([health_check/2]).
|
||||
|
||||
%% @doc Build the supervisor child specification for the health-check
%% worker of the resource `Name'.
%%
%% The child is identified as `{health_check, Name}' and is started through
%% `start_link/2' of this module with `Name' and `Sleep' as arguments.
child_spec(Name, Sleep) ->
    ChildId = {health_check, Name},
    StartMFA = {?MODULE, start_link, [Name, Sleep]},
    #{
        id => ChildId,
        start => StartMFA,
        restart => transient,
        shutdown => 5000,
        type => worker,
        modules => [?MODULE]
    }.
|
||||
|
||||
%% @doc Spawn a linked process (via `proc_lib') that runs `health_check/2'
%% with `Name' and `Sleep', and return `{ok, Pid}' for that process.
start_link(Name, Sleep) ->
    {ok, proc_lib:spawn_link(?MODULE, health_check, [Name, Sleep])}.
|
||||
|
||||
%% @doc Endless health-check loop for the resource `Name'.
%%
%% Each iteration sleeps for `SleepTime' milliseconds, runs
%% `emqx_resource:health_check/1', and then either deactivates the alarm
%% named `Name' (result `ok') or activates it (result `{error, _}').
%% The function never returns; it calls itself again after every round.
health_check(Name, SleepTime) ->
    timer:sleep(SleepTime),
    CheckResult = emqx_resource:health_check(Name),
    %% Only `ok' and `{error, _}' are handled; the alarm call's own return
    %% value is not used.
    case CheckResult of
        {error, _} ->
            %% `Name' must be a binary here, since it is spliced into the
            %% alarm message.
            emqx_alarm:activate(
                Name,
                #{name => Name},
                <<Name/binary, " health check failed">>
            );
        ok ->
            emqx_alarm:deactivate(Name)
    end,
    health_check(Name, SleepTime).
|
|
@ -0,0 +1,40 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2020-2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_resource_health_check_sup).
|
||||
|
||||
-behaviour(supervisor).
|
||||
|
||||
-export([start_link/0]).
|
||||
|
||||
-export([init/1,
|
||||
create_health_check_process/2,
|
||||
delete_health_check_process/1]).
|
||||
|
||||
%% @doc Start the health-check supervisor, registered locally under this
%% module's name, with this module as the callback module.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||
|
||||
%% @doc Supervisor callback. Uses a `one_for_one' strategy allowing at most
%% 10 restarts within 10 seconds, and starts with no children; children are
%% added later through `create_health_check_process/2'.
init([]) ->
    Flags = #{
        strategy => one_for_one,
        intensity => 10,
        period => 10
    },
    InitialChildren = [],
    {ok, {Flags, InitialChildren}}.
|
||||
|
||||
%% @doc Start a health-check worker for `Name' under the
%% `emqx_resource_health_check_sup' supervisor, using the child spec from
%% `emqx_resource_health_check:child_spec/2'. Returns whatever
%% `supervisor:start_child/2' returns.
create_health_check_process(Name, Sleep) ->
    ChildSpec = emqx_resource_health_check:child_spec(Name, Sleep),
    supervisor:start_child(emqx_resource_health_check_sup, ChildSpec).
|
||||
|
||||
%% @doc Terminate the health-check child `{health_check, Name}' and remove
%% its spec from the `emqx_resource_health_check_sup' supervisor.
%%
%% The return values of both supervisor calls are discarded, and `ok' is
%% returned once they have completed.
delete_health_check_process(Name) ->
    Sup = emqx_resource_health_check_sup,
    ChildId = {health_check, Name},
    _ = supervisor:terminate_child(Sup, ChildId),
    _ = supervisor:delete_child(Sup, ChildId),
    ok.
|
|
@ -173,6 +173,8 @@ do_create(InstId, ResourceType, Config, Opts) ->
|
|||
%% this is the first time we do health check, this will update the
|
||||
%% status and then do ets:insert/2
|
||||
_ = do_health_check(Res0#{state => ResourceState}),
|
||||
HealthCheckInterval = maps:get(health_check_interval, Opts, 15000),
|
||||
emqx_resource_health_check_sup:create_health_check_process(InstId, HealthCheckInterval),
|
||||
{ok, force_lookup(InstId)};
|
||||
{error, Reason} when ForceCreate == true ->
|
||||
logger:error("start ~ts resource ~ts failed: ~p, "
|
||||
|
@ -216,7 +218,9 @@ do_remove(Mod, InstId, ResourceState, ClearMetrics) ->
|
|||
case ClearMetrics of
|
||||
true -> ok = emqx_plugin_libs_metrics:clear_metrics(resource_metrics, InstId);
|
||||
false -> ok
|
||||
end.
|
||||
end,
|
||||
_ = emqx_resource_health_check_sup:delete_health_check_process(InstId),
|
||||
ok.
|
||||
|
||||
do_restart(InstId) ->
|
||||
case lookup(InstId) of
|
||||
|
|
|
@ -45,7 +45,12 @@ init([]) ->
|
|||
restart => transient,
|
||||
shutdown => 5000, type => worker, modules => [Mod]}
|
||||
end || Idx <- lists:seq(1, ?POOL_SIZE)],
|
||||
{ok, {SupFlags, [Metrics | ResourceInsts]}}.
|
||||
HealthCheck =
|
||||
#{id => emqx_resource_health_check_sup,
|
||||
start => {emqx_resource_health_check_sup, start_link, []},
|
||||
restart => transient,
|
||||
shutdown => 5000, type => supervisor, modules => [emqx_resource_health_check_sup]},
|
||||
{ok, {SupFlags, [HealthCheck, Metrics | ResourceInsts]}}.
|
||||
|
||||
%% internal functions
|
||||
ensure_pool(Pool, Type, Opts) ->
|
||||
|
|
Loading…
Reference in New Issue