feat(emqx_resource): add a health_check process that periodically performs health checks, and prints error logs and generates alarms when the checks fail.
This commit is contained in:
parent
888e7fd727
commit
c7693246fe
|
@ -0,0 +1,43 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2020-2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
-module(emqx_resource_health_check).
|
||||||
|
|
||||||
|
-export([child_spec/2]).
|
||||||
|
|
||||||
|
-export([start_link/2]).
|
||||||
|
|
||||||
|
-export([health_check/2]).
|
||||||
|
|
||||||
|
%% @doc Build the supervisor child specification for the health-check
%% worker of resource `Name'.
%%
%% The child id is `{health_check, Name}'. `Name' and `Sleep' are passed
%% unchanged to start_link/2 of this module when the child is started.
child_spec(Name, Sleep) ->
    StartMFA = {?MODULE, start_link, [Name, Sleep]},
    #{
        id => {health_check, Name},
        start => StartMFA,
        restart => transient,
        shutdown => 5000,
        type => worker,
        modules => [?MODULE]
    }.
|
||||||
|
|
||||||
|
%% @doc Spawn a linked process (via proc_lib) that runs health_check/2 with
%% the given `Name' and `Sleep', and return `{ok, Pid}' in the shape a
%% supervisor expects from a child start function.
start_link(Name, Sleep) ->
    {ok, proc_lib:spawn_link(?MODULE, health_check, [Name, Sleep])}.
|
||||||
|
|
||||||
|
%% @doc Health-check loop for the resource `Name'.
%%
%% Each iteration first sleeps for `SleepTime' (passed to timer:sleep/1),
%% then calls emqx_resource:health_check/1:
%%   - on `ok' the alarm named `Name' is deactivated;
%%   - on `{error, _}' an alarm named `Name' is activated with the message
%%     "<Name> health check failed".
%% Any other result raises a `case_clause' error.
%%
%% The function calls itself unconditionally, so it never returns normally.
health_check(Name, SleepTime) ->
    timer:sleep(SleepTime),
    CheckResult = emqx_resource:health_check(Name),
    case CheckResult of
        {error, _Reason} ->
            %% `Name' must be a binary here, since it is spliced into the message.
            AlarmMsg = <<Name/binary, " health check failed">>,
            emqx_alarm:activate(Name, #{name => Name}, AlarmMsg);
        ok ->
            emqx_alarm:deactivate(Name)
    end,
    health_check(Name, SleepTime).
|
|
@ -0,0 +1,40 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2020-2021 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
-module(emqx_resource_health_check_sup).
|
||||||
|
|
||||||
|
-behaviour(supervisor).
|
||||||
|
|
||||||
|
-export([start_link/0]).
|
||||||
|
|
||||||
|
-export([init/1,
|
||||||
|
create_health_check_process/2,
|
||||||
|
delete_health_check_process/1]).
|
||||||
|
|
||||||
|
%% @doc Start the health-check supervisor, registered locally under this
%% module's name and using this module as the callback module.
start_link() ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, []).
|
||||||
|
|
||||||
|
%% @doc Supervisor callback. Starts with no static children; children are
%% added at runtime through create_health_check_process/2.
%%
%% Uses a one_for_one strategy with a restart intensity of 10 within a
%% period of 10 seconds.
init([]) ->
    Flags = #{
        strategy => one_for_one,
        intensity => 10,
        period => 10
    },
    NoChildren = [],
    {ok, {Flags, NoChildren}}.
|
||||||
|
|
||||||
|
%% @doc Start a health-check worker for `Name' under this supervisor.
%%
%% The child specification comes from emqx_resource_health_check:child_spec/2.
%% Returns whatever supervisor:start_child/2 returns.
create_health_check_process(Name, Sleep) ->
    ChildSpec = emqx_resource_health_check:child_spec(Name, Sleep),
    supervisor:start_child(?MODULE, ChildSpec).
|
||||||
|
|
||||||
|
%% @doc Stop the health-check worker for `Name' and remove its child
%% specification from this supervisor.
%%
%% The results of both supervisor calls are deliberately discarded, so the
%% function returns `ok' whether or not such a child exists.
delete_health_check_process(Name) ->
    ChildId = {health_check, Name},
    _ = supervisor:terminate_child(?MODULE, ChildId),
    _ = supervisor:delete_child(?MODULE, ChildId),
    ok.
|
|
@ -173,6 +173,8 @@ do_create(InstId, ResourceType, Config, Opts) ->
|
||||||
%% this is the first time we do health check, this will update the
|
%% this is the first time we do health check, this will update the
|
||||||
%% status and then do ets:insert/2
|
%% status and then do ets:insert/2
|
||||||
_ = do_health_check(Res0#{state => ResourceState}),
|
_ = do_health_check(Res0#{state => ResourceState}),
|
||||||
|
HealthCheckInterval = maps:get(health_check_interval, Opts, 15000),
|
||||||
|
emqx_resource_health_check_sup:create_health_check_process(InstId, HealthCheckInterval),
|
||||||
{ok, force_lookup(InstId)};
|
{ok, force_lookup(InstId)};
|
||||||
{error, Reason} when ForceCreate == true ->
|
{error, Reason} when ForceCreate == true ->
|
||||||
logger:error("start ~ts resource ~ts failed: ~p, "
|
logger:error("start ~ts resource ~ts failed: ~p, "
|
||||||
|
@ -216,7 +218,9 @@ do_remove(Mod, InstId, ResourceState, ClearMetrics) ->
|
||||||
case ClearMetrics of
|
case ClearMetrics of
|
||||||
true -> ok = emqx_plugin_libs_metrics:clear_metrics(resource_metrics, InstId);
|
true -> ok = emqx_plugin_libs_metrics:clear_metrics(resource_metrics, InstId);
|
||||||
false -> ok
|
false -> ok
|
||||||
end.
|
end,
|
||||||
|
_ = emqx_resource_health_check_sup:delete_health_check_process(InstId),
|
||||||
|
ok.
|
||||||
|
|
||||||
do_restart(InstId) ->
|
do_restart(InstId) ->
|
||||||
case lookup(InstId) of
|
case lookup(InstId) of
|
||||||
|
|
|
@ -45,7 +45,12 @@ init([]) ->
|
||||||
restart => transient,
|
restart => transient,
|
||||||
shutdown => 5000, type => worker, modules => [Mod]}
|
shutdown => 5000, type => worker, modules => [Mod]}
|
||||||
end || Idx <- lists:seq(1, ?POOL_SIZE)],
|
end || Idx <- lists:seq(1, ?POOL_SIZE)],
|
||||||
{ok, {SupFlags, [Metrics | ResourceInsts]}}.
|
HealthCheck =
|
||||||
|
#{id => emqx_resource_health_check_sup,
|
||||||
|
start => {emqx_resource_health_check_sup, start_link, []},
|
||||||
|
restart => transient,
|
||||||
|
shutdown => 5000, type => supervisor, modules => [emqx_resource_health_check_sup]},
|
||||||
|
{ok, {SupFlags, [HealthCheck, Metrics | ResourceInsts]}}.
|
||||||
|
|
||||||
%% internal functions
|
%% internal functions
|
||||||
ensure_pool(Pool, Type, Opts) ->
|
ensure_pool(Pool, Type, Opts) ->
|
||||||
|
|
Loading…
Reference in New Issue