fix(bridge): check health immediately after update

This commit is contained in:
Shawn 2022-01-02 22:45:32 +08:00
parent a64b29ff76
commit 11736dc1d7
3 changed files with 24 additions and 23 deletions

View File

@ -247,7 +247,7 @@ update(Type, Name, {OldConf, Conf}) ->
?SLOG(warning, #{ msg => "updating_a_non-exist_bridge_need_create_a_new_one"
, type => Type, name => Name, config => Conf}),
create(Type, Name, Conf);
{error, Reason} -> {update_bridge_failed, Reason}
{error, Reason} -> {error, {update_bridge_failed, Reason}}
end;
true ->
%% we don't need to recreate the bridge if this config change is only to

View File

@ -36,28 +36,25 @@ start_link(Name, Sleep) ->
{ok, Pid}.
create_checker(Name, Sleep) ->
create_checker(Name, Sleep, false).
create_checker(Name, Sleep, Retry) ->
case supervisor:start_child(?SUP, child_spec(Name, Sleep)) of
{ok, _} -> ok;
{error, already_present} -> ok;
{error, {already_started, _}} ->
{error, {already_started, _}} when Retry == false ->
ok = delete_checker(Name),
create_checker(Name, Sleep);
create_checker(Name, Sleep, true);
Error -> Error
end.
delete_checker(Name) ->
case supervisor:terminate_child(?SUP, {health_check, Name}) of
ok ->
case supervisor:delete_child(?SUP, {health_check, Name}) of
{error, not_found} -> ok;
Error -> Error
end;
{error, not_found} -> ok;
case supervisor:terminate_child(?SUP, ?ID(Name)) of
ok -> supervisor:delete_child(?SUP, ?ID(Name));
Error -> Error
end.
health_check(Name, SleepTime) ->
timer:sleep(SleepTime),
case emqx_resource:health_check(Name) of
ok ->
emqx_alarm:deactivate(Name);
@ -65,4 +62,5 @@ health_check(Name, SleepTime) ->
emqx_alarm:activate(Name, #{name => Name},
<<Name/binary, " health check failed">>)
end,
timer:sleep(SleepTime),
health_check(Name, SleepTime).

View File

@ -140,11 +140,14 @@ code_change(_OldVsn, State, _Extra) ->
%%------------------------------------------------------------------------------
%% suppress the race condition check, as these functions are protected in gproc workers
-dialyzer({nowarn_function, [do_recreate/4,
do_create/4,
do_restart/2,
do_stop/1,
do_health_check/1]}).
-dialyzer({nowarn_function, [ do_recreate/4
, do_create/4
, do_restart/2
, do_start/4
, do_stop/1
, do_health_check/1
, start_and_check/5
]}).
do_recreate(InstId, ResourceType, NewConfig, Opts) ->
case lookup(InstId) of
@ -183,12 +186,12 @@ do_create(InstId, ResourceType, Config, Opts) ->
do_create_dry_run(ResourceType, Config) ->
InstId = make_test_id(),
Opts = #{async_create => false},
case do_create(InstId, ResourceType, Config, Opts) of
{ok, Data} ->
Return = do_health_check(Data),
_ = do_remove(Data),
Return;
case emqx_resource:call_start(InstId, ResourceType, Config) of
{ok, ResourceState} ->
case emqx_resource:call_health_check(InstId, ResourceType, ResourceState) of
{ok, _} -> ok;
{error, Reason, _} -> {error, Reason}
end;
{error, Reason} ->
{error, Reason}
end.
@ -252,7 +255,7 @@ do_stop(#{state := undefined}) ->
ok;
do_stop(#{id := InstId, mod := Mod, state := ResourceState} = Data) ->
_ = emqx_resource:call_stop(InstId, Mod, ResourceState),
ok = emqx_resource_health_check:delete_checker(InstId),
_ = emqx_resource_health_check:delete_checker(InstId),
ets:insert(emqx_resource_instance, {InstId, Data#{status => stopped}}),
ok.