fix(bridge): check health immediately after updated

This commit is contained in:
Shawn 2022-01-02 22:45:32 +08:00
parent a64b29ff76
commit 11736dc1d7
3 changed files with 24 additions and 23 deletions

View File

@ -247,7 +247,7 @@ update(Type, Name, {OldConf, Conf}) ->
?SLOG(warning, #{ msg => "updating_a_non-exist_bridge_need_create_a_new_one" ?SLOG(warning, #{ msg => "updating_a_non-exist_bridge_need_create_a_new_one"
, type => Type, name => Name, config => Conf}), , type => Type, name => Name, config => Conf}),
create(Type, Name, Conf); create(Type, Name, Conf);
{error, Reason} -> {update_bridge_failed, Reason} {error, Reason} -> {error, {update_bridge_failed, Reason}}
end; end;
true -> true ->
%% we don't need to recreate the bridge if this config change is only to %% we don't need to recreate the bridge if this config change is only to

View File

@ -36,28 +36,25 @@ start_link(Name, Sleep) ->
{ok, Pid}. {ok, Pid}.
create_checker(Name, Sleep) -> create_checker(Name, Sleep) ->
create_checker(Name, Sleep, false).
create_checker(Name, Sleep, Retry) ->
case supervisor:start_child(?SUP, child_spec(Name, Sleep)) of case supervisor:start_child(?SUP, child_spec(Name, Sleep)) of
{ok, _} -> ok; {ok, _} -> ok;
{error, already_present} -> ok; {error, already_present} -> ok;
{error, {already_started, _}} -> {error, {already_started, _}} when Retry == false ->
ok = delete_checker(Name), ok = delete_checker(Name),
create_checker(Name, Sleep); create_checker(Name, Sleep, true);
Error -> Error Error -> Error
end. end.
delete_checker(Name) -> delete_checker(Name) ->
case supervisor:terminate_child(?SUP, {health_check, Name}) of case supervisor:terminate_child(?SUP, ?ID(Name)) of
ok -> ok -> supervisor:delete_child(?SUP, ?ID(Name));
case supervisor:delete_child(?SUP, {health_check, Name}) of
{error, not_found} -> ok;
Error -> Error
end;
{error, not_found} -> ok;
Error -> Error Error -> Error
end. end.
health_check(Name, SleepTime) -> health_check(Name, SleepTime) ->
timer:sleep(SleepTime),
case emqx_resource:health_check(Name) of case emqx_resource:health_check(Name) of
ok -> ok ->
emqx_alarm:deactivate(Name); emqx_alarm:deactivate(Name);
@ -65,4 +62,5 @@ health_check(Name, SleepTime) ->
emqx_alarm:activate(Name, #{name => Name}, emqx_alarm:activate(Name, #{name => Name},
<<Name/binary, " health check failed">>) <<Name/binary, " health check failed">>)
end, end,
timer:sleep(SleepTime),
health_check(Name, SleepTime). health_check(Name, SleepTime).

View File

@ -140,11 +140,14 @@ code_change(_OldVsn, State, _Extra) ->
%%------------------------------------------------------------------------------ %%------------------------------------------------------------------------------
%% suppress the race condition check, as these functions are protected in gproc workers %% suppress the race condition check, as these functions are protected in gproc workers
-dialyzer({nowarn_function, [do_recreate/4, -dialyzer({nowarn_function, [ do_recreate/4
do_create/4, , do_create/4
do_restart/2, , do_restart/2
do_stop/1, , do_start/4
do_health_check/1]}). , do_stop/1
, do_health_check/1
, start_and_check/5
]}).
do_recreate(InstId, ResourceType, NewConfig, Opts) -> do_recreate(InstId, ResourceType, NewConfig, Opts) ->
case lookup(InstId) of case lookup(InstId) of
@ -183,12 +186,12 @@ do_create(InstId, ResourceType, Config, Opts) ->
do_create_dry_run(ResourceType, Config) -> do_create_dry_run(ResourceType, Config) ->
InstId = make_test_id(), InstId = make_test_id(),
Opts = #{async_create => false}, case emqx_resource:call_start(InstId, ResourceType, Config) of
case do_create(InstId, ResourceType, Config, Opts) of {ok, ResourceState} ->
{ok, Data} -> case emqx_resource:call_health_check(InstId, ResourceType, ResourceState) of
Return = do_health_check(Data), {ok, _} -> ok;
_ = do_remove(Data), {error, Reason, _} -> {error, Reason}
Return; end;
{error, Reason} -> {error, Reason} ->
{error, Reason} {error, Reason}
end. end.
@ -252,7 +255,7 @@ do_stop(#{state := undefined}) ->
ok; ok;
do_stop(#{id := InstId, mod := Mod, state := ResourceState} = Data) -> do_stop(#{id := InstId, mod := Mod, state := ResourceState} = Data) ->
_ = emqx_resource:call_stop(InstId, Mod, ResourceState), _ = emqx_resource:call_stop(InstId, Mod, ResourceState),
ok = emqx_resource_health_check:delete_checker(InstId), _ = emqx_resource_health_check:delete_checker(InstId),
ets:insert(emqx_resource_instance, {InstId, Data#{status => stopped}}), ets:insert(emqx_resource_instance, {InstId, Data#{status => stopped}}),
ok. ok.