Merge pull request #11107 from thalesmg/fix-mongo-health-check-reason-master
fix(mongo): return health check failure reason
This commit is contained in:
commit
2f00cf7f84
|
@ -36,5 +36,14 @@ jobs:
|
||||||
env:
|
env:
|
||||||
MIX_ENV: emqx-enterprise
|
MIX_ENV: emqx-enterprise
|
||||||
PROFILE: emqx-enterprise
|
PROFILE: emqx-enterprise
|
||||||
|
- name: produced lock files
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
if: failure()
|
||||||
|
with:
|
||||||
|
name: produced_lock_files
|
||||||
|
path: |
|
||||||
|
mix.lock
|
||||||
|
rebar.lock
|
||||||
|
retention-days: 1
|
||||||
|
|
||||||
...
|
...
|
||||||
|
|
|
@ -269,28 +269,46 @@ on_query(
|
||||||
{ok, Result}
|
{ok, Result}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
on_get_status(InstId, #{pool_name := PoolName}) ->
|
on_get_status(InstId, State = #{pool_name := PoolName}) ->
|
||||||
case health_check(PoolName) of
|
case health_check(PoolName) of
|
||||||
true ->
|
ok ->
|
||||||
?tp(debug, emqx_connector_mongo_health_check, #{
|
?tp(debug, emqx_connector_mongo_health_check, #{
|
||||||
instance_id => InstId,
|
instance_id => InstId,
|
||||||
status => ok
|
status => ok
|
||||||
}),
|
}),
|
||||||
connected;
|
connected;
|
||||||
false ->
|
{error, Reason} ->
|
||||||
?tp(warning, emqx_connector_mongo_health_check, #{
|
?tp(warning, emqx_connector_mongo_health_check, #{
|
||||||
instance_id => InstId,
|
instance_id => InstId,
|
||||||
|
reason => Reason,
|
||||||
status => failed
|
status => failed
|
||||||
}),
|
}),
|
||||||
disconnected
|
{disconnected, State, Reason}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
health_check(PoolName) ->
|
health_check(PoolName) ->
|
||||||
|
Results =
|
||||||
emqx_resource_pool:health_check_workers(
|
emqx_resource_pool:health_check_workers(
|
||||||
PoolName,
|
PoolName,
|
||||||
fun ?MODULE:check_worker_health/1,
|
fun ?MODULE:check_worker_health/1,
|
||||||
?HEALTH_CHECK_TIMEOUT + timer:seconds(1)
|
?HEALTH_CHECK_TIMEOUT + timer:seconds(1),
|
||||||
).
|
#{return_values => true}
|
||||||
|
),
|
||||||
|
case Results of
|
||||||
|
{ok, []} ->
|
||||||
|
{error, worker_processes_dead};
|
||||||
|
{ok, Values} ->
|
||||||
|
case lists:partition(fun(V) -> V =:= ok end, Values) of
|
||||||
|
{_Ok, []} ->
|
||||||
|
ok;
|
||||||
|
{_Ok, [{error, Reason} | _Errors]} ->
|
||||||
|
{error, Reason};
|
||||||
|
{_Ok, [Error | _Errors]} ->
|
||||||
|
{error, Error}
|
||||||
|
end;
|
||||||
|
{error, Reason} ->
|
||||||
|
{error, Reason}
|
||||||
|
end.
|
||||||
|
|
||||||
%% ===================================================================
|
%% ===================================================================
|
||||||
|
|
||||||
|
@ -302,9 +320,9 @@ check_worker_health(Conn) ->
|
||||||
msg => "mongo_connection_get_status_error",
|
msg => "mongo_connection_get_status_error",
|
||||||
reason => Reason
|
reason => Reason
|
||||||
}),
|
}),
|
||||||
false;
|
{error, Reason};
|
||||||
_ ->
|
_ ->
|
||||||
true
|
ok
|
||||||
catch
|
catch
|
||||||
Class:Error ->
|
Class:Error ->
|
||||||
?SLOG(warning, #{
|
?SLOG(warning, #{
|
||||||
|
@ -312,7 +330,7 @@ check_worker_health(Conn) ->
|
||||||
class => Class,
|
class => Class,
|
||||||
error => Error
|
error => Error
|
||||||
}),
|
}),
|
||||||
false
|
{error, {Class, Error}}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
do_test_query(Conn) ->
|
do_test_query(Conn) ->
|
||||||
|
|
|
@ -20,7 +20,8 @@
|
||||||
start/3,
|
start/3,
|
||||||
stop/1,
|
stop/1,
|
||||||
health_check_workers/2,
|
health_check_workers/2,
|
||||||
health_check_workers/3
|
health_check_workers/3,
|
||||||
|
health_check_workers/4
|
||||||
]).
|
]).
|
||||||
|
|
||||||
-include_lib("emqx/include/logger.hrl").
|
-include_lib("emqx/include/logger.hrl").
|
||||||
|
@ -66,9 +67,13 @@ stop(Name) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
health_check_workers(PoolName, CheckFunc) ->
|
health_check_workers(PoolName, CheckFunc) ->
|
||||||
health_check_workers(PoolName, CheckFunc, ?HEALTH_CHECK_TIMEOUT).
|
health_check_workers(PoolName, CheckFunc, ?HEALTH_CHECK_TIMEOUT, _Opts = #{}).
|
||||||
|
|
||||||
health_check_workers(PoolName, CheckFunc, Timeout) ->
|
health_check_workers(PoolName, CheckFunc, Timeout) ->
|
||||||
|
health_check_workers(PoolName, CheckFunc, Timeout, _Opts = #{}).
|
||||||
|
|
||||||
|
health_check_workers(PoolName, CheckFunc, Timeout, Opts) ->
|
||||||
|
ReturnValues = maps:get(return_values, Opts, false),
|
||||||
Workers = [Worker || {_WorkerName, Worker} <- ecpool:workers(PoolName)],
|
Workers = [Worker || {_WorkerName, Worker} <- ecpool:workers(PoolName)],
|
||||||
DoPerWorker =
|
DoPerWorker =
|
||||||
fun(Worker) ->
|
fun(Worker) ->
|
||||||
|
@ -76,18 +81,26 @@ health_check_workers(PoolName, CheckFunc, Timeout) ->
|
||||||
{ok, Conn} ->
|
{ok, Conn} ->
|
||||||
erlang:is_process_alive(Conn) andalso
|
erlang:is_process_alive(Conn) andalso
|
||||||
ecpool_worker:exec(Worker, CheckFunc, Timeout);
|
ecpool_worker:exec(Worker, CheckFunc, Timeout);
|
||||||
_ ->
|
Error ->
|
||||||
false
|
Error
|
||||||
end
|
end
|
||||||
end,
|
end,
|
||||||
try emqx_utils:pmap(DoPerWorker, Workers, Timeout) of
|
Results =
|
||||||
[_ | _] = Status ->
|
try
|
||||||
lists:all(fun(St) -> St =:= true end, Status);
|
{ok, emqx_utils:pmap(DoPerWorker, Workers, Timeout)}
|
||||||
[] ->
|
|
||||||
false
|
|
||||||
catch
|
catch
|
||||||
exit:timeout ->
|
exit:timeout ->
|
||||||
false
|
{error, timeout}
|
||||||
|
end,
|
||||||
|
case ReturnValues of
|
||||||
|
true ->
|
||||||
|
Results;
|
||||||
|
false ->
|
||||||
|
case Results of
|
||||||
|
{ok, []} -> false;
|
||||||
|
{ok, Rs = [_ | _]} -> lists:all(fun(St) -> St =:= true end, Rs);
|
||||||
|
_ -> false
|
||||||
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
parse_reason({
|
parse_reason({
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Now we return the health check failure reason when probing a MongoDB bridge.
|
|
@ -28,7 +28,8 @@ group_tests() ->
|
||||||
t_setup_via_http_api_and_publish,
|
t_setup_via_http_api_and_publish,
|
||||||
t_payload_template,
|
t_payload_template,
|
||||||
t_collection_template,
|
t_collection_template,
|
||||||
t_mongo_date_rule_engine_functions
|
t_mongo_date_rule_engine_functions,
|
||||||
|
t_get_status_server_selection_too_short
|
||||||
].
|
].
|
||||||
|
|
||||||
groups() ->
|
groups() ->
|
||||||
|
@ -317,6 +318,27 @@ send_message(Config, Payload) ->
|
||||||
BridgeID = emqx_bridge_resource:bridge_id(Type, Name),
|
BridgeID = emqx_bridge_resource:bridge_id(Type, Name),
|
||||||
emqx_bridge:send_message(BridgeID, Payload).
|
emqx_bridge:send_message(BridgeID, Payload).
|
||||||
|
|
||||||
|
probe_bridge_api(Config) ->
|
||||||
|
probe_bridge_api(Config, _Overrides = #{}).
|
||||||
|
|
||||||
|
probe_bridge_api(Config, Overrides) ->
|
||||||
|
Name = ?config(mongo_name, Config),
|
||||||
|
TypeBin = mongo_type_bin(?config(mongo_type, Config)),
|
||||||
|
MongoConfig0 = ?config(mongo_config, Config),
|
||||||
|
MongoConfig = emqx_utils_maps:deep_merge(MongoConfig0, Overrides),
|
||||||
|
Params = MongoConfig#{<<"type">> => TypeBin, <<"name">> => Name},
|
||||||
|
Path = emqx_mgmt_api_test_util:api_path(["bridges_probe"]),
|
||||||
|
AuthHeader = emqx_mgmt_api_test_util:auth_header_(),
|
||||||
|
Opts = #{return_all => true},
|
||||||
|
ct:pal("probing bridge (via http): ~p", [Params]),
|
||||||
|
Res =
|
||||||
|
case emqx_mgmt_api_test_util:request_api(post, Path, "", AuthHeader, Params, Opts) of
|
||||||
|
{ok, {{_, 204, _}, _Headers, _Body0} = Res0} -> {ok, Res0};
|
||||||
|
Error -> Error
|
||||||
|
end,
|
||||||
|
ct:pal("bridge probe result: ~p", [Res]),
|
||||||
|
Res.
|
||||||
|
|
||||||
%%------------------------------------------------------------------------------
|
%%------------------------------------------------------------------------------
|
||||||
%% Testcases
|
%% Testcases
|
||||||
%%------------------------------------------------------------------------------
|
%%------------------------------------------------------------------------------
|
||||||
|
@ -453,3 +475,21 @@ t_mongo_date_rule_engine_functions(Config) ->
|
||||||
find_all_wait_until_non_empty(Config)
|
find_all_wait_until_non_empty(Config)
|
||||||
),
|
),
|
||||||
ok.
|
ok.
|
||||||
|
|
||||||
|
t_get_status_server_selection_too_short(Config) ->
|
||||||
|
Res = probe_bridge_api(
|
||||||
|
Config,
|
||||||
|
#{
|
||||||
|
<<"topology">> => #{<<"server_selection_timeout_ms">> => <<"1ms">>}
|
||||||
|
}
|
||||||
|
),
|
||||||
|
?assertMatch({error, {{_, 400, _}, _Headers, _Body}}, Res),
|
||||||
|
{error, {{_, 400, _}, _Headers, Body}} = Res,
|
||||||
|
?assertMatch(
|
||||||
|
#{
|
||||||
|
<<"code">> := <<"TEST_FAILED">>,
|
||||||
|
<<"message">> := <<"timeout">>
|
||||||
|
},
|
||||||
|
emqx_utils_json:decode(Body)
|
||||||
|
),
|
||||||
|
ok.
|
||||||
|
|
Loading…
Reference in New Issue