Merge pull request #13425 from kjellwinblad/kjell/review_connector_error_logs_mqtt_etc/EMQX-12555/EMQX-12657

fix: make MQTT connector error log messages easier to understand
This commit is contained in:
Thales Macedo Garitezi 2024-08-05 17:34:13 -03:00 committed by GitHub
commit 613fc644f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 154 additions and 40 deletions

View File

@ -1154,7 +1154,7 @@ t_bridges_probe(Config) ->
?assertMatch(
{ok, 400, #{
<<"code">> := <<"TEST_FAILED">>,
<<"message">> := <<"Connection refused">>
<<"message">> := <<"Connection refused", _/binary>>
}},
request_json(
post,

View File

@ -98,7 +98,7 @@ on_start(ResourceId, #{server := Server} = Conf) ->
server => Server
}};
{error, Reason} ->
{error, Reason}
{error, emqx_maybe:define(explain_error(Reason), Reason)}
end.
on_add_channel(
@ -200,7 +200,7 @@ on_get_channel_status(
} = _State
) when is_map_key(ChannelId, Channels) ->
%% The channel should be ok as long as the MQTT client is ok
connected.
?status_connected.
on_get_channels(ResId) ->
emqx_bridge_v2:get_channels_for_connector(ResId).
@ -356,10 +356,15 @@ on_get_status(_ResourceId, State) ->
Workers = [{Pool, Worker} || {Pool, PN} <- Pools, {_Name, Worker} <- ecpool:workers(PN)],
try emqx_utils:pmap(fun get_status/1, Workers, ?HEALTH_CHECK_TIMEOUT) of
Statuses ->
combine_status(Statuses)
case combine_status(Statuses) of
{Status, Msg} ->
{Status, State, Msg};
Status ->
Status
end
catch
exit:timeout ->
connecting
?status_connecting
end.
get_status({_Pool, Worker}) ->
@ -367,7 +372,7 @@ get_status({_Pool, Worker}) ->
{ok, Client} ->
emqx_bridge_mqtt_ingress:status(Client);
{error, _} ->
disconnected
?status_disconnected
end.
combine_status(Statuses) ->
@ -375,11 +380,25 @@ combine_status(Statuses) ->
%% Natural order of statuses: [connected, connecting, disconnected]
%% * `disconnected` wins over any other status
%% * `connecting` wins over `connected`
case lists:reverse(lists:usort(Statuses)) of
ToStatus = fun
({S, _Reason}) -> S;
(S) when is_atom(S) -> S
end,
CompareFn = fun(S1A, S2A) ->
S1 = ToStatus(S1A),
S2 = ToStatus(S2A),
S1 > S2
end,
case lists:usort(CompareFn, Statuses) of
[{Status, Reason} | _] ->
case explain_error(Reason) of
undefined -> Status;
Msg -> {Status, Msg}
end;
[Status | _] ->
Status;
[] ->
disconnected
?status_disconnected
end.
mk_ingress_config(
@ -514,15 +533,54 @@ connect(Pid, Name) ->
{ok, Pid};
{error, Reason} = Error ->
IsDryRun = emqx_resource:is_dry_run(Name),
?SLOG(?LOG_LEVEL(IsDryRun), #{
msg => "ingress_client_connect_failed",
reason => Reason,
resource_id => Name
}),
log_connect_error_reason(?LOG_LEVEL(IsDryRun), Reason, Name),
_ = catch emqtt:stop(Pid),
Error
end.
log_connect_error_reason(Level, {tcp_closed, _} = Reason, Name) ->
?tp(emqx_bridge_mqtt_connector_tcp_closed, #{}),
?SLOG(Level, #{
msg => "ingress_client_connect_failed",
reason => Reason,
name => Name,
explain => explain_error(Reason)
});
log_connect_error_reason(Level, econnrefused = Reason, Name) ->
?tp(emqx_bridge_mqtt_connector_econnrefused_error, #{}),
?SLOG(Level, #{
msg => "ingress_client_connect_failed",
reason => Reason,
name => Name,
explain => explain_error(Reason)
});
log_connect_error_reason(Level, Reason, Name) ->
?SLOG(Level, #{
msg => "ingress_client_connect_failed",
reason => Reason,
name => Name
}).
explain_error(econnrefused) ->
<<
"Connection refused. "
"This error indicates that your connection attempt to the MQTT server was rejected. "
"In simpler terms, the server you tried to connect to refused your request. "
"There can be multiple reasons for this. "
"For example, the MQTT server you're trying to connect to might be down or not "
"running at all or you might have provided the wrong address "
"or port number for the server."
>>;
explain_error({tcp_closed, _}) ->
<<
"Your MQTT connection attempt was unsuccessful. "
"It might be at its maximum capacity for handling new connections. "
"To diagnose the issue further, you can check the server logs for "
"any specific messages related to the unavailability or connection limits."
>>;
explain_error(_Reason) ->
undefined.
handle_disconnect(_Reason) ->
ok.

View File

@ -19,6 +19,7 @@
-include_lib("emqx/include/logger.hrl").
-include_lib("emqx/include/emqx_mqtt.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-include_lib("emqx_resource/include/emqx_resource.hrl").
%% management APIs
-export([
@ -234,13 +235,13 @@ status(Pid) ->
try
case proplists:get_value(socket, info(Pid)) of
Socket when Socket /= undefined ->
connected;
?status_connected;
undefined ->
connecting
?status_connecting
end
catch
exit:{noproc, _} ->
disconnected
?status_disconnected
end.
%%

View File

@ -1025,6 +1025,8 @@ t_mqtt_conn_bridge_egress_async_reconnect(_) ->
ct:sleep(1000),
%% stop the listener 1883 to make the bridge disconnected
?check_trace(
begin
ok = emqx_listeners:stop_listener('tcp:default'),
ct:sleep(1500),
?assertMatch(
@ -1049,7 +1051,13 @@ t_mqtt_conn_bridge_egress_async_reconnect(_) ->
|| I <- lists:seq(1, N),
Payload <- [integer_to_binary(I)]
],
ok
end,
fun(Trace) ->
?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_econnrefused_error, Trace)),
ok
end
),
ok.
start_publisher(Topic, Interval, CtrlPid) ->

View File

@ -131,6 +131,9 @@ hookpoint(Config) ->
BridgeId = bridge_id(Config),
emqx_bridge_resource:bridge_hookpoint(BridgeId).
simplify_result(Res) ->
emqx_bridge_v2_testlib:simplify_result(Res).
%%------------------------------------------------------------------------------
%% Testcases
%%------------------------------------------------------------------------------
@ -246,3 +249,46 @@ t_receive_via_rule(Config) ->
end
),
ok.
t_connect_with_more_clients_than_the_broker_accepts(Config) ->
Name = ?config(connector_name, Config),
OrgConf = emqx_mgmt_listeners_conf:get_raw(tcp, default),
on_exit(fun() ->
emqx_mgmt_listeners_conf:update(tcp, default, OrgConf)
end),
NewConf = OrgConf#{<<"max_connections">> => 3},
{ok, _} = emqx_mgmt_listeners_conf:update(tcp, default, NewConf),
?check_trace(
#{timetrap => 10_000},
begin
?assertMatch(
{201, #{
<<"status">> := <<"disconnected">>,
<<"status_reason">> :=
<<"Your MQTT connection attempt was unsuccessful", _/binary>>
}},
simplify_result(
emqx_bridge_v2_testlib:create_connector_api(
Config,
#{<<"pool_size">> => 100}
)
)
),
?block_until(#{?snk_kind := emqx_bridge_mqtt_connector_tcp_closed}),
?assertMatch(
{200, #{
<<"status">> := <<"disconnected">>,
<<"status_reason">> :=
<<"Your MQTT connection attempt was unsuccessful", _/binary>>
}},
simplify_result(emqx_bridge_v2_testlib:get_connector_api(mqtt, Name))
),
ok
end,
fun(Trace) ->
?assertMatch([_ | _], ?of_kind(emqx_bridge_mqtt_connector_tcp_closed, Trace)),
ok
end
),
ok.

View File

@ -0,0 +1 @@
The MQTT connector error log messages have been improved to provide clearer and more detailed information.