From 6190192cbcd9f90207b8a67cebc9697ba5f15ab9 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 20 Jun 2024 11:26:53 +0200 Subject: [PATCH 1/3] fix: redis connector should not timeout because no username and password A redis connector of type single or sentinel always got a timeout error when doing the connector test in the dashboard if no username or password was provided. This commit makes sure that the user instead get an informative error message. Additionally, this commit adds more more error information for all redis connector types. Fixes: https://emqx.atlassian.net/browse/EMQX-12557 --- .../test/emqx_bridge_v2_testlib.erl | 5 +- .../test/emqx_bridge_v2_redis_SUITE.erl | 39 +++++++++++++ apps/emqx_redis/src/emqx_redis.app.src | 2 +- apps/emqx_redis/src/emqx_redis.erl | 58 +++++++++++++++---- 4 files changed, 89 insertions(+), 15 deletions(-) diff --git a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl index 82858f00b..8cf8730b0 100644 --- a/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl +++ b/apps/emqx_bridge/test/emqx_bridge_v2_testlib.erl @@ -945,6 +945,7 @@ t_on_get_status(Config, Opts) -> ProxyHost = ?config(proxy_host, Config), ProxyName = ?config(proxy_name, Config), FailureStatus = maps:get(failure_status, Opts, disconnected), + NormalStatus = maps:get(normal_status, Opts, connected), ?assertMatch({ok, _}, create_bridge(Config)), ResourceId = resource_id(Config), %% Since the connection process is async, we give it some time to @@ -952,7 +953,7 @@ t_on_get_status(Config, Opts) -> ?retry( _Sleep = 1_000, _Attempts = 20, - ?assertEqual({ok, connected}, emqx_resource_manager:health_check(ResourceId)) + ?assertEqual({ok, NormalStatus}, emqx_resource_manager:health_check(ResourceId)) ), case ProxyHost of undefined -> @@ -971,7 +972,7 @@ t_on_get_status(Config, Opts) -> ?retry( _Sleep = 1_000, _Attempts = 20, - ?assertEqual({ok, connected}, emqx_resource_manager:health_check(ResourceId)) + ?assertEqual({ok, NormalStatus}, emqx_resource_manager:health_check(ResourceId)) ) end, ok. diff --git a/apps/emqx_bridge_redis/test/emqx_bridge_v2_redis_SUITE.erl b/apps/emqx_bridge_redis/test/emqx_bridge_v2_redis_SUITE.erl index 7d3003bfa..725d24a88 100644 --- a/apps/emqx_bridge_redis/test/emqx_bridge_v2_redis_SUITE.erl +++ b/apps/emqx_bridge_redis/test/emqx_bridge_v2_redis_SUITE.erl @@ -19,6 +19,7 @@ -include_lib("eunit/include/eunit.hrl"). -include_lib("common_test/include/ct.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). -define(BRIDGE_TYPE, redis). -define(BRIDGE_TYPE_BIN, <<"redis">>). @@ -46,6 +47,7 @@ matrix_testcases() -> t_start_stop, t_create_via_http, t_on_get_status, + t_on_get_status_no_username_pass, t_sync_query, t_map_to_redis_hset_args ]. @@ -325,6 +327,43 @@ t_on_get_status(Config) when is_list(Config) -> emqx_bridge_v2_testlib:t_on_get_status(Config, #{failure_status => connecting}), ok. +t_on_get_status_no_username_pass(matrix) -> + {on_get_status, [ + [single, tcp], + [cluster, tcp], + [sentinel, tcp] + ]}; +t_on_get_status_no_username_pass(Config0) when is_list(Config0) -> + ConnectorConfig0 = ?config(connector_config, Config0), + ConnectorConfig1 = emqx_utils_maps:deep_put( + [<<"parameters">>, <<"password">>], ConnectorConfig0, <<"">> + ), + ConnectorConfig2 = emqx_utils_maps:deep_put( + [<<"parameters">>, <<"username">>], ConnectorConfig1, <<"">> + ), + Config1 = proplists:delete(connector_config, Config0), + Config2 = [{connector_config, ConnectorConfig2} | Config1], + ?check_trace( + emqx_bridge_v2_testlib:t_on_get_status( + Config2, + #{ + failure_status => disconnected, + normal_status => disconnected + } + ), + fun(ok, Trace) -> + case ?config(redis_type, Config2) of + single -> + ?assertMatch([_ | _], ?of_kind(emqx_redis_auth_required_error, Trace)); + sentinel -> + ?assertMatch([_ | _], ?of_kind(emqx_redis_auth_required_error, Trace)); + cluster -> + ok + end + end + ), + ok. + t_sync_query(matrix) -> {sync_query, [ [single, tcp], diff --git a/apps/emqx_redis/src/emqx_redis.app.src b/apps/emqx_redis/src/emqx_redis.app.src index 660c490e6..02a251637 100644 --- a/apps/emqx_redis/src/emqx_redis.app.src +++ b/apps/emqx_redis/src/emqx_redis.app.src @@ -1,6 +1,6 @@ {application, emqx_redis, [ {description, "EMQX Redis Database Connector"}, - {vsn, "0.1.5"}, + {vsn, "0.1.6"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_redis/src/emqx_redis.erl b/apps/emqx_redis/src/emqx_redis.erl index 17a0ede49..3abff99eb 100644 --- a/apps/emqx_redis/src/emqx_redis.erl +++ b/apps/emqx_redis/src/emqx_redis.erl @@ -19,6 +19,7 @@ -include_lib("typerefl/include/types.hrl"). -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/logger.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). -export([namespace/0, roots/0, fields/1, redis_fields/0, desc/1]). @@ -231,7 +232,7 @@ is_unrecoverable_error({error, invalid_cluster_command}) -> is_unrecoverable_error(_) -> false. -on_get_status(_InstId, #{type := cluster, pool_name := PoolName}) -> +on_get_status(_InstId, #{type := cluster, pool_name := PoolName} = State) -> case eredis_cluster:pool_exists(PoolName) of true -> %% eredis_cluster has null slot even pool_exists when emqx start before redis cluster. @@ -242,24 +243,57 @@ on_get_status(_InstId, #{type := cluster, pool_name := PoolName}) -> [] -> disconnected; [_ | _] -> - Health = eredis_cluster:ping_all(PoolName), - status_result(Health) + do_cluster_status_check(PoolName, State) end; false -> disconnected end; -on_get_status(_InstId, #{pool_name := PoolName}) -> - Health = emqx_resource_pool:health_check_workers(PoolName, fun ?MODULE:do_get_status/1), - status_result(Health). +on_get_status(_InstId, #{pool_name := PoolName} = State) -> + Timeout = 1000, + Workers = [Worker || {_WorkerName, Worker} <- ecpool:workers(PoolName)], + DoPerWorker = + fun(Worker) -> + case ecpool_worker:client(Worker) of + {ok, Conn} -> + erlang:is_process_alive(Conn) andalso + ecpool_worker:exec(Worker, fun ?MODULE:do_get_status/1, Timeout); + Error -> + Error + end + end, + {ok, Results} = + try + {ok, emqx_utils:pmap(DoPerWorker, Workers, Timeout)} + catch + exit:timeout -> + {error, timeout} + end, + sum_worker_results(Results, State). + +do_cluster_status_check(Pool, State) -> + Pongs = eredis_cluster:qa(Pool, [<<"PING">>]), + sum_worker_results(Pongs, State). do_get_status(Conn) -> - case eredis:q(Conn, ["PING"]) of - {ok, _} -> true; - _ -> false - end. + eredis:q(Conn, ["PING"]). -status_result(_Status = true) -> connected; -status_result(_Status = false) -> connecting. +sum_worker_results([], _State) -> + connected; +sum_worker_results([{error, <<"NOAUTH Authentication required.">>} = Error | _Rest], State) -> + ?tp(emqx_redis_auth_required_error, #{}), + %% This requires user action to fix so we set the status to disconnected + {disconnected, State, Error}; +sum_worker_results([{ok, _} | Rest], State) -> + sum_worker_results(Rest, State); +sum_worker_results([Error | _Rest], State) -> + ?SLOG( + warning, + #{ + msg => "emqx_redis_check_status_error", + error => Error + } + ), + {connecting, State, Error}. do_cmd(PoolName, cluster, {cmd, Command}) -> eredis_cluster:q(PoolName, Command); From 31509f02cc05b994f24fd04a99a375fb371d0159 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 20 Jun 2024 11:38:31 +0200 Subject: [PATCH 2/3] docs: add change log entry --- changes/ee/fix-13305.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ee/fix-13305.en.md diff --git a/changes/ee/fix-13305.en.md b/changes/ee/fix-13305.en.md new file mode 100644 index 000000000..1936a49e3 --- /dev/null +++ b/changes/ee/fix-13305.en.md @@ -0,0 +1 @@ +Improved error handling for Redis connectors. Previously, Redis connectors of type single or sentinel would always encounter a timeout error during the connector test in the dashboard if no username or password was provided. This update ensures that users now receive an informative error message in such scenarios. Additionally, more detailed error information has been added for all Redis connector types to enhance diagnostics and troubleshooting. From 130571b56e98d398bc4de46ce5d9bf2361ba2904 Mon Sep 17 00:00:00 2001 From: Kjell Winblad Date: Thu, 20 Jun 2024 14:40:18 +0200 Subject: [PATCH 3/3] fix: code improvements thanks to comments from @thalesmg --- apps/emqx_redis/src/emqx_redis.erl | 43 +++++++++++++----------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/apps/emqx_redis/src/emqx_redis.erl b/apps/emqx_redis/src/emqx_redis.erl index 3abff99eb..059e9aa23 100644 --- a/apps/emqx_redis/src/emqx_redis.erl +++ b/apps/emqx_redis/src/emqx_redis.erl @@ -20,6 +20,7 @@ -include_lib("hocon/include/hoconsc.hrl"). -include_lib("emqx/include/logger.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("emqx_resource/include/emqx_resource.hrl"). -export([namespace/0, roots/0, fields/1, redis_fields/0, desc/1]). @@ -241,34 +242,26 @@ on_get_status(_InstId, #{type := cluster, pool_name := PoolName} = State) -> %% In this case, we can directly consider it as a disconnect and then proceed to reconnect. case eredis_cluster_monitor:get_all_pools(PoolName) of [] -> - disconnected; + ?status_disconnected; [_ | _] -> do_cluster_status_check(PoolName, State) end; false -> - disconnected + ?status_disconnected end; on_get_status(_InstId, #{pool_name := PoolName} = State) -> - Timeout = 1000, - Workers = [Worker || {_WorkerName, Worker} <- ecpool:workers(PoolName)], - DoPerWorker = - fun(Worker) -> - case ecpool_worker:client(Worker) of - {ok, Conn} -> - erlang:is_process_alive(Conn) andalso - ecpool_worker:exec(Worker, fun ?MODULE:do_get_status/1, Timeout); - Error -> - Error - end - end, - {ok, Results} = - try - {ok, emqx_utils:pmap(DoPerWorker, Workers, Timeout)} - catch - exit:timeout -> - {error, timeout} - end, - sum_worker_results(Results, State). + HealthCheckResoults = emqx_resource_pool:health_check_workers( + PoolName, + fun ?MODULE:do_get_status/1, + emqx_resource_pool:health_check_timeout(), + #{return_values => true} + ), + case HealthCheckResoults of + {ok, Results} -> + sum_worker_results(Results, State); + Error -> + {?status_disconnected, State, Error} + end. do_cluster_status_check(Pool, State) -> Pongs = eredis_cluster:qa(Pool, [<<"PING">>]), @@ -278,11 +271,11 @@ do_get_status(Conn) -> eredis:q(Conn, ["PING"]). sum_worker_results([], _State) -> - connected; + ?status_connected; sum_worker_results([{error, <<"NOAUTH Authentication required.">>} = Error | _Rest], State) -> ?tp(emqx_redis_auth_required_error, #{}), %% This requires user action to fix so we set the status to disconnected - {disconnected, State, Error}; + {?status_disconnected, State, {unhealthy_target, Error}}; sum_worker_results([{ok, _} | Rest], State) -> sum_worker_results(Rest, State); sum_worker_results([Error | _Rest], State) -> @@ -293,7 +286,7 @@ sum_worker_results([Error | _Rest], State) -> error => Error } ), - {connecting, State, Error}. + {?status_connecting, State, Error}. do_cmd(PoolName, cluster, {cmd, Command}) -> eredis_cluster:q(PoolName, Command);