From db5d14d5bf5e0d685a259e94a6777f7979b630d0 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Mon, 12 Jun 2023 10:00:11 -0300 Subject: [PATCH] feat(pulsar): retry health check a bit before returning (r5.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes https://emqx.atlassian.net/browse/EMQX-10228 This is a cosmetic fix for the Pulsar Producer bridge health check status. The Pulsar connection process is asynchronous; therefore, when a bridge of this type is created or updated (which is the same as stopping and re-creating it), the immediate status will be connecting because it’s indeed still connecting. The bridge will connect very soon afterwards (assuming there are no true network/config issues), but having to refresh the UI to see the new status and/or seeing the resource alarm might annoy users. This workaround adds a few retries to account for that effect to reduce the probability of seeing the `connecting` state on such happy paths. --- .../src/emqx_bridge_pulsar_impl_producer.erl | 15 +++++++++++++-- changes/ee/feat-11024.en.md | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 changes/ee/feat-11024.en.md diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl index b81735f46..16b039ba5 100644 --- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl +++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl @@ -64,6 +64,8 @@ -define(pulsar_client_id, pulsar_client_id). -define(pulsar_producers, pulsar_producers). +-define(HEALTH_CHECK_RETRY_TIMEOUT, 4_000). + %%------------------------------------------------------------------------------------- %% `emqx_resource' API %%------------------------------------------------------------------------------------- @@ -440,9 +442,18 @@ render(Message, Template) -> emqx_placeholder:proc_tmpl(Template, Message, Opts). 
get_producer_status(Producers) -> + do_get_producer_status(Producers, 0). + +do_get_producer_status(_Producers, TimeSpent) when TimeSpent > ?HEALTH_CHECK_RETRY_TIMEOUT -> + connecting; +do_get_producer_status(Producers, TimeSpent) -> case pulsar_producers:all_connected(Producers) of - true -> connected; - false -> connecting + true -> + connected; + false -> + Sleep = 200, + timer:sleep(Sleep), + do_get_producer_status(Producers, TimeSpent + Sleep) end. partition_strategy(key_dispatch) -> first_key_dispatch; diff --git a/changes/ee/feat-11024.en.md b/changes/ee/feat-11024.en.md new file mode 100644 index 000000000..b34b88575 --- /dev/null +++ b/changes/ee/feat-11024.en.md @@ -0,0 +1 @@ +Added a small improvement to reduce the chance of seeing the `connecting` state when creating/updating a Pulsar Producer bridge.