From db5d14d5bf5e0d685a259e94a6777f7979b630d0 Mon Sep 17 00:00:00 2001
From: Thales Macedo Garitezi <thalesmg@gmail.com>
Date: Mon, 12 Jun 2023 10:00:11 -0300
Subject: [PATCH] feat(pulsar): retry health check a bit before returning
 (r5.1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes https://emqx.atlassian.net/browse/EMQX-10228

This is a cosmetic fix for the Pulsar Producer bridge health check status.

The Pulsar connection process is asynchronous; therefore, when a bridge of this type is
created or updated (which is the same as stopping and re-creating it), the immediate
status will be `connecting` because it’s indeed still connecting.  The bridge will connect
very soon afterwards (assuming there are no true network/config issues), but having to
refresh the UI to see the new status and/or seeing the resource alarm might annoy users.

This workaround adds a few retries to account for that effect and reduce the probability
of seeing the `connecting` state on such happy paths.
---
 .../src/emqx_bridge_pulsar_impl_producer.erl      | 22 ++++++++++++++++++++--
 changes/ee/feat-11024.en.md                       |  1 +
 2 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 changes/ee/feat-11024.en.md

diff --git a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl
index b81735f46..16b039ba5 100644
--- a/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl
+++ b/apps/emqx_bridge_pulsar/src/emqx_bridge_pulsar_impl_producer.erl
@@ -64,6 +64,8 @@
 -define(pulsar_client_id, pulsar_client_id).
 -define(pulsar_producers, pulsar_producers).
 
+-define(HEALTH_CHECK_RETRY_TIMEOUT, 4_000).
+
 %%-------------------------------------------------------------------------------------
 %% `emqx_resource' API
 %%-------------------------------------------------------------------------------------
@@ -440,9 +442,25 @@ render(Message, Template) ->
     emqx_placeholder:proc_tmpl(Template, Message, Opts).
 
 get_producer_status(Producers) ->
+    do_get_producer_status(Producers, 0).
+
+%% Polls `pulsar_producers:all_connected/1' every 200 ms until it returns `true'
+%% (-> `connected') or until `TimeSpent' (ms slept so far) reaches
+%% `?HEALTH_CHECK_RETRY_TIMEOUT' (-> `connecting').  Blocks the caller while
+%% polling.
+do_get_producer_status(Producers, TimeSpent) ->
     case pulsar_producers:all_connected(Producers) of
-        true -> connected;
-        false -> connecting
+        true ->
+            connected;
+        false when TimeSpent >= ?HEALTH_CHECK_RETRY_TIMEOUT ->
+            %% Retry budget exhausted.  Giving up right after a check (rather
+            %% than after one more sleep) avoids a final wait whose outcome
+            %% would never be inspected.
+            connecting;
+        false ->
+            Sleep = 200,
+            timer:sleep(Sleep),
+            do_get_producer_status(Producers, TimeSpent + Sleep)
     end.
 
 partition_strategy(key_dispatch) -> first_key_dispatch;
diff --git a/changes/ee/feat-11024.en.md b/changes/ee/feat-11024.en.md
new file mode 100644
index 000000000..b34b88575
--- /dev/null
+++ b/changes/ee/feat-11024.en.md
@@ -0,0 +1 @@
+Added a small improvement to reduce the chance of seeing the `connecting` state when creating/updating a Pulsar Producer bridge.