From ba6382adaed40af8c07d2fa857e673501d39572a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 23 May 2024 14:54:01 +0200 Subject: [PATCH] fix(dsrepl): trigger "last-resort" pending transitions handler when idle This is a hack to work around the unintended issues causing shard allocator to become idle even when there are pending transitions. --- .../src/emqx_ds_replication_shard_allocator.erl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl index d198b2ddd..cbaafc718 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl @@ -41,6 +41,7 @@ -define(shard_meta(DB, SHARD), {?MODULE, DB, SHARD}). -define(ALLOCATE_RETRY_TIMEOUT, 1_000). +-define(TRIGGER_PENDING_TIMEOUT, 60_000). -define(TRANS_RETRY_TIMEOUT, 5_000). -define(CRASH_RETRY_DELAY, 20_000). @@ -106,7 +107,7 @@ handle_call(_Call, _From, State) -> -spec handle_cast(_Cast, state()) -> {noreply, state()}. handle_cast(#trigger_transitions{}, State) -> - {noreply, handle_pending_transitions(State)}; + {noreply, handle_pending_transitions(State), ?TRIGGER_PENDING_TIMEOUT}; handle_cast(_Cast, State) -> {noreply, State}. @@ -118,13 +119,15 @@ handle_cast(_Cast, State) -> handle_info({timeout, _TRef, allocate}, State) -> {noreply, handle_allocate_shards(State)}; handle_info({changed, {shard, DB, Shard}}, State = #{db := DB}) -> - {noreply, handle_shard_changed(Shard, State)}; + {noreply, handle_shard_changed(Shard, State), ?TRIGGER_PENDING_TIMEOUT}; handle_info({changed, _}, State) -> - {noreply, State}; + {noreply, State, ?TRIGGER_PENDING_TIMEOUT}; handle_info({'EXIT', Pid, Reason}, State) -> - {noreply, handle_exit(Pid, Reason, State)}; + {noreply, handle_exit(Pid, Reason, State), ?TRIGGER_PENDING_TIMEOUT}; +handle_info(timeout, State) -> + {noreply, handle_pending_transitions(State), ?TRIGGER_PENDING_TIMEOUT}; handle_info(_Info, State) -> - {noreply, State}. + {noreply, State, ?TRIGGER_PENDING_TIMEOUT}. -spec terminate(_Reason, state()) -> _Ok. terminate(_Reason, State = #{db := DB, shards := Shards}) ->