fix(dsrepl): trigger election for new ra servers unconditionally
Otherwise we might end up in a situation where no member is online yet at the time the election is triggered, and the election never happens.
This commit is contained in:
parent
34be2ea9a0
commit
c4d1360b96
|
@ -341,30 +341,22 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) ->
|
||||||
log_init_args => LogOpts
|
log_init_args => LogOpts
|
||||||
})
|
})
|
||||||
end,
|
end,
|
||||||
case Servers of
|
%% NOTE
|
||||||
[LocalServer | _] ->
|
%% Triggering election is necessary when a new consensus group is being brought up.
|
||||||
%% TODO
|
%% TODO
|
||||||
%% Not super robust, but we probably don't expect nodes to be down
|
%% It's probably a good idea to rebalance leaders across the cluster from time to
|
||||||
%% when we bring up a fresh consensus group. Triggering election
|
%% time. There's `ra:transfer_leadership/2` for that.
|
||||||
%% is not really required otherwise.
|
try Bootstrap andalso ra:trigger_election(LocalServer, _Timeout = 1_000) of
|
||||||
%% TODO
|
false ->
|
||||||
%% Ensure that doing that on node restart does not disrupt consensus.
|
ok;
|
||||||
%% Edit: looks like it doesn't, this could actually be quite useful
|
ok ->
|
||||||
%% to "steal" leadership from nodes that have too much leader load.
|
ok
|
||||||
%% TODO
|
|
||||||
%% It doesn't really work that way. There's `ra:transfer_leadership/2`
|
|
||||||
%% for that.
|
|
||||||
try
|
|
||||||
ra:trigger_election(LocalServer, _Timeout = 1_000)
|
|
||||||
catch
|
catch
|
||||||
%% TODO
|
%% TODO
|
||||||
%% Tolerating exceptions because server might be occupied with log
|
%% Tolerating exceptions because server might be occupied with log replay for
|
||||||
%% replay for a while.
|
%% a while.
|
||||||
exit:{timeout, _} when not Bootstrap ->
|
exit:{timeout, _} when not Bootstrap ->
|
||||||
ok
|
ok
|
||||||
end;
|
|
||||||
_ ->
|
|
||||||
ok
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
server_uid(_DB, Shard) ->
|
server_uid(_DB, Shard) ->
|
||||||
|
|
|
@ -435,8 +435,8 @@ t_rebalance_offline_restarts(Config) ->
|
||||||
erpc:multicall(Nodes, emqx_ds, open_db, [?DB, Opts])
|
erpc:multicall(Nodes, emqx_ds, open_db, [?DB, Opts])
|
||||||
),
|
),
|
||||||
?retry(
|
?retry(
|
||||||
500,
|
1000,
|
||||||
10,
|
5,
|
||||||
?assertEqual([8 || _ <- Nodes], [n_shards_online(N, ?DB) || N <- Nodes])
|
?assertEqual([8 || _ <- Nodes], [n_shards_online(N, ?DB) || N <- Nodes])
|
||||||
),
|
),
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue