wip: tolerate trigger election timeouts for existing servers

This commit is contained in:
Andrew Mayorov 2024-02-12 18:45:50 +01:00
parent 114efb9818
commit 5ab036a5d2
No known key found for this signature in database
GPG Key ID: 2837C62ACFBFED5D
1 changed file with 13 additions and 2 deletions

View File

@ -153,8 +153,9 @@ start_shard(DB, Shard) ->
Servers = shard_servers(DB, Shard),
case ra:restart_server(System, LocalServer) of
ok ->
ok;
Bootstrap = false;
{error, name_not_registered} ->
Bootstrap = true,
ok = ra:start_server(System, #{
id => LocalServer,
uid => <<ClusterName/binary, "_", Site/binary>>,
@ -172,7 +173,17 @@ start_shard(DB, Shard) ->
%% is not really required otherwise.
%% TODO
%% Ensure that doing that on node restart does not disrupt consensus.
ok = ra:trigger_election(LocalServer);
%% Edit: it looks like re-triggering the election does not disrupt consensus.
%% It could actually be quite useful for "stealing" leadership from nodes
%% that carry too much leader load.
try
ra:trigger_election(LocalServer, _Timeout = 1_000)
catch
%% TODO
%% Tolerating call timeouts, but only for a restarted (non-bootstrapped)
%% server, because it might be occupied with log replay for a while.
exit:{timeout, _} when not Bootstrap ->
ok
end;
_ ->
ok
end,