fix(dsrepl): tolerate trigger election timeouts for existing servers
This commit is contained in:
parent
6c6ea50e42
commit
f89909f60c
|
@ -153,8 +153,9 @@ start_shard(DB, Shard) ->
|
||||||
Servers = shard_servers(DB, Shard),
|
Servers = shard_servers(DB, Shard),
|
||||||
case ra:restart_server(System, LocalServer) of
|
case ra:restart_server(System, LocalServer) of
|
||||||
ok ->
|
ok ->
|
||||||
ok;
|
Bootstrap = false;
|
||||||
{error, name_not_registered} ->
|
{error, name_not_registered} ->
|
||||||
|
Bootstrap = true,
|
||||||
ok = ra:start_server(System, #{
|
ok = ra:start_server(System, #{
|
||||||
id => LocalServer,
|
id => LocalServer,
|
||||||
uid => <<ClusterName/binary, "_", Site/binary>>,
|
uid => <<ClusterName/binary, "_", Site/binary>>,
|
||||||
|
@ -172,7 +173,17 @@ start_shard(DB, Shard) ->
|
||||||
%% is not really required otherwise.
|
%% is not really required otherwise.
|
||||||
%% TODO
|
%% TODO
|
||||||
%% Ensure that doing that on node restart does not disrupt consensus.
|
%% Ensure that doing that on node restart does not disrupt consensus.
|
||||||
ok = ra:trigger_election(LocalServer);
|
%% Edit: it looks like it doesn't; this could actually be quite useful
|
||||||
|
%% to "steal" leadership from nodes that have too much leader load.
|
||||||
|
try
|
||||||
|
ra:trigger_election(LocalServer, _Timeout = 1_000)
|
||||||
|
catch
|
||||||
|
%% TODO
|
||||||
|
%% Tolerating timeout exits because the server might be occupied with log
|
||||||
|
%% replay for a while.
|
||||||
|
exit:{timeout, _} when not Bootstrap ->
|
||||||
|
ok
|
||||||
|
end;
|
||||||
_ ->
|
_ ->
|
||||||
ok
|
ok
|
||||||
end,
|
end,
|
||||||
|
|
Loading…
Reference in New Issue