fix(dsrepl): ensure that new member UID matches server's UID

Before that change, UIDs supplied in the `ra:add_member/3` were not
the same as those servers were using. This haven't caused any issues
for some reason, but it's better to ensure that UIDs are the same.
This commit is contained in:
Andrew Mayorov 2024-04-07 22:31:24 +02:00
parent 556ffc78c9
commit 826ce5806d
No known key found for this signature in database
GPG Key ID: 2837C62ACFBFED5D
1 changed files with 36 additions and 20 deletions

View File

@ -80,14 +80,6 @@ server_name(DB, Shard, Site) ->
DBBin = atom_to_binary(DB),
binary_to_atom(<<"ds_", DBBin/binary, Shard/binary, "_", Site/binary>>).
server_uid(_DB, Shard) ->
%% NOTE
%% Each new "instance" of a server should have a unique identifier. Otherwise,
%% if some server migrates to another node during rebalancing, and then comes
%% back, `ra` will be very confused by it having the same UID as before.
Ts = integer_to_binary(erlang:system_time(microsecond)),
<<Shard/binary, "_", Ts/binary>>.
%%
servers(DB, Shard, _Order = leader_preferred) ->
@ -159,11 +151,19 @@ add_local_server(DB, Shard) ->
%% readiness.
ShardServers = shard_servers(DB, Shard),
LocalServer = local_server(DB, Shard),
ServerRecord = #{
id => LocalServer,
membership => promotable,
uid => server_uid(DB, Shard)
},
case server_info(uid, LocalServer) of
UID when is_binary(UID) ->
ServerRecord = #{
id => LocalServer,
membership => promotable,
uid => UID
};
unknown ->
ServerRecord = #{
id => LocalServer,
membership => voter
}
end,
case ra:add_member(ShardServers, ServerRecord, ?MEMBERSHIP_CHANGE_TIMEOUT) of
{ok, _, _Leader} ->
ok;
@ -206,15 +206,13 @@ server_info(readiness, Server) ->
unknown
end;
server_info(leader, Server) ->
current_leader(Server).
current_leader(Server);
server_info(uid, Server) ->
maps:get(uid, ra_overview(Server), unknown).
member_info(readiness, Server, Leader) ->
case ra:member_overview(Leader) of
{ok, #{cluster := Cluster}, _} ->
member_readiness(maps:get(Server, Cluster));
_Error ->
unknown
end.
Cluster = maps:get(cluster, ra_overview(Leader), #{}),
member_readiness(maps:get(Server, Cluster, #{})).
current_leader(Server) ->
case ra:members(Server) of
@ -234,6 +232,14 @@ member_readiness(#{status := Status, voter_status := #{membership := Membership}
member_readiness(#{}) ->
unknown.
ra_overview(Server) ->
case ra:member_overview(Server) of
{ok, Overview, _Leader} ->
Overview;
_Error ->
#{}
end.
%%
init({DB, Shard, Opts}) ->
@ -305,6 +311,16 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) ->
ok
end.
server_uid(_DB, Shard) ->
%% NOTE
%% Each new "instance" of a server should have a unique identifier. Otherwise,
%% if some server migrates to another node during rebalancing, and then comes
%% back, `ra` will be very confused by it having the same UID as before.
%% Keeping the shard ID as a prefix to make it easier to identify the server
%% in the filesystem / logs / etc.
Ts = integer_to_binary(erlang:system_time(microsecond)),
<<Shard/binary, "_", Ts/binary>>.
%%
memoize(Fun, Args) ->