fix(dsrepl): correctly handle ra membership change command results
Before this change, results such as `{error, {no_more_servers_to_try, [{error, nodedown}, {error, not_member}]}}` were treated as retryable failures, which is incorrect: the per-server reasons nested inside such a result (here `not_member`) were never inspected.
This commit is contained in:
parent
3223797ae5
commit
d12e907209
|
@ -173,13 +173,14 @@ add_local_server(DB, Shard) ->
|
||||||
membership => voter
|
membership => voter
|
||||||
}
|
}
|
||||||
end,
|
end,
|
||||||
case ra:add_member(ShardServers, ServerRecord, ?MEMBERSHIP_CHANGE_TIMEOUT) of
|
Timeout = ?MEMBERSHIP_CHANGE_TIMEOUT,
|
||||||
|
case ra_try_servers(ShardServers, fun ra:add_member/3, [ServerRecord, Timeout]) of
|
||||||
{ok, _, _Leader} ->
|
{ok, _, _Leader} ->
|
||||||
ok;
|
ok;
|
||||||
{error, already_member} ->
|
{error, already_member} ->
|
||||||
ok;
|
ok;
|
||||||
{error, Reason} ->
|
Error ->
|
||||||
{error, recoverable, Reason}
|
{error, recoverable, Error}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Remove a local server from the shard cluster and clean up on-disk data.
|
%% @doc Remove a local server from the shard cluster and clean up on-disk data.
|
||||||
|
@ -219,13 +220,14 @@ drop_local_server(DB, Shard) ->
|
||||||
ok | emqx_ds:error(_Reason).
|
ok | emqx_ds:error(_Reason).
|
||||||
remove_server(DB, Shard, Server) ->
|
remove_server(DB, Shard, Server) ->
|
||||||
ShardServers = shard_servers(DB, Shard),
|
ShardServers = shard_servers(DB, Shard),
|
||||||
case ra:remove_member(ShardServers, Server, ?MEMBERSHIP_CHANGE_TIMEOUT) of
|
Timeout = ?MEMBERSHIP_CHANGE_TIMEOUT,
|
||||||
|
case ra_try_servers(ShardServers, fun ra:remove_member/3, [Server, Timeout]) of
|
||||||
{ok, _, _Leader} ->
|
{ok, _, _Leader} ->
|
||||||
ok;
|
ok;
|
||||||
{error, not_member} ->
|
{error, not_member} ->
|
||||||
ok;
|
ok;
|
||||||
{error, Reason} ->
|
Error ->
|
||||||
{error, recoverable, Reason}
|
{error, recoverable, Error}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec server_info
|
-spec server_info
|
||||||
|
@ -272,6 +274,20 @@ member_readiness(#{status := Status, voter_status := #{membership := Membership}
|
||||||
member_readiness(#{}) ->
|
member_readiness(#{}) ->
|
||||||
unknown.
|
unknown.
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
%% Invoke `Fun' against the given servers one at a time, in list order.
%%
%% Each attempt calls `Fun' with the candidate server prepended to `Args'.
%% A reply of `{error, noproc}' or `{error, nodedown}' moves on to the next
%% server in the list. Any other reply (a successful `{ok, _, _}', a
%% different error, or any other term the callee returns) is handed back
%% to the caller unchanged.
%%
%% Returns `{error, servers_unreachable}' once the list is exhausted,
%% which includes the case of an empty list to begin with.
ra_try_servers([], _Fun, _Args) ->
    {error, servers_unreachable};
ra_try_servers([Candidate | Remaining], Fun, Args) ->
    case erlang:apply(Fun, [Candidate | Args]) of
        {error, noproc} ->
            ra_try_servers(Remaining, Fun, Args);
        {error, nodedown} ->
            ra_try_servers(Remaining, Fun, Args);
        Outcome ->
            %% Definitive answer from this server: return it verbatim.
            Outcome
    end.
|
||||||
|
|
||||||
ra_overview(Server) ->
|
ra_overview(Server) ->
|
||||||
case ra:member_overview(Server) of
|
case ra:member_overview(Server) of
|
||||||
{ok, Overview, _Leader} ->
|
{ok, Overview, _Leader} ->
|
||||||
|
|
Loading…
Reference in New Issue