Merge pull request #13047 from keynslug/fix/EMQX-12366/api-ds-leave
feat(api-ds): provide more information on nonexistent site leave
This commit is contained in:
commit
3e1e4bab17
|
@ -131,7 +131,7 @@
|
||||||
-type transition() :: {add | del, site()}.
|
-type transition() :: {add | del, site()}.
|
||||||
|
|
||||||
-type update_cluster_result() ::
|
-type update_cluster_result() ::
|
||||||
ok
|
{ok, unchanged | [site()]}
|
||||||
| {error, {nonexistent_db, emqx_ds:db()}}
|
| {error, {nonexistent_db, emqx_ds:db()}}
|
||||||
| {error, {nonexistent_sites, [site()]}}
|
| {error, {nonexistent_sites, [site()]}}
|
||||||
| {error, {too_few_sites, [site()]}}
|
| {error, {too_few_sites, [site()]}}
|
||||||
|
@ -449,7 +449,7 @@ allocate_shards_trans(DB) ->
|
||||||
Allocation
|
Allocation
|
||||||
).
|
).
|
||||||
|
|
||||||
-spec assign_db_sites_trans(emqx_ds:db(), [site()]) -> ok.
|
-spec assign_db_sites_trans(emqx_ds:db(), [site()]) -> {ok, [site()]}.
|
||||||
assign_db_sites_trans(DB, Sites) ->
|
assign_db_sites_trans(DB, Sites) ->
|
||||||
Opts = db_config_trans(DB),
|
Opts = db_config_trans(DB),
|
||||||
case [S || S <- Sites, mnesia:read(?NODE_TAB, S, read) == []] of
|
case [S || S <- Sites, mnesia:read(?NODE_TAB, S, read) == []] of
|
||||||
|
@ -466,21 +466,22 @@ assign_db_sites_trans(DB, Sites) ->
|
||||||
%% 2. Ensure that sites are responsible for roughly the same number of shards.
|
%% 2. Ensure that sites are responsible for roughly the same number of shards.
|
||||||
Shards = mnesia:match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'}), write),
|
Shards = mnesia:match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'}), write),
|
||||||
Reallocation = compute_allocation(Shards, Sites, Opts),
|
Reallocation = compute_allocation(Shards, Sites, Opts),
|
||||||
lists:foreach(
|
ok = lists:foreach(
|
||||||
fun({Record, ReplicaSet}) ->
|
fun({Record, ReplicaSet}) ->
|
||||||
ok = mnesia:write(Record#?SHARD_TAB{target_set = ReplicaSet})
|
ok = mnesia:write(Record#?SHARD_TAB{target_set = ReplicaSet})
|
||||||
end,
|
end,
|
||||||
Reallocation
|
Reallocation
|
||||||
).
|
),
|
||||||
|
{ok, Sites}.
|
||||||
|
|
||||||
-spec modify_db_sites_trans(emqx_ds:db(), [transition()]) -> ok.
|
-spec modify_db_sites_trans(emqx_ds:db(), [transition()]) -> {ok, unchanged | [site()]}.
|
||||||
modify_db_sites_trans(DB, Modifications) ->
|
modify_db_sites_trans(DB, Modifications) ->
|
||||||
Shards = mnesia:match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'}), write),
|
Shards = mnesia:match_object(?SHARD_TAB, ?SHARD_PAT({DB, '_'}), write),
|
||||||
Sites0 = list_db_target_sites(Shards),
|
Sites0 = list_db_target_sites(Shards),
|
||||||
Sites = lists:foldl(fun apply_transition/2, Sites0, Modifications),
|
Sites = lists:foldl(fun apply_transition/2, Sites0, Modifications),
|
||||||
case Sites of
|
case Sites of
|
||||||
Sites0 ->
|
Sites0 ->
|
||||||
ok;
|
{ok, unchanged};
|
||||||
_Changed ->
|
_Changed ->
|
||||||
assign_db_sites_trans(DB, Sites)
|
assign_db_sites_trans(DB, Sites)
|
||||||
end.
|
end.
|
||||||
|
|
|
@ -239,7 +239,7 @@ t_rebalance(Config) ->
|
||||||
),
|
),
|
||||||
|
|
||||||
%% Scale down the cluster by removing the first node.
|
%% Scale down the cluster by removing the first node.
|
||||||
?assertEqual(ok, ds_repl_meta(N1, leave_db_site, [?DB, S1])),
|
?assertMatch({ok, _}, ds_repl_meta(N1, leave_db_site, [?DB, S1])),
|
||||||
ct:pal("Transitions (~p -> ~p): ~p~n", [
|
ct:pal("Transitions (~p -> ~p): ~p~n", [
|
||||||
Sites, tl(Sites), emqx_ds_test_helpers:transitions(N1, ?DB)
|
Sites, tl(Sites), emqx_ds_test_helpers:transitions(N1, ?DB)
|
||||||
]),
|
]),
|
||||||
|
@ -297,12 +297,12 @@ t_join_leave_errors(Config) ->
|
||||||
),
|
),
|
||||||
%% NOTE: Leaving a non-existent site is not an error.
|
%% NOTE: Leaving a non-existent site is not an error.
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
ok,
|
{ok, unchanged},
|
||||||
ds_repl_meta(N1, leave_db_site, [?DB, <<"NO-MANS-SITE">>])
|
ds_repl_meta(N1, leave_db_site, [?DB, <<"NO-MANS-SITE">>])
|
||||||
),
|
),
|
||||||
|
|
||||||
%% Should be no-op.
|
%% Should be no-op.
|
||||||
?assertEqual(ok, ds_repl_meta(N1, join_db_site, [?DB, S1])),
|
?assertEqual({ok, unchanged}, ds_repl_meta(N1, join_db_site, [?DB, S1])),
|
||||||
?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)),
|
?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)),
|
||||||
|
|
||||||
%% Impossible to leave the last site.
|
%% Impossible to leave the last site.
|
||||||
|
@ -312,13 +312,13 @@ t_join_leave_errors(Config) ->
|
||||||
),
|
),
|
||||||
|
|
||||||
%% "Move" the DB to the other node.
|
%% "Move" the DB to the other node.
|
||||||
?assertEqual(ok, ds_repl_meta(N1, join_db_site, [?DB, S2])),
|
?assertMatch({ok, _}, ds_repl_meta(N1, join_db_site, [?DB, S2])),
|
||||||
?assertEqual(ok, ds_repl_meta(N2, leave_db_site, [?DB, S1])),
|
?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?DB, S1])),
|
||||||
?assertMatch([_ | _], emqx_ds_test_helpers:transitions(N1, ?DB)),
|
?assertMatch([_ | _], emqx_ds_test_helpers:transitions(N1, ?DB)),
|
||||||
?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))),
|
?retry(1000, 10, ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB))),
|
||||||
|
|
||||||
%% Should be no-op.
|
%% Should be no-op.
|
||||||
?assertEqual(ok, ds_repl_meta(N2, leave_db_site, [?DB, S1])),
|
?assertMatch({ok, _}, ds_repl_meta(N2, leave_db_site, [?DB, S1])),
|
||||||
?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)).
|
?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)).
|
||||||
|
|
||||||
t_rebalance_chaotic_converges(init, Config) ->
|
t_rebalance_chaotic_converges(init, Config) ->
|
||||||
|
@ -457,7 +457,7 @@ t_rebalance_offline_restarts(Config) ->
|
||||||
|
|
||||||
%% Shut down N3 and then remove it from the DB.
|
%% Shut down N3 and then remove it from the DB.
|
||||||
ok = emqx_cth_cluster:stop_node(N3),
|
ok = emqx_cth_cluster:stop_node(N3),
|
||||||
?assertEqual(ok, ds_repl_meta(N1, leave_db_site, [?DB, S3])),
|
?assertMatch({ok, _}, ds_repl_meta(N1, leave_db_site, [?DB, S3])),
|
||||||
Transitions = emqx_ds_test_helpers:transitions(N1, ?DB),
|
Transitions = emqx_ds_test_helpers:transitions(N1, ?DB),
|
||||||
ct:pal("Transitions: ~p~n", [Transitions]),
|
ct:pal("Transitions: ~p~n", [Transitions]),
|
||||||
|
|
||||||
|
|
|
@ -164,8 +164,8 @@ apply_stream(DB, NodeStream0, Stream0, N) ->
|
||||||
->
|
->
|
||||||
?tp(notice, test_apply_operation, #{node => Node, operation => Operation, arg => Arg}),
|
?tp(notice, test_apply_operation, #{node => Node, operation => Operation, arg => Arg}),
|
||||||
%% Apply the transition.
|
%% Apply the transition.
|
||||||
?assertEqual(
|
?assertMatch(
|
||||||
ok,
|
{ok, _},
|
||||||
?ON(
|
?ON(
|
||||||
Node,
|
Node,
|
||||||
emqx_ds_replication_layer_meta:Operation(DB, Arg)
|
emqx_ds_replication_layer_meta:Operation(DB, Arg)
|
||||||
|
|
|
@ -323,17 +323,11 @@ get_db(get, #{bindings := #{ds := DB}}) ->
|
||||||
}).
|
}).
|
||||||
|
|
||||||
db_replicas(get, #{bindings := #{ds := DB}}) ->
|
db_replicas(get, #{bindings := #{ds := DB}}) ->
|
||||||
Replicas = lists:flatmap(
|
Replicas = emqx_ds_replication_layer_meta:db_sites(DB),
|
||||||
fun(Shard) ->
|
?OK(Replicas);
|
||||||
#{replica_set := RS} = emqx_ds_replication_layer_meta:shard_info(DB, Shard),
|
|
||||||
maps:keys(RS)
|
|
||||||
end,
|
|
||||||
emqx_ds_replication_layer_meta:shards(DB)
|
|
||||||
),
|
|
||||||
?OK(lists:usort(Replicas));
|
|
||||||
db_replicas(put, #{bindings := #{ds := DB}, body := Sites}) ->
|
db_replicas(put, #{bindings := #{ds := DB}, body := Sites}) ->
|
||||||
case update_db_sites(DB, Sites, rest) of
|
case update_db_sites(DB, Sites, rest) of
|
||||||
ok ->
|
{ok, _} ->
|
||||||
{202, <<"OK">>};
|
{202, <<"OK">>};
|
||||||
{error, Description} ->
|
{error, Description} ->
|
||||||
?BAD_REQUEST(400, Description)
|
?BAD_REQUEST(400, Description)
|
||||||
|
@ -341,21 +335,23 @@ db_replicas(put, #{bindings := #{ds := DB}, body := Sites}) ->
|
||||||
|
|
||||||
db_replica(put, #{bindings := #{ds := DB, site := Site}}) ->
|
db_replica(put, #{bindings := #{ds := DB, site := Site}}) ->
|
||||||
case join(DB, Site, rest) of
|
case join(DB, Site, rest) of
|
||||||
ok ->
|
{ok, _} ->
|
||||||
{202, <<"OK">>};
|
{202, <<"OK">>};
|
||||||
{error, Description} ->
|
{error, Description} ->
|
||||||
?BAD_REQUEST(400, Description)
|
?BAD_REQUEST(400, Description)
|
||||||
end;
|
end;
|
||||||
db_replica(delete, #{bindings := #{ds := DB, site := Site}}) ->
|
db_replica(delete, #{bindings := #{ds := DB, site := Site}}) ->
|
||||||
case leave(DB, Site, rest) of
|
case leave(DB, Site, rest) of
|
||||||
ok ->
|
{ok, Sites} when is_list(Sites) ->
|
||||||
{202, <<"OK">>};
|
{202, <<"OK">>};
|
||||||
|
{ok, unchanged} ->
|
||||||
|
?NOT_FOUND(<<"Site is not part of replica set">>);
|
||||||
{error, Description} ->
|
{error, Description} ->
|
||||||
?BAD_REQUEST(400, Description)
|
?BAD_REQUEST(400, Description)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec update_db_sites(emqx_ds:db(), [emqx_ds_replication_layer_meta:site()], rest | cli) ->
|
-spec update_db_sites(emqx_ds:db(), [emqx_ds_replication_layer_meta:site()], rest | cli) ->
|
||||||
ok | {error, binary()}.
|
{ok, [emqx_ds_replication_layer_meta:site()]} | {error, _}.
|
||||||
update_db_sites(DB, Sites, Via) when is_list(Sites) ->
|
update_db_sites(DB, Sites, Via) when is_list(Sites) ->
|
||||||
?SLOG(warning, #{
|
?SLOG(warning, #{
|
||||||
msg => "durable_storage_rebalance_request", ds => DB, sites => Sites, via => Via
|
msg => "durable_storage_rebalance_request", ds => DB, sites => Sites, via => Via
|
||||||
|
@ -364,14 +360,16 @@ update_db_sites(DB, Sites, Via) when is_list(Sites) ->
|
||||||
update_db_sites(_, _, _) ->
|
update_db_sites(_, _, _) ->
|
||||||
{error, <<"Bad type">>}.
|
{error, <<"Bad type">>}.
|
||||||
|
|
||||||
-spec join(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) -> ok | {error, _}.
|
-spec join(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) ->
|
||||||
|
{ok, unchanged | [emqx_ds_replication_layer_meta:site()]} | {error, _}.
|
||||||
join(DB, Site, Via) ->
|
join(DB, Site, Via) ->
|
||||||
?SLOG(warning, #{
|
?SLOG(warning, #{
|
||||||
msg => "durable_storage_join_request", ds => DB, site => Site, via => Via
|
msg => "durable_storage_join_request", ds => DB, site => Site, via => Via
|
||||||
}),
|
}),
|
||||||
meta_result_to_binary(emqx_ds_replication_layer_meta:join_db_site(DB, Site)).
|
meta_result_to_binary(emqx_ds_replication_layer_meta:join_db_site(DB, Site)).
|
||||||
|
|
||||||
-spec leave(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) -> ok | {error, _}.
|
-spec leave(emqx_ds:db(), emqx_ds_replication_layer_meta:site(), rest | cli) ->
|
||||||
|
{ok, unchanged | [emqx_ds_replication_layer_meta:site()]} | {error, _}.
|
||||||
leave(DB, Site, Via) ->
|
leave(DB, Site, Via) ->
|
||||||
?SLOG(warning, #{
|
?SLOG(warning, #{
|
||||||
msg => "durable_storage_leave_request", ds => DB, site => Site, via => Via
|
msg => "durable_storage_leave_request", ds => DB, site => Site, via => Via
|
||||||
|
@ -468,8 +466,8 @@ list_shards(DB) ->
|
||||||
|| Shard <- emqx_ds_replication_layer_meta:shards(DB)
|
|| Shard <- emqx_ds_replication_layer_meta:shards(DB)
|
||||||
].
|
].
|
||||||
|
|
||||||
meta_result_to_binary(ok) ->
|
meta_result_to_binary({ok, Result}) ->
|
||||||
ok;
|
{ok, Result};
|
||||||
meta_result_to_binary({error, {nonexistent_sites, UnknownSites}}) ->
|
meta_result_to_binary({error, {nonexistent_sites, UnknownSites}}) ->
|
||||||
Msg = ["Unknown sites: " | lists:join(", ", UnknownSites)],
|
Msg = ["Unknown sites: " | lists:join(", ", UnknownSites)],
|
||||||
{error, iolist_to_binary(Msg)};
|
{error, iolist_to_binary(Msg)};
|
||||||
|
|
|
@ -855,7 +855,7 @@ do_ds(["set_replicas", DBStr | SitesStr]) ->
|
||||||
{ok, DB} ->
|
{ok, DB} ->
|
||||||
Sites = lists:map(fun list_to_binary/1, SitesStr),
|
Sites = lists:map(fun list_to_binary/1, SitesStr),
|
||||||
case emqx_mgmt_api_ds:update_db_sites(DB, Sites, cli) of
|
case emqx_mgmt_api_ds:update_db_sites(DB, Sites, cli) of
|
||||||
ok ->
|
{ok, _} ->
|
||||||
emqx_ctl:print("ok~n");
|
emqx_ctl:print("ok~n");
|
||||||
{error, Description} ->
|
{error, Description} ->
|
||||||
emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
|
emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
|
||||||
|
@ -867,7 +867,9 @@ do_ds(["join", DBStr, Site]) ->
|
||||||
case emqx_utils:safe_to_existing_atom(DBStr) of
|
case emqx_utils:safe_to_existing_atom(DBStr) of
|
||||||
{ok, DB} ->
|
{ok, DB} ->
|
||||||
case emqx_mgmt_api_ds:join(DB, list_to_binary(Site), cli) of
|
case emqx_mgmt_api_ds:join(DB, list_to_binary(Site), cli) of
|
||||||
ok ->
|
{ok, unchanged} ->
|
||||||
|
emqx_ctl:print("unchanged~n");
|
||||||
|
{ok, _} ->
|
||||||
emqx_ctl:print("ok~n");
|
emqx_ctl:print("ok~n");
|
||||||
{error, Description} ->
|
{error, Description} ->
|
||||||
emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
|
emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
|
||||||
|
@ -879,7 +881,9 @@ do_ds(["leave", DBStr, Site]) ->
|
||||||
case emqx_utils:safe_to_existing_atom(DBStr) of
|
case emqx_utils:safe_to_existing_atom(DBStr) of
|
||||||
{ok, DB} ->
|
{ok, DB} ->
|
||||||
case emqx_mgmt_api_ds:leave(DB, list_to_binary(Site), cli) of
|
case emqx_mgmt_api_ds:leave(DB, list_to_binary(Site), cli) of
|
||||||
ok ->
|
{ok, unchanged} ->
|
||||||
|
emqx_ctl:print("unchanged~n");
|
||||||
|
{ok, _} ->
|
||||||
emqx_ctl:print("ok~n");
|
emqx_ctl:print("ok~n");
|
||||||
{error, Description} ->
|
{error, Description} ->
|
||||||
emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
|
emqx_ctl:print("Unable to update replicas: ~s~n", [Description])
|
||||||
|
|
|
@ -174,6 +174,14 @@ t_leave(_) ->
|
||||||
request_api(delete, Path)
|
request_api(delete, Path)
|
||||||
).
|
).
|
||||||
|
|
||||||
|
t_leave_notfound(_) ->
|
||||||
|
Site = "not_part_of_replica_set",
|
||||||
|
Path = api_path(["ds", "storages", "emqx_persistent_message", "replicas", Site]),
|
||||||
|
?assertMatch(
|
||||||
|
{error, {_, 404, _}},
|
||||||
|
request_api(delete, Path)
|
||||||
|
).
|
||||||
|
|
||||||
parse_error({ok, Code, JSON}) ->
|
parse_error({ok, Code, JSON}) ->
|
||||||
{ok, Code, emqx_utils_json:decode(JSON)};
|
{ok, Code, emqx_utils_json:decode(JSON)};
|
||||||
parse_error(Err) ->
|
parse_error(Err) ->
|
||||||
|
|
Loading…
Reference in New Issue