test(dsrepl): add crash-restart-recover testcase
That verifies nothing is lost in the event of abrupt node failures.
This commit is contained in:
parent
ae89b61af0
commit
8db70b5bbc
|
@ -794,6 +794,77 @@ t_store_batch_fail(_Config) ->
|
||||||
]
|
]
|
||||||
).
|
).
|
||||||
|
|
||||||
|
t_crash_restart_recover(init, Config) ->
|
||||||
|
Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft],
|
||||||
|
Specs = emqx_cth_cluster:mk_nodespecs(
|
||||||
|
[
|
||||||
|
{t_crash_stop_recover1, #{apps => Apps}},
|
||||||
|
{t_crash_stop_recover2, #{apps => Apps}},
|
||||||
|
{t_crash_stop_recover3, #{apps => Apps}}
|
||||||
|
],
|
||||||
|
#{work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)}
|
||||||
|
),
|
||||||
|
Nodes = emqx_cth_cluster:start(Specs),
|
||||||
|
[{nodes, Nodes}, {nodespecs, Specs} | Config];
|
||||||
|
t_crash_restart_recover('end', Config) ->
|
||||||
|
ok = emqx_cth_cluster:stop(?config(nodes, Config)).
|
||||||
|
|
||||||
|
t_crash_restart_recover(Config) ->
|
||||||
|
%% This testcase verifies that in the event of abrupt site failure message data is
|
||||||
|
%% correctly preserved.
|
||||||
|
Nodes = [N1, N2, N3] = ?config(nodes, Config),
|
||||||
|
_Specs = [_, NS2, NS3] = ?config(nodespecs, Config),
|
||||||
|
DBOpts = opts(#{n_shards => 16, n_sites => 3, replication_factor => 3}),
|
||||||
|
|
||||||
|
%% Prepare test event stream.
|
||||||
|
{Stream0, TopicStreams} = emqx_ds_test_helpers:interleaved_topic_messages(
|
||||||
|
?FUNCTION_NAME, _NClients = 8, _NMsgs = 400
|
||||||
|
),
|
||||||
|
Stream1 = emqx_utils_stream:interleave(
|
||||||
|
[
|
||||||
|
{300, Stream0},
|
||||||
|
emqx_utils_stream:const(add_generation)
|
||||||
|
],
|
||||||
|
false
|
||||||
|
),
|
||||||
|
Stream = emqx_utils_stream:interleave(
|
||||||
|
[
|
||||||
|
{1000, Stream1},
|
||||||
|
emqx_utils_stream:list([
|
||||||
|
fun() -> kill_restart_node_async(N2, NS2, DBOpts) end,
|
||||||
|
fun() -> kill_restart_node_async(N3, NS3, DBOpts) end
|
||||||
|
])
|
||||||
|
],
|
||||||
|
true
|
||||||
|
),
|
||||||
|
|
||||||
|
?check_trace(
|
||||||
|
begin
|
||||||
|
%% Initialize DB on all nodes.
|
||||||
|
?assertEqual(
|
||||||
|
[{ok, ok} || _ <- Nodes],
|
||||||
|
erpc:multicall(Nodes, emqx_ds, open_db, [?DB, DBOpts])
|
||||||
|
),
|
||||||
|
|
||||||
|
%% Apply the test events, including simulated node crashes.
|
||||||
|
NodeStream = emqx_utils_stream:const(N1),
|
||||||
|
emqx_ds_test_helpers:apply_stream(?DB, NodeStream, Stream, 0),
|
||||||
|
timer:sleep(5000),
|
||||||
|
|
||||||
|
%% Verify that all the data is there.
|
||||||
|
emqx_ds_test_helpers:verify_stream_effects(?DB, ?FUNCTION_NAME, Nodes, TopicStreams)
|
||||||
|
end,
|
||||||
|
[]
|
||||||
|
).
|
||||||
|
|
||||||
|
kill_restart_node_async(Node, Spec, DBOpts) ->
|
||||||
|
erlang:spawn_link(?MODULE, kill_restart_node, [Node, Spec, DBOpts]).
|
||||||
|
|
||||||
|
kill_restart_node(Node, Spec, DBOpts) ->
|
||||||
|
ok = emqx_cth_peer:kill(Node),
|
||||||
|
_ = emqx_cth_cluster:restart(Spec),
|
||||||
|
ok = erpc:call(Node, emqx_ds, open_db, [?DB, DBOpts]).
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
shard_server_info(Node, DB, Shard, Site, Info) ->
|
shard_server_info(Node, DB, Shard, Site, Info) ->
|
||||||
|
|
|
@ -188,12 +188,14 @@ apply_stream(DB, NodeStream0, Stream0, N) ->
|
||||||
?ON(Node, emqx_ds:store_batch(DB, [Msg], #{sync => true})),
|
?ON(Node, emqx_ds:store_batch(DB, [Msg], #{sync => true})),
|
||||||
apply_stream(DB, NodeStream, Stream, N + 1);
|
apply_stream(DB, NodeStream, Stream, N + 1);
|
||||||
[add_generation | Stream] ->
|
[add_generation | Stream] ->
|
||||||
%% FIXME:
|
?tp(notice, test_add_generation, #{}),
|
||||||
[Node | NodeStream] = emqx_utils_stream:next(NodeStream0),
|
[Node | NodeStream] = emqx_utils_stream:next(NodeStream0),
|
||||||
?ON(Node, emqx_ds:add_generation(DB)),
|
?ON(Node, emqx_ds:add_generation(DB)),
|
||||||
apply_stream(DB, NodeStream, Stream, N);
|
apply_stream(DB, NodeStream, Stream, N);
|
||||||
[{Node, Operation, Arg} | Stream] when
|
[{Node, Operation, Arg} | Stream] when
|
||||||
Operation =:= join_db_site; Operation =:= leave_db_site; Operation =:= assign_db_sites
|
Operation =:= join_db_site;
|
||||||
|
Operation =:= leave_db_site;
|
||||||
|
Operation =:= assign_db_sites
|
||||||
->
|
->
|
||||||
?tp(notice, test_apply_operation, #{node => Node, operation => Operation, arg => Arg}),
|
?tp(notice, test_apply_operation, #{node => Node, operation => Operation, arg => Arg}),
|
||||||
%% Apply the transition.
|
%% Apply the transition.
|
||||||
|
|
Loading…
Reference in New Issue