feat(ds): Assign latest timestamp deterministically
This commit is contained in:
parent
2236af84ba
commit
a0a3977043
|
@ -734,20 +734,20 @@ apply(
|
||||||
{State, Result};
|
{State, Result};
|
||||||
apply(
|
apply(
|
||||||
_RaftMeta,
|
_RaftMeta,
|
||||||
#{?tag := storage_event, ?payload := CustomEvent},
|
#{?tag := storage_event, ?payload := CustomEvent, ?now := Now},
|
||||||
#{db_shard := DBShard, latest := Latest0} = State
|
#{db_shard := DBShard, latest := Latest0} = State
|
||||||
) ->
|
) ->
|
||||||
{Timestamp, Latest} = ensure_monotonic_timestamp(emqx_ds:timestamp_us(), Latest0),
|
Latest = max(Latest0, Now),
|
||||||
set_ts(DBShard, Latest),
|
set_ts(DBShard, Latest),
|
||||||
?tp(
|
?tp(
|
||||||
debug,
|
debug,
|
||||||
emqx_ds_replication_layer_storage_event,
|
emqx_ds_replication_layer_storage_event,
|
||||||
#{
|
#{
|
||||||
shard => DBShard, ts => Timestamp, payload => CustomEvent
|
shard => DBShard, payload => CustomEvent, latest => Latest
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
Effects = handle_custom_event(DBShard, Timestamp, CustomEvent),
|
Effects = handle_custom_event(DBShard, Latest, CustomEvent),
|
||||||
{State#{latest := Latest}, ok, Effects}.
|
{State#{latest => Latest}, ok, Effects}.
|
||||||
|
|
||||||
-spec tick(integer(), ra_state()) -> ra_machine:effects().
|
-spec tick(integer(), ra_state()) -> ra_machine:effects().
|
||||||
tick(TimeMs, #{db_shard := DBShard = {DB, Shard}, latest := Latest}) ->
|
tick(TimeMs, #{db_shard := DBShard = {DB, Shard}, latest := Latest}) ->
|
||||||
|
@ -791,7 +791,7 @@ snapshot_module() ->
|
||||||
handle_custom_event(DBShard, Latest, Event) ->
|
handle_custom_event(DBShard, Latest, Event) ->
|
||||||
try
|
try
|
||||||
Events = emqx_ds_storage_layer:handle_event(DBShard, Latest, Event),
|
Events = emqx_ds_storage_layer:handle_event(DBShard, Latest, Event),
|
||||||
[{append, #{?tag => storage_event, ?payload => I}} || I <- Events]
|
[{append, #{?tag => storage_event, ?payload => I, ?now => Latest}} || I <- Events]
|
||||||
catch
|
catch
|
||||||
EC:Err:Stacktrace ->
|
EC:Err:Stacktrace ->
|
||||||
?tp(error, ds_storage_custom_even_fail, #{
|
?tp(error, ds_storage_custom_even_fail, #{
|
||||||
|
|
|
@ -43,5 +43,6 @@
|
||||||
|
|
||||||
%% custom events
|
%% custom events
|
||||||
-define(payload, 2).
|
-define(payload, 2).
|
||||||
|
-define(now, 3).
|
||||||
|
|
||||||
-endif.
|
-endif.
|
||||||
|
|
|
@ -41,7 +41,7 @@ opts(Overrides) ->
|
||||||
#{
|
#{
|
||||||
backend => builtin,
|
backend => builtin,
|
||||||
%% storage => {emqx_ds_storage_reference, #{}},
|
%% storage => {emqx_ds_storage_reference, #{}},
|
||||||
storage => {emqx_ds_storage_bitfield_lts, #{epoch_bits => 1}},
|
storage => {emqx_ds_storage_bitfield_lts, #{epoch_bits => 10}},
|
||||||
n_shards => 16,
|
n_shards => 16,
|
||||||
n_sites => 1,
|
n_sites => 1,
|
||||||
replication_factor => 3,
|
replication_factor => 3,
|
||||||
|
@ -159,7 +159,6 @@ t_rebalance('end', Config) ->
|
||||||
t_rebalance(Config) ->
|
t_rebalance(Config) ->
|
||||||
NMsgs = 50,
|
NMsgs = 50,
|
||||||
NClients = 5,
|
NClients = 5,
|
||||||
NEvents = NMsgs * NClients,
|
|
||||||
%% List of fake client IDs:
|
%% List of fake client IDs:
|
||||||
Clients = [integer_to_binary(I) || I <- lists:seq(1, NClients)],
|
Clients = [integer_to_binary(I) || I <- lists:seq(1, NClients)],
|
||||||
%% List of streams that generate messages for each "client" in its own topic:
|
%% List of streams that generate messages for each "client" in its own topic:
|
||||||
|
@ -168,7 +167,16 @@ t_rebalance(Config) ->
|
||||||
|| ClientId <- Clients
|
|| ClientId <- Clients
|
||||||
],
|
],
|
||||||
%% Interleaved list of events:
|
%% Interleaved list of events:
|
||||||
Stream = emqx_utils_stream:interleave([{2, Stream} || {_ClientId, Stream} <- TopicStreams]),
|
Stream0 = emqx_utils_stream:interleave(
|
||||||
|
[{2, Stream} || {_ClientId, Stream} <- TopicStreams], true
|
||||||
|
),
|
||||||
|
Stream = emqx_utils_stream:interleave(
|
||||||
|
[
|
||||||
|
{50, Stream0},
|
||||||
|
emqx_utils_stream:const(add_generation)
|
||||||
|
],
|
||||||
|
false
|
||||||
|
),
|
||||||
Nodes = [N1, N2, N3, N4] = ?config(nodes, Config),
|
Nodes = [N1, N2, N3, N4] = ?config(nodes, Config),
|
||||||
?check_trace(
|
?check_trace(
|
||||||
#{timetrap => 30_000},
|
#{timetrap => 30_000},
|
||||||
|
@ -176,15 +184,22 @@ t_rebalance(Config) ->
|
||||||
%% 0. Inject schedulings to make sure the messages are
|
%% 0. Inject schedulings to make sure the messages are
|
||||||
%% written to the storage before, during, and after
|
%% written to the storage before, during, and after
|
||||||
%% rebalance:
|
%% rebalance:
|
||||||
?force_ordering(#{?snk_kind := test_push_message, n := 10}, #{
|
?force_ordering(
|
||||||
?snk_kind := test_start_rebalance
|
#{?snk_kind := test_push_message, n := 10},
|
||||||
}),
|
#{?snk_kind := test_start_rebalance}
|
||||||
?force_ordering(#{?snk_kind := test_start_rebalance}, #{
|
),
|
||||||
?snk_kind := test_push_message, n := 20
|
?force_ordering(
|
||||||
}),
|
#{?snk_kind := test_start_rebalance1},
|
||||||
?force_ordering(#{?snk_kind := test_end_rebalance}, #{
|
#{?snk_kind := test_push_message, n := 20}
|
||||||
?snk_kind := test_push_message, n := 30
|
),
|
||||||
}),
|
?force_ordering(
|
||||||
|
#{?snk_kind := test_push_message, n := 30},
|
||||||
|
#{?snk_kind := test_start_rebalance2}
|
||||||
|
),
|
||||||
|
?force_ordering(
|
||||||
|
#{?snk_kind := test_end_rebalance},
|
||||||
|
#{?snk_kind := test_push_message, n := 40}
|
||||||
|
),
|
||||||
%% 1. Initialize DB on the first node.
|
%% 1. Initialize DB on the first node.
|
||||||
Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}),
|
Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}),
|
||||||
|
|
||||||
|
@ -224,7 +239,7 @@ t_rebalance(Config) ->
|
||||||
),
|
),
|
||||||
|
|
||||||
%% 3. Start rebalance in the meanwhile:
|
%% 3. Start rebalance in the meanwhile:
|
||||||
?tp(test_start_rebalance, #{}),
|
?tp(test_start_rebalance1, #{}),
|
||||||
%% 3.1 Join the second site to the DB replication sites.
|
%% 3.1 Join the second site to the DB replication sites.
|
||||||
?assertEqual(ok, ?ON(N1, emqx_ds_replication_layer_meta:join_db_site(?DB, S2))),
|
?assertEqual(ok, ?ON(N1, emqx_ds_replication_layer_meta:join_db_site(?DB, S2))),
|
||||||
%% Should be no-op.
|
%% Should be no-op.
|
||||||
|
@ -233,6 +248,7 @@ t_rebalance(Config) ->
|
||||||
|
|
||||||
?retry(1000, 10, ?assertEqual([], transitions(N1, ?DB))),
|
?retry(1000, 10, ?assertEqual([], transitions(N1, ?DB))),
|
||||||
|
|
||||||
|
?tp(test_start_rebalance2, #{}),
|
||||||
%% Now join the rest of the sites.
|
%% Now join the rest of the sites.
|
||||||
?assertEqual(ok, ds_repl_meta(N2, assign_db_sites, [?DB, Sites])),
|
?assertEqual(ok, ds_repl_meta(N2, assign_db_sites, [?DB, Sites])),
|
||||||
ct:pal("Transitions (~p -> ~p): ~p~n", [[S1, S2], Sites, transitions(N1, ?DB)]),
|
ct:pal("Transitions (~p -> ~p): ~p~n", [[S1, S2], Sites, transitions(N1, ?DB)]),
|
||||||
|
@ -619,7 +635,9 @@ without_extra(L) ->
|
||||||
-type ds_stream() :: emqx_utils_stream:stream({emqx_ds:message_key(), emqx_types:message()}).
|
-type ds_stream() :: emqx_utils_stream:stream({emqx_ds:message_key(), emqx_types:message()}).
|
||||||
|
|
||||||
%% Create a stream from the topic (wildcards are NOT supported for a
|
%% Create a stream from the topic (wildcards are NOT supported for a
|
||||||
%% good reason: order of messages is implementation-dependent!):
|
%% good reason: order of messages is implementation-dependent!).
|
||||||
|
%%
|
||||||
|
%% Note: stream produces messages with keys
|
||||||
-spec ds_topic_stream(binary(), binary(), node()) -> ds_stream().
|
-spec ds_topic_stream(binary(), binary(), node()) -> ds_stream().
|
||||||
ds_topic_stream(ClientId, TopicBin, Node) ->
|
ds_topic_stream(ClientId, TopicBin, Node) ->
|
||||||
Topic = emqx_topic:words(TopicBin),
|
Topic = emqx_topic:words(TopicBin),
|
||||||
|
@ -638,7 +656,6 @@ ds_topic_stream(ClientId, TopicBin, Node) ->
|
||||||
|| {_RankY, S} <- lists:sort(DSStreams)
|
|| {_RankY, S} <- lists:sort(DSStreams)
|
||||||
]).
|
]).
|
||||||
|
|
||||||
%% Note: produces messages with keys
|
|
||||||
ds_topic_generation_stream(Node, Shard, Topic, Stream) ->
|
ds_topic_generation_stream(Node, Shard, Topic, Stream) ->
|
||||||
{ok, Iterator} = ?ON(
|
{ok, Iterator} = ?ON(
|
||||||
Node,
|
Node,
|
||||||
|
@ -647,11 +664,20 @@ ds_topic_generation_stream(Node, Shard, Topic, Stream) ->
|
||||||
do_ds_topic_generation_stream(Node, Shard, Iterator).
|
do_ds_topic_generation_stream(Node, Shard, Iterator).
|
||||||
|
|
||||||
do_ds_topic_generation_stream(Node, Shard, It0) ->
|
do_ds_topic_generation_stream(Node, Shard, It0) ->
|
||||||
Now = 99999999999999999999,
|
|
||||||
fun() ->
|
fun() ->
|
||||||
case ?ON(Node, emqx_ds_storage_layer:next(Shard, It0, 1, Now)) of
|
case
|
||||||
|
?ON(
|
||||||
|
Node,
|
||||||
|
begin
|
||||||
|
Now = emqx_ds_replication_layer:current_timestamp(?DB, Shard),
|
||||||
|
emqx_ds_storage_layer:next(Shard, It0, 1, Now)
|
||||||
|
end
|
||||||
|
)
|
||||||
|
of
|
||||||
{ok, It, []} ->
|
{ok, It, []} ->
|
||||||
[];
|
[];
|
||||||
|
{ok, end_of_stream} ->
|
||||||
|
[];
|
||||||
{ok, It, [KeyMsg]} ->
|
{ok, It, [KeyMsg]} ->
|
||||||
[KeyMsg | do_ds_topic_generation_stream(Node, Shard, It)]
|
[KeyMsg | do_ds_topic_generation_stream(Node, Shard, It)]
|
||||||
end
|
end
|
||||||
|
@ -673,7 +699,11 @@ apply_stream(DB, NodeStream0, Stream0, N) ->
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
?ON(Node, emqx_ds:store_batch(DB, [Msg], #{sync => true})),
|
?ON(Node, emqx_ds:store_batch(DB, [Msg], #{sync => true})),
|
||||||
apply_stream(DB, NodeStream, Stream, N + 1)
|
apply_stream(DB, NodeStream, Stream, N + 1);
|
||||||
|
[add_generation | Stream] ->
|
||||||
|
[Node | NodeStream] = emqx_utils_stream:next(NodeStream0),
|
||||||
|
%% add_generation(Node, DB),
|
||||||
|
apply_stream(DB, NodeStream, Stream, N)
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Create an infinite list of messages from a given client:
|
%% @doc Create an infinite list of messages from a given client:
|
||||||
|
@ -724,7 +754,7 @@ verify_stream_effects(TestCase, Node, ClientId, ExpectedStream) ->
|
||||||
snabbkaffe_diff:assert_lists_eq(
|
snabbkaffe_diff:assert_lists_eq(
|
||||||
ExpectedStream,
|
ExpectedStream,
|
||||||
ds_topic_stream(ClientId, client_topic(TestCase, ClientId), Node),
|
ds_topic_stream(ClientId, client_topic(TestCase, ClientId), Node),
|
||||||
?diff_opts#{comment => #{clientid => ClientId, node => Node}}
|
?diff_opts
|
||||||
),
|
),
|
||||||
ct:pal("Data for client ~p on ~p is consistent.", [ClientId, Node])
|
ct:pal("Data for client ~p on ~p is consistent.", [ClientId, Node])
|
||||||
end
|
end
|
||||||
|
|
|
@ -20,13 +20,14 @@
|
||||||
-export([
|
-export([
|
||||||
empty/0,
|
empty/0,
|
||||||
list/1,
|
list/1,
|
||||||
|
const/1,
|
||||||
mqueue/1,
|
mqueue/1,
|
||||||
map/2,
|
map/2,
|
||||||
transpose/1,
|
transpose/1,
|
||||||
chain/1,
|
chain/1,
|
||||||
chain/2,
|
chain/2,
|
||||||
repeat/1,
|
repeat/1,
|
||||||
interleave/1,
|
interleave/2,
|
||||||
limit_length/2
|
limit_length/2
|
||||||
]).
|
]).
|
||||||
|
|
||||||
|
@ -72,6 +73,11 @@ list([]) ->
|
||||||
list([X | Rest]) ->
|
list([X | Rest]) ->
|
||||||
fun() -> [X | list(Rest)] end.
|
fun() -> [X | list(Rest)] end.
|
||||||
|
|
||||||
|
%% @doc Make a stream with a single element infinitely repeated
|
||||||
|
-spec const(T) -> stream(T).
|
||||||
|
const(T) ->
|
||||||
|
fun() -> [T | const(T)] end.
|
||||||
|
|
||||||
%% @doc Make a stream out of process message queue.
|
%% @doc Make a stream out of process message queue.
|
||||||
-spec mqueue(timeout()) -> stream(any()).
|
-spec mqueue(timeout()) -> stream(any()).
|
||||||
mqueue(Timeout) ->
|
mqueue(Timeout) ->
|
||||||
|
@ -158,8 +164,8 @@ repeat(S) ->
|
||||||
%% specifies size of the "batch" to be consumed from the stream at a
|
%% specifies size of the "batch" to be consumed from the stream at a
|
||||||
%% time (stream is the second tuple element). If element of the list
|
%% time (stream is the second tuple element). If element of the list
|
||||||
%% is a plain stream, then the batch size is assumed to be 1.
|
%% is a plain stream, then the batch size is assumed to be 1.
|
||||||
-spec interleave([stream(X) | {non_neg_integer(), stream(X)}]) -> stream(X).
|
-spec interleave([stream(X) | {non_neg_integer(), stream(X)}], boolean()) -> stream(X).
|
||||||
interleave(L0) ->
|
interleave(L0, ContinueAtEmpty) ->
|
||||||
L = lists:map(
|
L = lists:map(
|
||||||
fun
|
fun
|
||||||
(Stream) when is_function(Stream) ->
|
(Stream) when is_function(Stream) ->
|
||||||
|
@ -170,7 +176,7 @@ interleave(L0) ->
|
||||||
L0
|
L0
|
||||||
),
|
),
|
||||||
fun() ->
|
fun() ->
|
||||||
do_interleave(0, L, [])
|
do_interleave(ContinueAtEmpty, 0, L, [])
|
||||||
end.
|
end.
|
||||||
|
|
||||||
%% @doc Truncate list to the given length
|
%% @doc Truncate list to the given length
|
||||||
|
@ -281,21 +287,23 @@ csv_read_line([Line | Lines]) ->
|
||||||
csv_read_line([]) ->
|
csv_read_line([]) ->
|
||||||
eof.
|
eof.
|
||||||
|
|
||||||
do_interleave(_, [], []) ->
|
do_interleave(_Cont, _, [], []) ->
|
||||||
[];
|
[];
|
||||||
do_interleave(N, [{N, S} | Rest], Rev) ->
|
do_interleave(Cont, N, [{N, S} | Rest], Rev) ->
|
||||||
do_interleave(0, Rest, [{N, S} | Rev]);
|
do_interleave(Cont, 0, Rest, [{N, S} | Rev]);
|
||||||
do_interleave(_, [], Rev) ->
|
do_interleave(Cont, _, [], Rev) ->
|
||||||
do_interleave(0, lists:reverse(Rev), []);
|
do_interleave(Cont, 0, lists:reverse(Rev), []);
|
||||||
do_interleave(I, [{N, S} | Rest], Rev) when I < N ->
|
do_interleave(Cont, I, [{N, S} | Rest], Rev) when I < N ->
|
||||||
case next(S) of
|
case next(S) of
|
||||||
|
[] when Cont ->
|
||||||
|
do_interleave(Cont, 0, Rest, Rev);
|
||||||
[] ->
|
[] ->
|
||||||
do_interleave(0, Rest, Rev);
|
[];
|
||||||
[X | S1] ->
|
[X | S1] ->
|
||||||
[
|
[
|
||||||
X
|
X
|
||||||
| fun() ->
|
| fun() ->
|
||||||
do_interleave(I + 1, [{N, S1} | Rest], Rev)
|
do_interleave(Cont, I + 1, [{N, S1} | Rest], Rev)
|
||||||
end
|
end
|
||||||
]
|
]
|
||||||
end.
|
end.
|
||||||
|
|
Loading…
Reference in New Issue