feat(ds): Assign latest timestamp deterministically

This commit is contained in:
ieQu1 2024-05-08 23:15:46 +02:00
parent 2236af84ba
commit a0a3977043
No known key found for this signature in database
GPG Key ID: 488654DF3FED6FDE
4 changed files with 76 additions and 37 deletions

View File

@ -734,20 +734,20 @@ apply(
{State, Result}; {State, Result};
apply( apply(
_RaftMeta, _RaftMeta,
#{?tag := storage_event, ?payload := CustomEvent}, #{?tag := storage_event, ?payload := CustomEvent, ?now := Now},
#{db_shard := DBShard, latest := Latest0} = State #{db_shard := DBShard, latest := Latest0} = State
) -> ) ->
{Timestamp, Latest} = ensure_monotonic_timestamp(emqx_ds:timestamp_us(), Latest0), Latest = max(Latest0, Now),
set_ts(DBShard, Latest), set_ts(DBShard, Latest),
?tp( ?tp(
debug, debug,
emqx_ds_replication_layer_storage_event, emqx_ds_replication_layer_storage_event,
#{ #{
shard => DBShard, ts => Timestamp, payload => CustomEvent shard => DBShard, payload => CustomEvent, latest => Latest
} }
), ),
Effects = handle_custom_event(DBShard, Timestamp, CustomEvent), Effects = handle_custom_event(DBShard, Latest, CustomEvent),
{State#{latest := Latest}, ok, Effects}. {State#{latest => Latest}, ok, Effects}.
-spec tick(integer(), ra_state()) -> ra_machine:effects(). -spec tick(integer(), ra_state()) -> ra_machine:effects().
tick(TimeMs, #{db_shard := DBShard = {DB, Shard}, latest := Latest}) -> tick(TimeMs, #{db_shard := DBShard = {DB, Shard}, latest := Latest}) ->
@ -791,7 +791,7 @@ snapshot_module() ->
handle_custom_event(DBShard, Latest, Event) -> handle_custom_event(DBShard, Latest, Event) ->
try try
Events = emqx_ds_storage_layer:handle_event(DBShard, Latest, Event), Events = emqx_ds_storage_layer:handle_event(DBShard, Latest, Event),
[{append, #{?tag => storage_event, ?payload => I}} || I <- Events] [{append, #{?tag => storage_event, ?payload => I, ?now => Latest}} || I <- Events]
catch catch
EC:Err:Stacktrace -> EC:Err:Stacktrace ->
?tp(error, ds_storage_custom_even_fail, #{ ?tp(error, ds_storage_custom_even_fail, #{

View File

@ -43,5 +43,6 @@
%% custom events %% custom events
-define(payload, 2). -define(payload, 2).
-define(now, 3).
-endif. -endif.

View File

@ -41,7 +41,7 @@ opts(Overrides) ->
#{ #{
backend => builtin, backend => builtin,
%% storage => {emqx_ds_storage_reference, #{}}, %% storage => {emqx_ds_storage_reference, #{}},
storage => {emqx_ds_storage_bitfield_lts, #{epoch_bits => 1}}, storage => {emqx_ds_storage_bitfield_lts, #{epoch_bits => 10}},
n_shards => 16, n_shards => 16,
n_sites => 1, n_sites => 1,
replication_factor => 3, replication_factor => 3,
@ -159,7 +159,6 @@ t_rebalance('end', Config) ->
t_rebalance(Config) -> t_rebalance(Config) ->
NMsgs = 50, NMsgs = 50,
NClients = 5, NClients = 5,
NEvents = NMsgs * NClients,
%% List of fake client IDs: %% List of fake client IDs:
Clients = [integer_to_binary(I) || I <- lists:seq(1, NClients)], Clients = [integer_to_binary(I) || I <- lists:seq(1, NClients)],
%% List of streams that generate messages for each "client" in its own topic: %% List of streams that generate messages for each "client" in its own topic:
@ -168,7 +167,16 @@ t_rebalance(Config) ->
|| ClientId <- Clients || ClientId <- Clients
], ],
%% Interleaved list of events: %% Interleaved list of events:
Stream = emqx_utils_stream:interleave([{2, Stream} || {_ClientId, Stream} <- TopicStreams]), Stream0 = emqx_utils_stream:interleave(
[{2, Stream} || {_ClientId, Stream} <- TopicStreams], true
),
Stream = emqx_utils_stream:interleave(
[
{50, Stream0},
emqx_utils_stream:const(add_generation)
],
false
),
Nodes = [N1, N2, N3, N4] = ?config(nodes, Config), Nodes = [N1, N2, N3, N4] = ?config(nodes, Config),
?check_trace( ?check_trace(
#{timetrap => 30_000}, #{timetrap => 30_000},
@ -176,15 +184,22 @@ t_rebalance(Config) ->
%% 0. Inject schedulings to make sure the messages are %% 0. Inject schedulings to make sure the messages are
%% written to the storage before, during, and after %% written to the storage before, during, and after
%% rebalance: %% rebalance:
?force_ordering(#{?snk_kind := test_push_message, n := 10}, #{ ?force_ordering(
?snk_kind := test_start_rebalance #{?snk_kind := test_push_message, n := 10},
}), #{?snk_kind := test_start_rebalance}
?force_ordering(#{?snk_kind := test_start_rebalance}, #{ ),
?snk_kind := test_push_message, n := 20 ?force_ordering(
}), #{?snk_kind := test_start_rebalance1},
?force_ordering(#{?snk_kind := test_end_rebalance}, #{ #{?snk_kind := test_push_message, n := 20}
?snk_kind := test_push_message, n := 30 ),
}), ?force_ordering(
#{?snk_kind := test_push_message, n := 30},
#{?snk_kind := test_start_rebalance2}
),
?force_ordering(
#{?snk_kind := test_end_rebalance},
#{?snk_kind := test_push_message, n := 40}
),
%% 1. Initialize DB on the first node. %% 1. Initialize DB on the first node.
Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}), Opts = opts(#{n_shards => 16, n_sites => 1, replication_factor => 3}),
@ -224,7 +239,7 @@ t_rebalance(Config) ->
), ),
%% 3. Start rebalance in the meanwhile: %% 3. Start rebalance in the meanwhile:
?tp(test_start_rebalance, #{}), ?tp(test_start_rebalance1, #{}),
%% 3.1 Join the second site to the DB replication sites. %% 3.1 Join the second site to the DB replication sites.
?assertEqual(ok, ?ON(N1, emqx_ds_replication_layer_meta:join_db_site(?DB, S2))), ?assertEqual(ok, ?ON(N1, emqx_ds_replication_layer_meta:join_db_site(?DB, S2))),
%% Should be no-op. %% Should be no-op.
@ -233,6 +248,7 @@ t_rebalance(Config) ->
?retry(1000, 10, ?assertEqual([], transitions(N1, ?DB))), ?retry(1000, 10, ?assertEqual([], transitions(N1, ?DB))),
?tp(test_start_rebalance2, #{}),
%% Now join the rest of the sites. %% Now join the rest of the sites.
?assertEqual(ok, ds_repl_meta(N2, assign_db_sites, [?DB, Sites])), ?assertEqual(ok, ds_repl_meta(N2, assign_db_sites, [?DB, Sites])),
ct:pal("Transitions (~p -> ~p): ~p~n", [[S1, S2], Sites, transitions(N1, ?DB)]), ct:pal("Transitions (~p -> ~p): ~p~n", [[S1, S2], Sites, transitions(N1, ?DB)]),
@ -619,7 +635,9 @@ without_extra(L) ->
-type ds_stream() :: emqx_utils_stream:stream({emqx_ds:message_key(), emqx_types:message()}). -type ds_stream() :: emqx_utils_stream:stream({emqx_ds:message_key(), emqx_types:message()}).
%% Create a stream from the topic (wildcards are NOT supported for a %% Create a stream from the topic (wildcards are NOT supported for a
%% good reason: order of messages is implementation-dependent!): %% good reason: order of messages is implementation-dependent!).
%%
%% Note: the stream yields `{MessageKey, Message}' pairs (see `ds_stream()'), not bare messages.
-spec ds_topic_stream(binary(), binary(), node()) -> ds_stream(). -spec ds_topic_stream(binary(), binary(), node()) -> ds_stream().
ds_topic_stream(ClientId, TopicBin, Node) -> ds_topic_stream(ClientId, TopicBin, Node) ->
Topic = emqx_topic:words(TopicBin), Topic = emqx_topic:words(TopicBin),
@ -638,7 +656,6 @@ ds_topic_stream(ClientId, TopicBin, Node) ->
|| {_RankY, S} <- lists:sort(DSStreams) || {_RankY, S} <- lists:sort(DSStreams)
]). ]).
%% Note: produces messages with keys
ds_topic_generation_stream(Node, Shard, Topic, Stream) -> ds_topic_generation_stream(Node, Shard, Topic, Stream) ->
{ok, Iterator} = ?ON( {ok, Iterator} = ?ON(
Node, Node,
@ -647,11 +664,20 @@ ds_topic_generation_stream(Node, Shard, Topic, Stream) ->
do_ds_topic_generation_stream(Node, Shard, Iterator). do_ds_topic_generation_stream(Node, Shard, Iterator).
do_ds_topic_generation_stream(Node, Shard, It0) -> do_ds_topic_generation_stream(Node, Shard, It0) ->
Now = 99999999999999999999,
fun() -> fun() ->
case ?ON(Node, emqx_ds_storage_layer:next(Shard, It0, 1, Now)) of case
?ON(
Node,
begin
Now = emqx_ds_replication_layer:current_timestamp(?DB, Shard),
emqx_ds_storage_layer:next(Shard, It0, 1, Now)
end
)
of
{ok, It, []} -> {ok, It, []} ->
[]; [];
{ok, end_of_stream} ->
[];
{ok, It, [KeyMsg]} -> {ok, It, [KeyMsg]} ->
[KeyMsg | do_ds_topic_generation_stream(Node, Shard, It)] [KeyMsg | do_ds_topic_generation_stream(Node, Shard, It)]
end end
@ -673,7 +699,11 @@ apply_stream(DB, NodeStream0, Stream0, N) ->
) )
), ),
?ON(Node, emqx_ds:store_batch(DB, [Msg], #{sync => true})), ?ON(Node, emqx_ds:store_batch(DB, [Msg], #{sync => true})),
apply_stream(DB, NodeStream, Stream, N + 1) apply_stream(DB, NodeStream, Stream, N + 1);
[add_generation | Stream] ->
[Node | NodeStream] = emqx_utils_stream:next(NodeStream0),
%% add_generation(Node, DB),
apply_stream(DB, NodeStream, Stream, N)
end. end.
%% @doc Create an infinite list of messages from a given client: %% @doc Create an infinite list of messages from a given client:
@ -724,7 +754,7 @@ verify_stream_effects(TestCase, Node, ClientId, ExpectedStream) ->
snabbkaffe_diff:assert_lists_eq( snabbkaffe_diff:assert_lists_eq(
ExpectedStream, ExpectedStream,
ds_topic_stream(ClientId, client_topic(TestCase, ClientId), Node), ds_topic_stream(ClientId, client_topic(TestCase, ClientId), Node),
?diff_opts#{comment => #{clientid => ClientId, node => Node}} ?diff_opts
), ),
ct:pal("Data for client ~p on ~p is consistent.", [ClientId, Node]) ct:pal("Data for client ~p on ~p is consistent.", [ClientId, Node])
end end

View File

@ -20,13 +20,14 @@
-export([ -export([
empty/0, empty/0,
list/1, list/1,
const/1,
mqueue/1, mqueue/1,
map/2, map/2,
transpose/1, transpose/1,
chain/1, chain/1,
chain/2, chain/2,
repeat/1, repeat/1,
interleave/1, interleave/2,
limit_length/2 limit_length/2
]). ]).
@ -72,6 +73,11 @@ list([]) ->
list([X | Rest]) -> list([X | Rest]) ->
fun() -> [X | list(Rest)] end. fun() -> [X | list(Rest)] end.
%% @doc Build an endless stream that yields the same element `Elem' on
%% every step. The stream never terminates on its own, so it must be
%% consumed lazily.
-spec const(T) -> stream(T).
const(Elem) ->
    %% A named fun lets the stream refer to itself as its own tail.
    fun Forever() ->
        [Elem | Forever]
    end.
%% @doc Make a stream out of process message queue. %% @doc Make a stream out of process message queue.
-spec mqueue(timeout()) -> stream(any()). -spec mqueue(timeout()) -> stream(any()).
mqueue(Timeout) -> mqueue(Timeout) ->
@ -158,8 +164,8 @@ repeat(S) ->
%% specifies size of the "batch" to be consumed from the stream at a %% specifies size of the "batch" to be consumed from the stream at a
%% time (stream is the second tuple element). If element of the list %% time (stream is the second tuple element). If element of the list
%% is a plain stream, then the batch size is assumed to be 1. %% is a plain stream, then the batch size is assumed to be 1.
-spec interleave([stream(X) | {non_neg_integer(), stream(X)}]) -> stream(X). -spec interleave([stream(X) | {non_neg_integer(), stream(X)}], boolean()) -> stream(X).
interleave(L0) -> interleave(L0, ContinueAtEmpty) ->
L = lists:map( L = lists:map(
fun fun
(Stream) when is_function(Stream) -> (Stream) when is_function(Stream) ->
@ -170,7 +176,7 @@ interleave(L0) ->
L0 L0
), ),
fun() -> fun() ->
do_interleave(0, L, []) do_interleave(ContinueAtEmpty, 0, L, [])
end. end.
%% @doc Truncate list to the given length %% @doc Truncate list to the given length
@ -281,21 +287,23 @@ csv_read_line([Line | Lines]) ->
csv_read_line([]) -> csv_read_line([]) ->
eof. eof.
%% Lazy worker behind `interleave': produces the next cell of the
%% interleaved stream.
%%
%% Arguments (4-argument form):
%%   Cont - the `ContinueAtEmpty' flag passed to `interleave/2';
%%   I    - how many elements have been taken from the head stream in
%%          the current batch (starts at 0);
%%   List - `{BatchSize, Stream}' pairs still to be served this round;
%%   Rev  - pairs already served this round, in reverse order.
%%
%% NOTE(review): a pair with batch size 0 matches the "batch complete"
%% clause immediately and is never consumed; if every pair has batch
%% size 0 the round restarts forever once the stream is forced --
%% confirm callers never pass 0.
%%
%% Nothing left to serve and nothing set aside: the stream is finished.
do_interleave(_, [], []) -> do_interleave(_Cont, _, [], []) ->
[]; [];
%% The head stream has delivered its full batch (counter equals its batch
%% size): set it aside for the next round and reset the counter.
do_interleave(N, [{N, S} | Rest], Rev) -> do_interleave(Cont, N, [{N, S} | Rest], Rev) ->
do_interleave(0, Rest, [{N, S} | Rev]); do_interleave(Cont, 0, Rest, [{N, S} | Rev]);
%% End of a round: restore the original order and start over.
do_interleave(_, [], Rev) -> do_interleave(Cont, _, [], Rev) ->
do_interleave(0, lists:reverse(Rev), []); do_interleave(Cont, 0, lists:reverse(Rev), []);
%% The head stream still owes elements for the current batch.
do_interleave(I, [{N, S} | Rest], Rev) when I < N -> do_interleave(Cont, I, [{N, S} | Rest], Rev) when I < N ->
case next(S) of case next(S) of
%% Head stream is exhausted and `Cont' is true: drop it and keep
%% interleaving the remaining streams.
[] when Cont ->
do_interleave(Cont, 0, Rest, Rev);
%% Head stream is exhausted and `Cont' is false: end the whole interleaved
%% stream, even if other streams still have elements.
[] -> [] ->
do_interleave(0, Rest, Rev); [];
%% Emit one element; the tail is a fun, so the rest is computed on demand.
[X | S1] -> [X | S1] ->
[ [
X X
| fun() -> | fun() ->
do_interleave(I + 1, [{N, S1} | Rest], Rev) do_interleave(Cont, I + 1, [{N, S1} | Rest], Rev)
end end
] ]
end. end.