feat(ds): Implement ratchet function for bitmask keymapper

ieQu1 2023-10-14 01:01:10 +02:00
parent 164ae9e94a
commit ef46c09caf
7 changed files with 436 additions and 1959 deletions

View File

@@ -40,7 +40,10 @@
init() ->
?WHEN_ENABLED(begin
ok = emqx_ds:open_db(?PERSISTENT_MESSAGE_DB, #{}),
ok = emqx_ds:open_db(?PERSISTENT_MESSAGE_DB, #{
backend => builtin,
storage => {emqx_ds_storage_bitfield_lts, #{}}
}),
ok = emqx_persistent_session_ds_router:init_tables(),
ok = emqx_persistent_session_ds:create_tables(),
ok

View File

@@ -0,0 +1,36 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-ifndef(EMQX_DS_BITMASK_HRL).
-define(EMQX_DS_BITMASK_HRL, true).
-record(filter_scan_action, {
offset :: emqx_ds_bitmask_keymapper:offset(),
size :: emqx_ds_bitmask_keymapper:bitsize(),
min :: non_neg_integer(),
max :: non_neg_integer()
}).
-record(filter, {
size :: non_neg_integer(),
bitfilter :: non_neg_integer(),
bitmask :: non_neg_integer(),
%% Ranges (in _bitsource_ basis):
bitsource_ranges :: array:array(#filter_scan_action{}),
range_min :: non_neg_integer(),
range_max :: non_neg_integer()
}).
-endif.

View File

@@ -86,9 +86,12 @@
bin_vector_to_key/2,
key_to_vector/2,
bin_key_to_vector/2,
next_range/3,
key_to_bitstring/2,
bitstring_to_key/2,
make_filter/2,
ratchet/2,
bin_increment/2,
bin_checkmask/2,
bitsize/1
]).
@@ -149,6 +152,10 @@
-type scalar_range() :: any | {'=', scalar() | infinity} | {'>=', scalar()}.
-include("emqx_ds_bitmask.hrl").
-type filter() :: #filter{}.
%%================================================================================
%% API functions
%%================================================================================
@@ -237,36 +244,6 @@ bin_key_to_vector(Keymapper = #keymapper{dim_sizeof = DimSizeof, size = Size}, B
lists:zip(Vector, DimSizeof)
).
%% @doc Given a keymapper, a filter, and a key, return a triple containing:
%%
%% 1. `NextKey', a key that is greater than the given one, and is
%% within the given range.
%%
%% 2. `Bitmask'
%%
%% 3. `Bitfilter'
%%
%% Bitmask and bitfilter can be used to verify that any key K is in
%% the range using the following inequality:
%%
%% K >= NextKey && (K band Bitmask) =:= Bitfilter.
%%
%% ...or `undefined' if the next key is outside the range.
-spec next_range(keymapper(), [scalar_range()], key()) -> {key(), integer(), integer()} | undefined.
next_range(Keymapper, Filter0, PrevKey) ->
%% Key -> Vector -> +1 on vector -> Key
Filter = desugar_filter(Keymapper, Filter0),
PrevVec = key_to_vector(Keymapper, PrevKey),
case inc_vector(Filter, PrevVec) of
overflow ->
undefined;
NextVec ->
NewKey = vector_to_key(Keymapper, NextVec),
Bitmask = make_bitmask(Keymapper, Filter),
Bitfilter = NewKey band Bitmask,
{NewKey, Bitmask, Bitfilter}
end.
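%% To illustrate the inequality above with values from the tests below:
%% for a keymapper over two 8-bit dimensions and the filter
%% [{'>=', 16#30}, {'=', 16#42}],
%%
%%   next_range(Keymapper, [{'>=', 16#30}, {'=', 16#42}], 0)
%%
%% returns {16#42_30, 16#ff00, 16#42_00}: a key K belongs to the range
%% iff K band 16#ff00 =:= 16#42_00 (the high byte is pinned to 16#42),
%% while the low byte is free to grow starting from 16#30.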
-spec bitstring_to_key(keymapper(), bitstring()) -> key().
bitstring_to_key(#keymapper{size = Size}, Bin) ->
case Bin of
@@ -280,60 +257,208 @@ bitstring_to_key(#keymapper{size = Size}, Bin) ->
key_to_bitstring(#keymapper{size = Size}, Key) ->
<<Key:Size>>.
%% @doc Create a filter object that facilitates range scans.
-spec make_filter(keymapper(), [scalar_range()]) -> filter().
make_filter(KeyMapper = #keymapper{schema = Schema, dim_sizeof = DimSizeof, size = Size}, Filter0) ->
NDim = length(DimSizeof),
%% Transform "symbolic" inequations to ranges:
Filter1 = inequations_to_ranges(KeyMapper, Filter0),
{Bitmask, Bitfilter} = make_bitfilter(KeyMapper, Filter1),
%% Calculate maximum source offset as per bitsource specification:
MaxOffset = lists:foldl(
fun({Dim, Offset, _Size}, Acc) ->
maps:update_with(Dim, fun(OldVal) -> max(OldVal, Offset) end, 0, Acc)
end,
#{},
Schema
),
%% Adjust minimum and maximum values for each interval like this:
%%
%% Min: 110100|101011 -> 110100|000000
%% Max: 110101|001011 -> 110101|111111
%% ^
%% |
%% max offset
%%
%% This is needed so when we increment the vector, we always scan
%% the full range of least significant bits.
Filter2 = lists:map(
fun
({{Val, Val}, _Dim}) ->
{Val, Val};
({{Min0, Max0}, Dim}) ->
Offset = maps:get(Dim, MaxOffset, 0),
%% Set least significant bits of Min to 0:
Min = (Min0 bsr Offset) bsl Offset,
%% Set least significant bits of Max to 1:
Max = Max0 bor ones(Offset),
{Min, Max}
end,
lists:zip(Filter1, lists:seq(1, NDim))
),
%% Project the vector into "bitsource coordinate system":
{_, Filter} = fold_bitsources(
fun(DstOffset, {Dim, SrcOffset, Size}, Acc) ->
{Min0, Max0} = lists:nth(Dim, Filter2),
Min = (Min0 bsr SrcOffset) band ones(Size),
Max = (Max0 bsr SrcOffset) band ones(Size),
Action = #filter_scan_action{
offset = DstOffset,
size = Size,
min = Min,
max = Max
},
[Action | Acc]
end,
[],
Schema
),
Ranges = array:from_list(lists:reverse(Filter)),
%% Compute estimated upper and lower bounds of a _continuous_
%% interval where all keys lie:
case Filter of
[] ->
RangeMin = 0,
RangeMax = 0;
[#filter_scan_action{offset = MSBOffset, min = MSBMin, max = MSBMax} | _] ->
RangeMin = MSBMin bsl MSBOffset,
RangeMax = MSBMax bsl MSBOffset bor ones(MSBOffset)
end,
%% Final value
#filter{
size = Size,
bitmask = Bitmask,
bitfilter = Bitfilter,
bitsource_ranges = Ranges,
range_min = RangeMin,
range_max = RangeMax
}.
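%% A minimal usage sketch (the same shape is asserted by ratchet1_test
%% below): a keymapper with a single 8-bit bitsource and the
%% unconstrained filter [any] produces one full-range scan action:
%%
%%   KM = make_keymapper([{1, 0, 8}]),
%%   #filter{range_min = 0, range_max = 16#ff, bitsource_ranges = RArr} =
%%       make_filter(KM, [any]),
%%   [#filter_scan_action{offset = 0, size = 8, min = 0, max = 16#ff}] =
%%       array:to_list(RArr).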
%% @doc Given a filter and a key, return the smallest key that is
%% greater than or equal to the given key and satisfies the filter, or
%% `overflow' if no such key exists (see ratchet1_test and
%% ratchet2_test).
-spec ratchet(filter(), key()) -> key() | overflow.
ratchet(#filter{bitsource_ranges = Ranges, range_max = Max}, Key) when Key =< Max ->
NDim = array:size(Ranges),
case ratchet_scan(Ranges, NDim, Key, 0, _Pivot = {-1, 0}, _Carry = 0) of
overflow ->
overflow;
{Pivot, Increment} ->
ratchet_do(Ranges, Key, NDim - 1, Pivot, Increment)
end;
ratchet(_, _) ->
overflow.
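%% Example (values from ratchet2_test below): with the key layout
%% |static|epoch|hash|offset| and the filter
%% [{'=', 16#aa}, any, {'=', 16#cc}],
%%
%%   ratchet(F1, 16#aa00cd00) =:= 16#aa01cc00
%%
%% The `hash' coordinate of the key (16#cd) overshoots its maximum
%% (16#cc), so the next more significant bitsource (the epoch) is
%% incremented and all less significant bitsources are reset to their
%% minima.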
-spec bin_increment(filter(), binary()) -> binary() | overflow.
bin_increment(Filter = #filter{size = Size}, <<>>) ->
Key = ratchet(Filter, 0),
<<Key:Size>>;
bin_increment(Filter = #filter{size = Size, bitmask = Bitmask, bitfilter = Bitfilter}, KeyBin) ->
<<Key0:Size>> = KeyBin,
Key1 = Key0 + 1,
if
Key1 band Bitmask =:= Bitfilter ->
%% TODO: check overflow
<<Key1:Size>>;
true ->
case ratchet(Filter, Key1) of
overflow ->
overflow;
Key ->
<<Key:Size>>
end
end.
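%% Sketch of the intended usage (compare next_loop/6 in
%% emqx_ds_storage_bitfield_lts; the variable names here are
%% illustrative): the storage layer feeds the last seen key back in and
%% seeks its RocksDB iterator to the result:
%%
%%   case bin_increment(Filter, LastSeenKey) of
%%       overflow -> done;
%%       NextKey -> rocksdb:iterator_move(ITHandle, {seek, NextKey})
%%   end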
-spec bin_checkmask(filter(), binary()) -> boolean().
bin_checkmask(#filter{size = Size, bitmask = Bitmask, bitfilter = Bitfilter}, Key) ->
case Key of
<<Int:Size>> ->
Int band Bitmask =:= Bitfilter;
_ ->
false
end.
%%================================================================================
%% Internal functions
%%================================================================================
-spec make_bitmask(keymapper(), [{non_neg_integer(), non_neg_integer()}]) -> non_neg_integer().
make_bitmask(Keymapper = #keymapper{dim_sizeof = DimSizeof}, Ranges) ->
BitmaskVector = lists:map(
%% Note: this function operates in bitsource basis, scanning it from 0
%% to NDim (i.e. from the least significant bits to the most
%% significant bits)
ratchet_scan(_Ranges, NDim, _Key, NDim, Pivot, 0) ->
%% We've reached the end:
Pivot;
ratchet_scan(_Ranges, NDim, _Key, NDim, _Pivot, 1) ->
%% We've reached the end, but key is still not large enough:
overflow;
ratchet_scan(Ranges, NDim, Key, I, Pivot0, Carry) ->
#filter_scan_action{offset = Offset, size = Size, min = Min, max = Max} = array:get(I, Ranges),
%% Extract I-th element of the vector from the original key:
Elem = ((Key bsr Offset) band ones(Size)) + Carry,
if
Elem < Min ->
%% I-th coordinate is less than the specified minimum.
%%
%% We reset this coordinate to the minimum value. Since this
%% bumps the key up at this bit position, the less significant
%% bits have to be reset to their respective minimum values:
Pivot = {I + 1, 0},
ratchet_scan(Ranges, NDim, Key, I + 1, Pivot, 0);
Elem > Max ->
%% I-th coordinate is larger than the specified
%% maximum. We can only fix this problem by incrementing
%% the next coordinate (i.e. more significant bits).
%%
%% We reset this coordinate to the minimum value, and
%% increment the next coordinate (by setting `Carry' to
%% 1).
Pivot = {I + 1, 1},
ratchet_scan(Ranges, NDim, Key, I + 1, Pivot, 1);
true ->
%% Coordinate is within range:
ratchet_scan(Ranges, NDim, Key, I + 1, Pivot0, 0)
end.
%% Note: this function operates in bitsource basis, scanning it from
%% NDim to 0. It applies the transformation specified by
%% `ratchet_scan'.
ratchet_do(_Ranges, _Key, I, _Pivot, _Increment) when I < 0 ->
    0;
ratchet_do(Ranges, Key, I, Pivot, Increment) ->
#filter_scan_action{offset = Offset, size = Size, min = Min} = array:get(I, Ranges),
Mask = ones(Offset + Size) bxor ones(Offset),
Elem =
if
I > Pivot ->
Mask band Key;
I =:= Pivot ->
(Mask band Key) + (Increment bsl Offset);
true ->
Min bsl Offset
end,
%% erlang:display(
%% {ratchet_do, I, integer_to_list(Key, 16), integer_to_list(Mask, 2),
%% integer_to_list(Elem, 16)}
%% ),
Elem bor ratchet_do(Ranges, Key, I - 1, Pivot, Increment).
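%% Worked trace of the two passes for the ratchet2_test example below:
%% for Key = 16#aa00cd00, `ratchet_scan' walks from the least
%% significant bitsource upwards, finds the `hash' coordinate (16#cd)
%% above its maximum (16#cc), and records the next bitsource (the
%% epoch) as the pivot with Increment = 1. `ratchet_do' then walks back
%% down: bits above the pivot are copied from the key (static = 16#aa),
%% the pivot coordinate is incremented (epoch 16#00 -> 16#01), and
%% everything below the pivot is filled with the per-bitsource minima
%% (hash = 16#cc, offset = 16#00), yielding 16#aa01cc00.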
-spec make_bitfilter(keymapper(), [{non_neg_integer(), non_neg_integer()}]) ->
{non_neg_integer(), non_neg_integer()}.
make_bitfilter(Keymapper = #keymapper{dim_sizeof = DimSizeof}, Ranges) ->
L = lists:map(
fun
({{N, N}, Bits}) ->
%% For strict equality we can employ bitmask:
ones(Bits);
{ones(Bits), N};
(_) ->
0
{0, 0}
end,
lists:zip(Ranges, DimSizeof)
),
vector_to_key(Keymapper, BitmaskVector).
-spec inc_vector([{non_neg_integer(), non_neg_integer()}], vector()) -> vector() | overflow.
inc_vector(Filter, Vec0) ->
case normalize_vector(Filter, Vec0) of
{true, Vec} ->
Vec;
{false, Vec} ->
do_inc_vector(Filter, Vec, [])
end.
do_inc_vector([], [], _Acc) ->
overflow;
do_inc_vector([{Min, Max} | Intervals], [Elem | Vec], Acc) ->
case Elem of
Max ->
do_inc_vector(Intervals, Vec, [Min | Acc]);
_ when Elem < Max ->
lists:reverse(Acc) ++ [Elem + 1 | Vec]
end.
normalize_vector(Intervals, Vec0) ->
Vec = lists:map(
fun
({{Min, _Max}, Elem}) when Min > Elem ->
Min;
({{_Min, Max}, Elem}) when Max < Elem ->
Max;
({_, Elem}) ->
Elem
end,
lists:zip(Intervals, Vec0)
),
{Vec > Vec0, Vec}.
{Bitmask, Bitfilter} = lists:unzip(L),
{vector_to_key(Keymapper, Bitmask), vector_to_key(Keymapper, Bitfilter)}.
%% Transform inequalities into a list of closed intervals that the
%% vector elements should lie in.
desugar_filter(#keymapper{dim_sizeof = DimSizeof}, Filter) ->
inequations_to_ranges(#keymapper{dim_sizeof = DimSizeof}, Filter) ->
lists:map(
fun
({any, Bitsize}) ->
@@ -390,24 +515,6 @@ ones(Bits) ->
-ifdef(TEST).
%% %% Create a bitmask that is sufficient to cover a given number. E.g.:
%% %%
%% %% 2#1000 -> 2#1111; 2#0 -> 2#0; 2#10101 -> 2#11111
%% bitmask_of(N) ->
%% %% FIXME: avoid floats
%% NBits = ceil(math:log2(N + 1)),
%% ones(NBits).
%% bitmask_of_test() ->
%% ?assertEqual(2#0, bitmask_of(0)),
%% ?assertEqual(2#1, bitmask_of(1)),
%% ?assertEqual(2#11, bitmask_of(2#10)),
%% ?assertEqual(2#11, bitmask_of(2#11)),
%% ?assertEqual(2#1111, bitmask_of(2#1000)),
%% ?assertEqual(2#1111, bitmask_of(2#1111)),
%% ?assertEqual(ones(128), bitmask_of(ones(128))),
%% ?assertEqual(ones(256), bitmask_of(ones(256))).
make_keymapper0_test() ->
Schema = [],
?assertEqual(
@@ -510,235 +617,117 @@ key_to_vector2_test() ->
key2vec(Schema, [0, 1]),
key2vec(Schema, [255, 0]).
inc_vector0_test() ->
Keymapper = make_keymapper([]),
?assertMatch(overflow, incvec(Keymapper, [], [])).
inc_vector1_test() ->
Keymapper = make_keymapper([{1, 0, 8}]),
?assertMatch([3], incvec(Keymapper, [{'=', 3}], [1])),
?assertMatch([3], incvec(Keymapper, [{'=', 3}], [2])),
?assertMatch(overflow, incvec(Keymapper, [{'=', 3}], [3])),
?assertMatch(overflow, incvec(Keymapper, [{'=', 3}], [4])),
?assertMatch(overflow, incvec(Keymapper, [{'=', 3}], [255])),
%% Now with >=:
?assertMatch([1], incvec(Keymapper, [{'>=', 0}], [0])),
?assertMatch([255], incvec(Keymapper, [{'>=', 0}], [254])),
?assertMatch(overflow, incvec(Keymapper, [{'>=', 0}], [255])),
?assertMatch([100], incvec(Keymapper, [{'>=', 100}], [0])),
?assertMatch([100], incvec(Keymapper, [{'>=', 100}], [99])),
?assertMatch([255], incvec(Keymapper, [{'>=', 100}], [254])),
?assertMatch(overflow, incvec(Keymapper, [{'>=', 100}], [255])).
inc_vector2_test() ->
Keymapper = make_keymapper([{1, 0, 8}, {2, 0, 8}, {3, 0, 8}]),
Filter = [{'>=', 0}, {'=', 100}, {'>=', 30}],
?assertMatch([0, 100, 30], incvec(Keymapper, Filter, [0, 0, 0])),
?assertMatch([1, 100, 30], incvec(Keymapper, Filter, [0, 100, 30])),
?assertMatch([255, 100, 30], incvec(Keymapper, Filter, [254, 100, 30])),
?assertMatch([0, 100, 31], incvec(Keymapper, Filter, [255, 100, 30])),
?assertMatch([0, 100, 30], incvec(Keymapper, Filter, [0, 100, 29])),
?assertMatch(overflow, incvec(Keymapper, Filter, [255, 100, 255])),
?assertMatch([255, 100, 255], incvec(Keymapper, Filter, [254, 100, 255])),
?assertMatch([0, 100, 255], incvec(Keymapper, Filter, [255, 100, 254])),
%% Nasty cases (shouldn't happen, hopefully):
?assertMatch([1, 100, 30], incvec(Keymapper, Filter, [0, 101, 0])),
?assertMatch([1, 100, 33], incvec(Keymapper, Filter, [0, 101, 33])),
?assertMatch([0, 100, 255], incvec(Keymapper, Filter, [255, 101, 254])),
?assertMatch(overflow, incvec(Keymapper, Filter, [255, 101, 255])).
make_bitmask0_test() ->
Keymapper = make_keymapper([]),
?assertMatch(0, mkbmask(Keymapper, [])).
?assertMatch({0, 0}, mkbmask(Keymapper, [])).
make_bitmask1_test() ->
Keymapper = make_keymapper([{1, 0, 8}]),
?assertEqual(0, mkbmask(Keymapper, [any])),
?assertEqual(16#ff, mkbmask(Keymapper, [{'=', 1}])),
?assertEqual(16#ff, mkbmask(Keymapper, [{'=', 255}])),
?assertEqual(0, mkbmask(Keymapper, [{'>=', 0}])),
?assertEqual(0, mkbmask(Keymapper, [{'>=', 1}])),
?assertEqual(0, mkbmask(Keymapper, [{'>=', 16#f}])).
?assertEqual({0, 0}, mkbmask(Keymapper, [any])),
?assertEqual({16#ff, 1}, mkbmask(Keymapper, [{'=', 1}])),
?assertEqual({16#ff, 255}, mkbmask(Keymapper, [{'=', 255}])),
?assertEqual({0, 0}, mkbmask(Keymapper, [{'>=', 0}])),
?assertEqual({0, 0}, mkbmask(Keymapper, [{'>=', 1}])),
?assertEqual({0, 0}, mkbmask(Keymapper, [{'>=', 16#f}])).
make_bitmask2_test() ->
Keymapper = make_keymapper([{1, 0, 3}, {2, 0, 4}, {3, 0, 2}]),
?assertEqual(2#00_0000_000, mkbmask(Keymapper, [any, any, any])),
?assertEqual(2#11_0000_000, mkbmask(Keymapper, [any, any, {'=', 0}])),
?assertEqual(2#00_1111_000, mkbmask(Keymapper, [any, {'=', 0}, any])),
?assertEqual(2#00_0000_111, mkbmask(Keymapper, [{'=', 0}, any, any])).
?assertEqual({2#00_0000_000, 2#00_0000_000}, mkbmask(Keymapper, [any, any, any])),
?assertEqual({2#11_0000_000, 2#00_0000_000}, mkbmask(Keymapper, [any, any, {'=', 0}])),
?assertEqual({2#00_1111_000, 2#00_0000_000}, mkbmask(Keymapper, [any, {'=', 0}, any])),
?assertEqual({2#00_0000_111, 2#00_0000_000}, mkbmask(Keymapper, [{'=', 0}, any, any])).
make_bitmask3_test() ->
%% Key format of type |TimeOffset|Topic|Epoch|:
Keymapper = make_keymapper([{1, 8, 8}, {2, 0, 8}, {1, 0, 8}]),
?assertEqual(2#00000000_00000000_00000000, mkbmask(Keymapper, [any, any])),
?assertEqual(2#11111111_11111111_11111111, mkbmask(Keymapper, [{'=', 33}, {'=', 22}])),
?assertEqual(2#11111111_11111111_11111111, mkbmask(Keymapper, [{'=', 33}, {'=', 22}])),
?assertEqual(2#00000000_11111111_00000000, mkbmask(Keymapper, [{'>=', 255}, {'=', 22}])).
Keymapper = make_keymapper([{1, 0, 8}, {2, 0, 8}, {1, 8, 8}]),
?assertEqual({2#00000000_00000000_00000000, 16#00_00_00}, mkbmask(Keymapper, [any, any])),
?assertEqual(
{2#11111111_11111111_11111111, 16#aa_cc_bb},
mkbmask(Keymapper, [{'=', 16#aabb}, {'=', 16#cc}])
),
?assertEqual(
{2#00000000_11111111_00000000, 16#00_bb_00}, mkbmask(Keymapper, [{'>=', 255}, {'=', 16#bb}])
).
next_range0_test() ->
Keymapper = make_keymapper([]),
make_filter_test() ->
KeyMapper = make_keymapper([]),
Filter = [],
PrevKey = 0,
?assertMatch(undefined, next_range(Keymapper, Filter, PrevKey)).
?assertMatch(#filter{size = 0, bitmask = 0, bitfilter = 0}, make_filter(KeyMapper, Filter)).
next_range1_test() ->
Keymapper = make_keymapper([{1, 0, 8}, {2, 0, 8}]),
?assertMatch(undefined, next_range(Keymapper, [{'=', 0}, {'=', 0}], 0)),
?assertMatch({1, 16#ffff, 1}, next_range(Keymapper, [{'=', 1}, {'=', 0}], 0)),
?assertMatch({16#100, 16#ffff, 16#100}, next_range(Keymapper, [{'=', 0}, {'=', 1}], 0)),
%% Now with any:
?assertMatch({1, 0, 0}, next_range(Keymapper, [any, any], 0)),
?assertMatch({2, 0, 0}, next_range(Keymapper, [any, any], 1)),
?assertMatch({16#fffb, 0, 0}, next_range(Keymapper, [any, any], 16#fffa)),
%% Now with >=:
ratchet1_test() ->
Bitsources = [{1, 0, 8}],
M = make_keymapper(Bitsources),
F = make_filter(M, [any]),
#filter{bitsource_ranges = Rarr} = F,
?assertMatch(
{16#42_30, 16#ff00, 16#42_00}, next_range(Keymapper, [{'>=', 16#30}, {'=', 16#42}], 0)
),
?assertMatch(
{16#42_31, 16#ff00, 16#42_00},
next_range(Keymapper, [{'>=', 16#30}, {'=', 16#42}], 16#42_30)
[
#filter_scan_action{
offset = 0,
size = 8,
min = 0,
max = 16#ff
}
],
array:to_list(Rarr)
),
?assertEqual(0, ratchet(F, 0)),
?assertEqual(16#fa, ratchet(F, 16#fa)),
?assertEqual(16#ff, ratchet(F, 16#ff)),
?assertEqual(overflow, ratchet(F, 16#100), "TBD: filter must store the upper bound").
?assertMatch(
{16#30_42, 16#00ff, 16#00_42}, next_range(Keymapper, [{'=', 16#42}, {'>=', 16#30}], 0)
),
?assertMatch(
{16#31_42, 16#00ff, 16#00_42},
next_range(Keymapper, [{'=', 16#42}, {'>=', 16#30}], 16#00_43)
).
%% erlfmt-ignore
ratchet2_test() ->
Bitsources = [{1, 0, 8}, %% Static topic index
{2, 8, 8}, %% Epoch
{3, 0, 8}, %% Varying topic hash
{2, 0, 8}], %% Timestamp offset
M = make_keymapper(lists:reverse(Bitsources)),
F1 = make_filter(M, [{'=', 16#aa}, any, {'=', 16#cc}]),
?assertEqual(16#aa00cc00, ratchet(F1, 0)),
?assertEqual(16#aa01cc00, ratchet(F1, 16#aa00cd00)),
?assertEqual(16#aa01cc11, ratchet(F1, 16#aa01cc11)),
?assertEqual(16#aa11cc00, ratchet(F1, 16#aa10cd00)),
?assertEqual(16#aa11cc00, ratchet(F1, 16#aa10dc11)),
?assertEqual(overflow, ratchet(F1, 16#ab000000)),
F2 = make_filter(M, [{'=', 16#aa}, {'>=', 16#dddd}, {'=', 16#cc}]),
?assertEqual(16#aaddcc00, ratchet(F2, 0)),
?assertEqual(16#aa_de_cc_00, ratchet(F2, 16#aa_dd_cd_11)).
%% Bunch of tests verifying that the ratchet function doesn't skip over keys:
ratchet3_test() ->
?assert(proper:quickcheck(ratchet1_prop(), 100)).
-define(assertIterComplete(A, B),
?assertEqual(A -- [0], B)
).
%% erlfmt-ignore
ratchet1_prop() ->
EpochBits = 4,
Bitsources = [{1, 0, 2}, %% Static topic index
{2, EpochBits, 4}, %% Epoch
{3, 0, 2}, %% Varying topic hash
{2, 0, EpochBits}], %% Timestamp offset
M = make_keymapper(lists:reverse(Bitsources)),
F1 = make_filter(M, [{'=', 2#10}, any, {'=', 2#01}]),
?FORALL(N, integer(0, ones(12)),
ratchet_prop(F1, N)).
-define(assertSameSet(A, B),
?assertIterComplete(lists:sort(A), lists:sort(B))
).
iterate1_test() ->
SizeX = 3,
SizeY = 3,
Keymapper = make_keymapper([{1, 0, SizeX}, {2, 0, SizeY}]),
Keys = test_iteration(Keymapper, [any, any]),
Expected = [
X bor (Y bsl SizeX)
|| Y <- lists:seq(0, ones(SizeY)), X <- lists:seq(0, ones(SizeX))
],
?assertIterComplete(Expected, Keys).
iterate2_test() ->
SizeX = 64,
SizeY = 3,
Keymapper = make_keymapper([{1, 0, SizeX}, {2, 0, SizeY}]),
X = 123456789,
Keys = test_iteration(Keymapper, [{'=', X}, any]),
Expected = [
X bor (Y bsl SizeX)
|| Y <- lists:seq(0, ones(SizeY))
],
?assertIterComplete(Expected, Keys).
iterate3_test() ->
SizeX = 3,
SizeY = 64,
Y = 42,
Keymapper = make_keymapper([{1, 0, SizeX}, {2, 0, SizeY}]),
Keys = test_iteration(Keymapper, [any, {'=', Y}]),
Expected = [
X bor (Y bsl SizeX)
|| X <- lists:seq(0, ones(SizeX))
],
?assertIterComplete(Expected, Keys).
iterate4_test() ->
SizeX = 8,
SizeY = 4,
MinX = 16#fa,
MinY = 16#a,
Keymapper = make_keymapper([{1, 0, SizeX}, {2, 0, SizeY}]),
Keys = test_iteration(Keymapper, [{'>=', MinX}, {'>=', MinY}]),
Expected = [
X bor (Y bsl SizeX)
|| Y <- lists:seq(MinY, ones(SizeY)), X <- lists:seq(MinX, ones(SizeX))
],
?assertIterComplete(Expected, Keys).
iterate1_prop() ->
Size = 4,
?FORALL(
{SizeX, SizeY},
{integer(1, Size), integer(1, Size)},
?FORALL(
{SplitX, MinX, MinY},
{integer(0, SizeX), integer(0, SizeX), integer(0, SizeY)},
begin
Keymapper = make_keymapper([
{1, 0, SplitX}, {2, 0, SizeY}, {1, SplitX, SizeX - SplitX}
]),
Keys = test_iteration(Keymapper, [{'>=', MinX}, {'>=', MinY}]),
Expected = [
vector_to_key(Keymapper, [X, Y])
|| X <- lists:seq(MinX, ones(SizeX)),
Y <- lists:seq(MinY, ones(SizeY))
],
?assertSameSet(Expected, Keys),
true
end
)
).
iterate5_test() ->
?assert(proper:quickcheck(iterate1_prop(), 100)).
iterate2_prop() ->
Size = 4,
?FORALL(
{SizeX, SizeY},
{integer(1, Size), integer(1, Size)},
?FORALL(
{SplitX, MinX, MinY},
{integer(0, SizeX), integer(0, SizeX), integer(0, SizeY)},
begin
Keymapper = make_keymapper([
{1, SplitX, SizeX - SplitX}, {2, 0, SizeY}, {1, 0, SplitX}
]),
Keys = test_iteration(Keymapper, [{'>=', MinX}, {'>=', MinY}]),
Expected = [
vector_to_key(Keymapper, [X, Y])
|| X <- lists:seq(MinX, ones(SizeX)),
Y <- lists:seq(MinY, ones(SizeY))
],
?assertSameSet(Expected, Keys),
true
end
)
).
iterate6_test() ->
?assert(proper:quickcheck(iterate2_prop(), 1000)).
test_iteration(Keymapper, Filter) ->
test_iteration(Keymapper, Filter, 0).
test_iteration(Keymapper, Filter, PrevKey) ->
case next_range(Keymapper, Filter, PrevKey) of
undefined ->
[];
{Key, Bitmask, Bitfilter} ->
?assert((Key band Bitmask) =:= Bitfilter),
[Key | test_iteration(Keymapper, Filter, Key)]
end.
ratchet_prop(Filter = #filter{bitfilter = Bitfilter, bitmask = Bitmask, size = Size}, Key0) ->
Key = ratchet(Filter, Key0),
?assert(Key =:= overflow orelse (Key band Bitmask =:= Bitfilter)),
?assert(Key >= Key0, {Key, '>=', Key0}),
IMax = ones(Size),
CheckGaps = fun
F(I) when I >= Key; I > IMax ->
true;
F(I) ->
?assertNot(
I band Bitmask =:= Bitfilter,
{found_gap, Key0, I, Key}
),
F(I + 1)
end,
CheckGaps(Key0).
mkbmask(Keymapper, Filter0) ->
Filter = desugar_filter(Keymapper, Filter0),
make_bitmask(Keymapper, Filter).
incvec(Keymapper, Filter0, Vector) ->
Filter = desugar_filter(Keymapper, Filter0),
inc_vector(Filter, Vector).
Filter = inequations_to_ranges(Keymapper, Filter0),
make_bitfilter(Keymapper, Filter).
key2vec(Schema, Vector) ->
Keymapper = make_keymapper(Schema),

View File

@@ -30,7 +30,7 @@
-export([create/4, open/5, store_batch/4, get_streams/4, make_iterator/5, next/4]).
%% internal exports:
-export([]).
-export([format_key/2, format_keyfilter/1]).
-export_type([options/0]).
@@ -73,8 +73,7 @@
topic_filter :: emqx_ds:topic_filter(),
start_time :: emqx_ds:time(),
storage_key :: emqx_ds_lts:msg_storage_key(),
last_seen_key = 0 :: emqx_ds_bitmask_keymapper:key(),
key_filter :: [emqx_ds_bitmask_keymapper:scalar_range()]
last_seen_key = <<>> :: binary()
}).
-define(QUICKCHECK_KEY(KEY, BITMASK, BITFILTER),
@@ -83,6 +82,8 @@
-define(COUNTER, emqx_ds_storage_bitfield_lts_counter).
-include("emqx_ds_bitmask.hrl").
%%================================================================================
%% API functions
%%================================================================================
@@ -95,7 +96,8 @@ create(_ShardId, DBHandle, GenId, Options) ->
%% Get options:
BitsPerTopicLevel = maps:get(bits_per_wildcard_level, Options, 64),
TopicIndexBytes = maps:get(topic_index_bytes, Options, 4),
TSOffsetBits = maps:get(epoch_bits, Options, 8), %% TODO: change to 10 to make it around ~1 sec
%% 10 bits -> 1024 ms -> ~1 sec
TSOffsetBits = maps:get(epoch_bits, Options, 10),
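%% The epoch of a message is its timestamp shifted right by
%% TSOffsetBits, so with millisecond timestamps 10 offset bits make one
%% epoch span 2^10 = 1024 ms.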
%% Create column families:
DataCFName = data_cf(GenId),
TrieCFName = trie_cf(GenId),
@@ -120,17 +122,17 @@ open(_Shard, DBHandle, GenId, CFRefs, Schema) ->
{_, DataCF} = lists:keyfind(data_cf(GenId), 1, CFRefs),
{_, TrieCF} = lists:keyfind(trie_cf(GenId), 1, CFRefs),
Trie = restore_trie(TopicIndexBytes, DBHandle, TrieCF),
%% If user's topics have more than learned 10 wildcard levels,
%% then it's total carnage; learned topic structure won't help
%% much:
%% If user's topics have more than learned 10 wildcard levels
%% (more than 2, really), then it's total carnage; learned topic
%% structure won't help.
MaxWildcardLevels = 10,
Keymappers = array:from_list(
KeymapperCache = array:from_list(
[
make_keymapper(TopicIndexBytes, BitsPerTopicLevel, TSBits, TSOffsetBits, N)
|| N <- lists:seq(0, MaxWildcardLevels)
]
),
#s{db = DBHandle, data = DataCF, trie = Trie, keymappers = Keymappers}.
#s{db = DBHandle, data = DataCF, trie = Trie, keymappers = KeymapperCache}.
store_batch(_ShardId, S = #s{db = DB, data = Data}, Messages, _Options) ->
lists:foreach(
@@ -144,16 +146,26 @@ store_batch(_ShardId, S = #s{db = DB, data = Data}, Messages, _Options) ->
get_streams(_Shard, #s{trie = Trie}, TopicFilter, _StartTime) ->
Indexes = emqx_ds_lts:match_topics(Trie, TopicFilter),
[
#stream{
storage_key = I
}
|| I <- Indexes
].
[#stream{storage_key = I} || I <- Indexes].
make_iterator(_Shard, _Data, #stream{storage_key = StorageKey}, TopicFilter, StartTime) ->
%% Note: it's a good idea to keep the iterator structure lean,
%% since it can be stored on a remote node that could update its
%% code independently from us.
{ok, #it{
topic_filter = TopicFilter,
start_time = StartTime,
storage_key = StorageKey
}}.
next(_Shard, #s{db = DB, data = CF, keymappers = Keymappers}, It0, BatchSize) ->
#it{
start_time = StartTime,
storage_key = StorageKey
} = It0,
%% Make filter:
{TopicIndex, Varying} = StorageKey,
Filter = [
Inequations = [
{'=', TopicIndex},
{'>=', StartTime}
| lists:map(
@@ -166,29 +178,22 @@ make_iterator(_Shard, _Data, #stream{storage_key = StorageKey}, TopicFilter, Sta
Varying
)
],
{ok, #it{
topic_filter = TopicFilter,
start_time = StartTime,
storage_key = StorageKey,
key_filter = Filter
}}.
next(_Shard, #s{db = DB, data = CF, keymappers = Keymappers}, It0, BatchSize) ->
#it{
key_filter = KeyFilter
} = It0,
% TODO: ugh, so ugly
NVarying = length(KeyFilter) - 2,
%% Obtain a keymapper for the current number of varying
%% levels. Magic constant 2: we have two extra dimensions of topic
%% index and time; the rest of dimensions are varying levels.
NVarying = length(Inequations) - 2,
Keymapper = array:get(NVarying, Keymappers),
%% Calculate lower and upper bounds for iteration:
LowerBound = lower_bound(Keymapper, KeyFilter),
UpperBound = upper_bound(Keymapper, KeyFilter),
Filter =
#filter{range_min = LowerBound, range_max = UpperBound} = emqx_ds_bitmask_keymapper:make_filter(
Keymapper, Inequations
),
{ok, ITHandle} = rocksdb:iterator(DB, CF, [
{iterate_lower_bound, LowerBound}, {iterate_upper_bound, UpperBound}
{iterate_lower_bound, emqx_ds_bitmask_keymapper:key_to_bitstring(Keymapper, LowerBound)},
{iterate_upper_bound, emqx_ds_bitmask_keymapper:key_to_bitstring(Keymapper, UpperBound)}
]),
try
put(?COUNTER, 0),
next_loop(ITHandle, Keymapper, It0, [], BatchSize)
next_loop(ITHandle, Keymapper, Filter, It0, [], BatchSize)
after
rocksdb:iterator_close(ITHandle),
erase(?COUNTER)
@@ -198,100 +203,64 @@ next(_Shard, #s{db = DB, data = CF, keymappers = Keymappers}, It0, BatchSize) ->
%% Internal functions
%%================================================================================
next_loop(_, _, It, Acc, 0) ->
next_loop(ITHandle, KeyMapper, Filter, It, Acc, 0) ->
{ok, It, lists:reverse(Acc)};
next_loop(ITHandle, KeyMapper, It0 = #it{last_seen_key = Key0, key_filter = KeyFilter}, Acc0, N0) ->
next_loop(ITHandle, KeyMapper, Filter, It0, Acc0, N0) ->
inc_counter(),
case next_range(KeyMapper, It0) of
{Key1, Bitmask, Bitfilter} when Key1 > Key0 ->
case iterator_move(KeyMapper, ITHandle, {seek, Key1}) of
{ok, Key, Val} when ?QUICKCHECK_KEY(Key, Bitmask, Bitfilter) ->
assert_progress(bitmask_match, KeyMapper, KeyFilter, Key0, Key1),
Msg = deserialize(Val),
#it{last_seen_key = Key0} = It0,
case emqx_ds_bitmask_keymapper:bin_increment(Filter, Key0) of
overflow ->
{ok, It0, lists:reverse(Acc0)};
Key1 ->
%% assert
true = Key1 > Key0,
case rocksdb:iterator_move(ITHandle, {seek, Key1}) of
{ok, Key, Val} ->
It1 = It0#it{last_seen_key = Key},
case check_message(It1, Msg) of
true ->
case check_message(Filter, It1, Val) of
{true, Msg} ->
N1 = N0 - 1,
Acc1 = [Msg | Acc0];
false ->
N1 = N0,
Acc1 = Acc0
end,
{N, It, Acc} = traverse_interval(
ITHandle, KeyMapper, Bitmask, Bitfilter, It1, Acc1, N1
),
next_loop(ITHandle, KeyMapper, It, Acc, N);
{ok, Key, _Val} ->
assert_progress(bitmask_miss, KeyMapper, KeyFilter, Key0, Key1),
It = It0#it{last_seen_key = Key},
next_loop(ITHandle, KeyMapper, It, Acc0, N0);
{N, It, Acc} = traverse_interval(ITHandle, KeyMapper, Filter, It1, Acc1, N1),
next_loop(ITHandle, KeyMapper, Filter, It, Acc, N);
{error, invalid_iterator} ->
{ok, It0, lists:reverse(Acc0)}
end;
_ ->
{ok, It0, lists:reverse(Acc0)}
end
end.
traverse_interval(_, _, _, _, It, Acc, 0) ->
traverse_interval(_ITHandle, _KeyMapper, _Filter, It, Acc, 0) ->
{0, It, Acc};
traverse_interval(ITHandle, KeyMapper, Bitmask, Bitfilter, It0, Acc, N) ->
traverse_interval(ITHandle, KeyMapper, Filter, It0, Acc, N) ->
inc_counter(),
case iterator_move(KeyMapper, ITHandle, next) of
{ok, Key, Val} when ?QUICKCHECK_KEY(Key, Bitmask, Bitfilter) ->
Msg = deserialize(Val),
case rocksdb:iterator_move(ITHandle, next) of
{ok, Key, Val} ->
It = It0#it{last_seen_key = Key},
case check_message(It, Msg) of
true ->
traverse_interval(
ITHandle, KeyMapper, Bitmask, Bitfilter, It, [Msg | Acc], N - 1
);
case check_message(Filter, It, Val) of
{true, Msg} ->
traverse_interval(ITHandle, KeyMapper, Filter, It, [Msg | Acc], N - 1);
false ->
traverse_interval(ITHandle, KeyMapper, Bitmask, Bitfilter, It, Acc, N)
traverse_interval(ITHandle, KeyMapper, Filter, It, Acc, N)
end;
{ok, Key, _Val} ->
It = It0#it{last_seen_key = Key},
{N, It, Acc};
{error, invalid_iterator} ->
{0, It0, Acc}
end.
next_range(KeyMapper, #it{key_filter = KeyFilter, last_seen_key = PrevKey}) ->
emqx_ds_bitmask_keymapper:next_range(KeyMapper, KeyFilter, PrevKey).
check_message(_Iterator, _Msg) ->
%% TODO.
true.
iterator_move(KeyMapper, ITHandle, Action0) ->
Action =
case Action0 of
next ->
next;
{seek, Int} ->
{seek, emqx_ds_bitmask_keymapper:key_to_bitstring(KeyMapper, Int)}
end,
case rocksdb:iterator_move(ITHandle, Action) of
{ok, KeyBin, Val} ->
{ok, emqx_ds_bitmask_keymapper:bitstring_to_key(KeyMapper, KeyBin), Val};
{ok, KeyBin} ->
{ok, emqx_ds_bitmask_keymapper:bitstring_to_key(KeyMapper, KeyBin)};
Other ->
Other
-spec check_message(emqx_ds_bitmask_keymapper:filter(), #it{}, binary()) ->
{true, #message{}} | false.
check_message(Filter, #it{last_seen_key = Key}, Val) ->
case emqx_ds_bitmask_keymapper:bin_checkmask(Filter, Key) of
true ->
Msg = deserialize(Val),
%% TODO: check strict time and hash collisions
{true, Msg};
false ->
false
end.
assert_progress(_Msg, _KeyMapper, _KeyFilter, Key0, Key1) when Key1 > Key0 ->
?tp_ignore_side_effects_in_prod(
emqx_ds_storage_bitfield_lts_iter_move,
#{ location => _Msg
, key0 => format_key(_KeyMapper, Key0)
, key1 => format_key(_KeyMapper, Key1)
}),
ok;
assert_progress(Msg, KeyMapper, KeyFilter, Key0, Key1) ->
Str0 = format_key(KeyMapper, Key0),
Str1 = format_key(KeyMapper, Key1),
error(#{'$msg' => Msg, key0 => Str0, key1 => Str1, step => get(?COUNTER), keyfilter => lists:map(fun format_keyfilter/1, KeyFilter)}).
format_key(KeyMapper, Key) ->
Vec = [integer_to_list(I, 16) || I <- emqx_ds_bitmask_keymapper:key_to_vector(KeyMapper, Key)],
lists:flatten(io_lib:format("~.16B (~s)", [Key, string:join(Vec, ",")])).
@@ -357,16 +326,6 @@ make_keymapper(TopicIndexBytes, BitsPerTopicLevel, TSBits, TSOffsetBits, N) ->
end,
Keymapper.
upper_bound(Keymapper, [TopicIndex | Rest]) ->
filter_to_key(Keymapper, [TopicIndex | [{'=', infinity} || _ <- Rest]]).
lower_bound(Keymapper, [TopicIndex | Rest]) ->
filter_to_key(Keymapper, [TopicIndex | [{'=', 0} || _ <- Rest]]).
filter_to_key(KeyMapper, KeyFilter) ->
{Key, _, _} = emqx_ds_bitmask_keymapper:next_range(KeyMapper, KeyFilter, 0),
emqx_ds_bitmask_keymapper:key_to_bitstring(KeyMapper, Key).
-spec restore_trie(pos_integer(), rocksdb:db_handle(), rocksdb:cf_handle()) -> emqx_ds_lts:trie().
restore_trie(TopicIndexBytes, DB, CF) ->
PersistCallback = fun(Key, Val) ->

View File

@@ -1,714 +0,0 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_ds_storage_layer).
-behaviour(gen_server).
%% API:
-export([start_link/2]).
-export([create_generation/3]).
-export([open_shard/2, get_streams/3]).
-export([message_store/3]).
-export([delete/4]).
-export([make_iterator/3, next/1, next/2]).
-export([
preserve_iterator/2,
restore_iterator/2,
discard_iterator/2,
ensure_iterator/3,
discard_iterator_prefix/2,
list_iterator_prefix/2,
foldl_iterator_prefix/4
]).
%% gen_server callbacks:
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
-export_type([stream/0, cf_refs/0, gen_id/0, options/0, state/0, iterator/0]).
-export_type([db_options/0, db_write_options/0, db_read_options/0]).
-compile({inline, [meta_lookup/2]}).
-include_lib("emqx/include/emqx.hrl").
%%================================================================================
%% Type declarations
%%================================================================================
-type options() :: #{
dir => file:filename()
}.
%% see rocksdb:db_options()
-type db_options() :: proplists:proplist().
%% see rocksdb:write_options()
-type db_write_options() :: proplists:proplist().
%% see rocksdb:read_options()
-type db_read_options() :: proplists:proplist().
-type cf_refs() :: [{string(), rocksdb:cf_handle()}].
%% Message storage generation
%% Keep in mind that instances of this type are persisted in long-term storage.
-type generation() :: #{
%% Module that handles data for the generation
module := module(),
%% Module-specific data defined at generation creation time
data := term(),
%% When should this generation become active?
%% This generation should only contain messages timestamped no earlier than that.
%% The very first generation will have `since` equal 0.
since := emqx_ds:time()
}.
-record(s, {
shard :: emqx_ds:shard(),
keyspace :: emqx_ds_conf:keyspace(),
db :: rocksdb:db_handle(),
cf_iterator :: rocksdb:cf_handle(),
cf_generations :: cf_refs()
}).
-record(stream,
{ generation :: gen_id()
, topic_filter :: emqx_ds:topic_filter()
, since :: emqx_ds:time()
, enc :: _EncapsulatedData
}).
-opaque stream() :: #stream{}.
-record(it, {
shard :: emqx_ds:shard(),
gen :: gen_id(),
replay :: emqx_ds:replay(),
module :: module(),
data :: term()
}).
-type gen_id() :: 0..16#ffff.
-opaque state() :: #s{}.
-opaque iterator() :: #it{}.
%% Contents of the default column family:
%%
%% [{<<"genNN">>, #generation{}}, ...,
%% {<<"current">>, GenID}]
-define(DEFAULT_CF, "default").
-define(DEFAULT_CF_OPTS, []).
-define(ITERATOR_CF, "$iterators").
%% TODO
%% 1. CuckooTable might be of use here / `OptimizeForPointLookup(...)`.
%% 2. Supposedly might be compressed _very_ effectively.
%% 3. `inplace_update_support`?
-define(ITERATOR_CF_OPTS, []).
-define(REF(ShardId), {via, gproc, {n, l, {?MODULE, ShardId}}}).
%%================================================================================
%% Callbacks
%%================================================================================
-callback create_new(rocksdb:db_handle(), gen_id(), _Options :: term()) ->
{_Schema, cf_refs()}.
-callback open(
emqx_ds:shard(),
rocksdb:db_handle(),
gen_id(),
cf_refs(),
_Schema
) ->
_DB.
-callback store(
_DB,
_MessageID :: binary(),
emqx_ds:time(),
emqx_ds:topic(),
_Payload :: binary()
) ->
ok | {error, _}.
-callback delete(_DB, _MessageID :: binary(), emqx_ds:time(), emqx_ds:topic()) ->
ok | {error, _}.
-callback get_streams(_DB, emqx_ds:topic_filter(), emqx_ds:time()) ->
[{_TopicRankX, _Stream}].
-callback make_iterator(_DB, emqx_ds:replay()) ->
{ok, _It} | {error, _}.
-callback restore_iterator(_DB, _Serialized :: binary()) -> {ok, _It} | {error, _}.
-callback preserve_iterator(_It) -> term().
-callback next(It) -> {value, binary(), It} | none | {error, closed}.
%%================================================================================
%% Replication layer API
%%================================================================================
-spec open_shard(emqx_ds_replication_layer:shard(), emqx_ds_storage_layer:options()) -> ok.
open_shard(Shard, Options) ->
emqx_ds_storage_layer_sup:ensure_shard(Shard, Options).
-spec get_streams(emqx_ds:shard_id(), emqx_ds:topic_filter(), emqx_ds:time()) -> [{emqx_ds:stream_rank(), _Stream}].
get_streams(Shard, TopicFilter, StartTime) ->
%% TODO: lookup ALL generations
{GenId, #{module := Mod, data := ModState}} = meta_lookup_gen(Shard, StartTime),
lists:map(
fun({RankX, ModStream}) ->
Stream = #stream{ generation = GenId
, topic_filter = TopicFilter
, since = StartTime
, enc = ModStream
},
Rank = {RankX, GenId},
{Rank, Stream}
end,
Mod:get_streams(ModState, TopicFilter, StartTime)).
-spec message_store(emqx_ds:shard(), [emqx_types:message()], emqx_ds:message_store_opts()) ->
{ok, _MessageId} | {error, _}.
message_store(Shard, Msgs, _Opts) ->
{ok, lists:map(
fun(Msg) ->
GUID = emqx_message:id(Msg),
Timestamp = Msg#message.timestamp,
{_GenId, #{module := Mod, data := ModState}} = meta_lookup_gen(Shard, Timestamp),
Topic = emqx_topic:words(emqx_message:topic(Msg)),
Payload = serialize(Msg),
Mod:store(ModState, GUID, Timestamp, Topic, Payload),
GUID
end,
Msgs)}.
-spec next(iterator()) -> {ok, iterator(), [binary()]} | end_of_stream.
next(It = #it{}) ->
next(It, _BatchSize = 1).
-spec next(iterator(), pos_integer()) -> {ok, iterator(), [binary()]} | end_of_stream.
next(#it{data = {?MODULE, end_of_stream}}, _BatchSize) ->
end_of_stream;
next(
It = #it{shard = Shard, module = Mod, gen = Gen, data = {?MODULE, retry, Serialized}}, BatchSize
) ->
#{data := DBData} = meta_get_gen(Shard, Gen),
{ok, ItData} = Mod:restore_iterator(DBData, Serialized),
next(It#it{data = ItData}, BatchSize);
next(It = #it{}, BatchSize) ->
do_next(It, BatchSize, _Acc = []).
%%================================================================================
%% API functions
%%================================================================================
-spec create_generation(
emqx_ds:shard(), emqx_ds:time(), emqx_ds_conf:backend_config()
) ->
{ok, gen_id()} | {error, nonmonotonic}.
create_generation(ShardId, Since, Config = {_Module, _Options}) ->
gen_server:call(?REF(ShardId), {create_generation, Since, Config}).
-spec delete(emqx_ds:shard(), emqx_guid:guid(), emqx_ds:time(), emqx_ds:topic()) ->
ok | {error, _}.
delete(Shard, GUID, Time, Topic) ->
{_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Shard, Time),
Mod:delete(Data, GUID, Time, Topic).
-spec make_iterator(emqx_ds:shard(), stream(), emqx_ds:time()) ->
    {ok, iterator()} | {error, _TODO}.
make_iterator(Shard, Stream, StartTime) ->
    #stream{ topic_filter = TopicFilter
           , since = Since
           , enc = _Enc
           } = Stream,
    {GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Shard, StartTime),
    Replay = {TopicFilter, Since},
    case Mod:make_iterator(Data, Replay) of
        {ok, ItData} ->
            {ok, #it{ shard = Shard
                    , gen = GenId
                    , replay = Replay
                    , module = Mod
                    , data = ItData
                    }};
        {error, _} = Err ->
            Err
    end.
-spec do_next(iterator(), non_neg_integer(), [binary()]) ->
{ok, iterator(), [binary()]} | end_of_stream.
do_next(It, N, Acc) when N =< 0 ->
{ok, It, lists:reverse(Acc)};
do_next(It = #it{module = Mod, data = ItData}, N, Acc) ->
case Mod:next(ItData) of
{value, Bin, ItDataNext} ->
Val = deserialize(Bin),
do_next(It#it{data = ItDataNext}, N - 1, [Val | Acc]);
{error, _} = _Error ->
%% todo: log?
%% iterator might be invalid now; will need to re-open it.
Serialized = Mod:preserve_iterator(ItData),
{ok, It#it{data = {?MODULE, retry, Serialized}}, lists:reverse(Acc)};
none ->
case open_next_iterator(It) of
{ok, ItNext} ->
do_next(ItNext, N, Acc);
{error, _} = _Error ->
%% todo: log?
%% fixme: only bad options may lead to this?
%% return an "empty" iterator to be re-opened when retrying?
Serialized = Mod:preserve_iterator(ItData),
{ok, It#it{data = {?MODULE, retry, Serialized}}, lists:reverse(Acc)};
none ->
case Acc of
[] ->
end_of_stream;
_ ->
{ok, It#it{data = {?MODULE, end_of_stream}}, lists:reverse(Acc)}
end
end
end.
-spec preserve_iterator(iterator(), emqx_ds:iterator_id()) ->
ok | {error, _TODO}.
preserve_iterator(It = #it{}, IteratorID) ->
iterator_put_state(IteratorID, It).
-spec restore_iterator(emqx_ds:shard(), emqx_ds:replay_id()) ->
{ok, iterator()} | {error, _TODO}.
restore_iterator(Shard, ReplayID) ->
case iterator_get_state(Shard, ReplayID) of
{ok, Serial} ->
restore_iterator_state(Shard, Serial);
not_found ->
{error, not_found};
{error, _Reason} = Error ->
Error
end.
-spec ensure_iterator(emqx_ds:shard(), emqx_ds:iterator_id(), emqx_ds:replay()) ->
{ok, iterator()} | {error, _TODO}.
ensure_iterator(Shard, IteratorID, Replay = {_TopicFilter, _StartMS}) ->
case restore_iterator(Shard, IteratorID) of
{ok, It} ->
{ok, It};
{error, not_found} ->
{ok, It} = make_iterator(Shard, Replay),
ok = emqx_ds_storage_layer:preserve_iterator(It, IteratorID),
{ok, It};
Error ->
Error
end.
-spec discard_iterator(emqx_ds:shard(), emqx_ds:replay_id()) ->
ok | {error, _TODO}.
discard_iterator(Shard, ReplayID) ->
iterator_delete(Shard, ReplayID).
-spec discard_iterator_prefix(emqx_ds:shard(), binary()) ->
ok | {error, _TODO}.
discard_iterator_prefix(Shard, KeyPrefix) ->
case do_discard_iterator_prefix(Shard, KeyPrefix) of
{ok, _} -> ok;
Error -> Error
end.
-spec list_iterator_prefix(
emqx_ds:shard(),
binary()
) -> {ok, [emqx_ds:iterator_id()]} | {error, _TODO}.
list_iterator_prefix(Shard, KeyPrefix) ->
do_list_iterator_prefix(Shard, KeyPrefix).
-spec foldl_iterator_prefix(
emqx_ds:shard(),
binary(),
fun((_Key :: binary(), _Value :: binary(), Acc) -> Acc),
Acc
) -> {ok, Acc} | {error, _TODO} when
Acc :: term().
foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc) ->
do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc).
%%================================================================================
%% gen_server
%%================================================================================
-spec start_link(emqx_ds:shard(), emqx_ds_storage_layer:options()) ->
{ok, pid()}.
start_link(Shard, Options) ->
gen_server:start_link(?REF(Shard), ?MODULE, {Shard, Options}, []).
init({Shard, Options}) ->
process_flag(trap_exit, true),
{ok, S0} = do_open_db(Shard, Options),
S = ensure_current_generation(S0),
ok = populate_metadata(S),
{ok, S}.
handle_call({create_generation, Since, Config}, _From, S) ->
case create_new_gen(Since, Config, S) of
{ok, GenId, NS} ->
{reply, {ok, GenId}, NS};
{error, _} = Error ->
{reply, Error, S}
end;
handle_call(_Call, _From, S) ->
{reply, {error, unknown_call}, S}.
handle_cast(_Cast, S) ->
{noreply, S}.
handle_info(_Info, S) ->
{noreply, S}.
terminate(_Reason, #s{db = DB, shard = Shard}) ->
meta_erase(Shard),
ok = rocksdb:close(DB).
%%================================================================================
%% Internal functions
%%================================================================================
-record(db, {handle :: rocksdb:db_handle(), cf_iterator :: rocksdb:cf_handle()}).
-spec populate_metadata(state()) -> ok.
populate_metadata(S = #s{shard = Shard, db = DBHandle, cf_iterator = CFIterator}) ->
ok = meta_put(Shard, db, #db{handle = DBHandle, cf_iterator = CFIterator}),
Current = schema_get_current(DBHandle),
lists:foreach(fun(GenId) -> populate_metadata(GenId, S) end, lists:seq(0, Current)).
-spec populate_metadata(gen_id(), state()) -> ok.
populate_metadata(GenId, S = #s{shard = Shard, db = DBHandle}) ->
Gen = open_gen(GenId, schema_get_gen(DBHandle, GenId), S),
meta_register_gen(Shard, GenId, Gen).
-spec ensure_current_generation(state()) -> state().
ensure_current_generation(S = #s{shard = _Shard, keyspace = Keyspace, db = DBHandle}) ->
case schema_get_current(DBHandle) of
undefined ->
Config = emqx_ds_conf:keyspace_config(Keyspace),
{ok, _, NS} = create_new_gen(0, Config, S),
NS;
_GenId ->
S
end.
-spec create_new_gen(emqx_ds:time(), emqx_ds_conf:backend_config(), state()) ->
{ok, gen_id(), state()} | {error, nonmonotonic}.
create_new_gen(Since, Config, S = #s{shard = Shard, db = DBHandle}) ->
GenId = get_next_id(meta_get_current(Shard)),
GenId = get_next_id(schema_get_current(DBHandle)),
case is_gen_valid(Shard, GenId, Since) of
ok ->
{ok, Gen, NS} = create_gen(GenId, Since, Config, S),
%% TODO: Transaction? Column family creation can't be transactional, anyway.
ok = schema_put_gen(DBHandle, GenId, Gen),
ok = schema_put_current(DBHandle, GenId),
ok = meta_register_gen(Shard, GenId, open_gen(GenId, Gen, NS)),
{ok, GenId, NS};
{error, _} = Error ->
Error
end.
-spec create_gen(gen_id(), emqx_ds:time(), emqx_ds_conf:backend_config(), state()) ->
{ok, generation(), state()}.
create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, cf_generations = CFs}) ->
% TODO: Backend implementation should ensure idempotency.
{Schema, NewCFs} = Module:create_new(DBHandle, GenId, Options),
Gen = #{
module => Module,
data => Schema,
since => Since
},
{ok, Gen, S#s{cf_generations = NewCFs ++ CFs}}.
-spec do_open_db(emqx_ds:shard(), options()) -> {ok, state()} | {error, _TODO}.
do_open_db(Shard, Options) ->
DefaultDir = binary_to_list(Shard),
DBDir = unicode:characters_to_list(maps:get(dir, Options, DefaultDir)),
%% TODO: properly forward keyspace
Keyspace = maps:get(keyspace, Options, default_keyspace),
DBOptions = [
{create_if_missing, true},
{create_missing_column_families, true}
| emqx_ds_conf:db_options(Keyspace)
],
_ = filelib:ensure_dir(DBDir),
ExistingCFs =
case rocksdb:list_column_families(DBDir, DBOptions) of
{ok, CFs} ->
[{Name, []} || Name <- CFs, Name /= ?DEFAULT_CF, Name /= ?ITERATOR_CF];
% DB is not present. First start
{error, {db_open, _}} ->
[]
end,
ColumnFamilies = [
{?DEFAULT_CF, ?DEFAULT_CF_OPTS},
{?ITERATOR_CF, ?ITERATOR_CF_OPTS}
| ExistingCFs
],
case rocksdb:open(DBDir, DBOptions, ColumnFamilies) of
{ok, DBHandle, [_CFDefault, CFIterator | CFRefs]} ->
{CFNames, _} = lists:unzip(ExistingCFs),
{ok, #s{
shard = Shard,
keyspace = Keyspace,
db = DBHandle,
cf_iterator = CFIterator,
cf_generations = lists:zip(CFNames, CFRefs)
}};
Error ->
Error
end.
-spec open_gen(gen_id(), generation(), state()) -> generation().
open_gen(
GenId,
Gen = #{module := Mod, data := Data},
#s{shard = Shard, db = DBHandle, cf_generations = CFs}
) ->
DB = Mod:open(Shard, DBHandle, GenId, CFs, Data),
Gen#{data := DB}.
-spec open_next_iterator(iterator()) -> {ok, iterator()} | {error, _Reason} | none.
open_next_iterator(It = #it{shard = Shard, gen = GenId}) ->
open_next_iterator(meta_get_gen(Shard, GenId + 1), It#it{gen = GenId + 1}).
open_next_iterator(undefined, _It) ->
none;
open_next_iterator(Gen = #{}, It) ->
open_iterator(Gen, It).
-spec open_restore_iterator(generation(), iterator(), binary()) ->
{ok, iterator()} | {error, _Reason}.
open_restore_iterator(#{module := Mod, data := Data}, It = #it{}, Serial) ->
case Mod:restore_iterator(Data, Serial) of
{ok, ItData} ->
{ok, It#it{module = Mod, data = ItData}};
Err ->
Err
end.
%%
-define(KEY_REPLAY_STATE(IteratorId), <<(IteratorId)/binary, "rs">>).
-define(KEY_REPLAY_STATE_PAT(KeyReplayState), begin
<<IteratorId:(size(KeyReplayState) - 2)/binary, "rs">> = (KeyReplayState),
IteratorId
end).
-define(ITERATION_WRITE_OPTS, []).
-define(ITERATION_READ_OPTS, []).
iterator_get_state(Shard, ReplayID) ->
#db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
rocksdb:get(Handle, CF, ?KEY_REPLAY_STATE(ReplayID), ?ITERATION_READ_OPTS).
iterator_put_state(ID, It = #it{shard = Shard}) ->
#db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
Serial = preserve_iterator_state(It),
rocksdb:put(Handle, CF, ?KEY_REPLAY_STATE(ID), Serial, ?ITERATION_WRITE_OPTS).
iterator_delete(Shard, ID) ->
#db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
rocksdb:delete(Handle, CF, ?KEY_REPLAY_STATE(ID), ?ITERATION_WRITE_OPTS).
preserve_iterator_state(#it{
gen = Gen,
replay = {TopicFilter, StartTime},
module = Mod,
data = ItData
}) ->
term_to_binary(#{
v => 1,
gen => Gen,
filter => TopicFilter,
start => StartTime,
st => Mod:preserve_iterator(ItData)
}).
restore_iterator_state(Shard, Serial) when is_binary(Serial) ->
restore_iterator_state(Shard, binary_to_term(Serial));
restore_iterator_state(
Shard,
#{
v := 1,
gen := Gen,
filter := TopicFilter,
start := StartTime,
st := State
}
) ->
It = #it{shard = Shard, gen = Gen, replay = {TopicFilter, StartTime}},
open_restore_iterator(meta_get_gen(Shard, Gen), It, State).
do_list_iterator_prefix(Shard, KeyPrefix) ->
Fn = fun(K0, _V, Acc) ->
K = ?KEY_REPLAY_STATE_PAT(K0),
[K | Acc]
end,
do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, []).
do_discard_iterator_prefix(Shard, KeyPrefix) ->
#db{handle = DBHandle, cf_iterator = CF} = meta_lookup(Shard, db),
Fn = fun(K, _V, _Acc) -> ok = rocksdb:delete(DBHandle, CF, K, ?ITERATION_WRITE_OPTS) end,
do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, ok).
do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc) ->
#db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
case rocksdb:iterator(Handle, CF, ?ITERATION_READ_OPTS) of
{ok, It} ->
NextAction = {seek, KeyPrefix},
do_foldl_iterator_prefix(Handle, CF, It, KeyPrefix, NextAction, Fn, Acc);
Error ->
Error
end.
do_foldl_iterator_prefix(DBHandle, CF, It, KeyPrefix, NextAction, Fn, Acc) ->
case rocksdb:iterator_move(It, NextAction) of
{ok, K = <<KeyPrefix:(size(KeyPrefix))/binary, _/binary>>, V} ->
NewAcc = Fn(K, V, Acc),
do_foldl_iterator_prefix(DBHandle, CF, It, KeyPrefix, next, Fn, NewAcc);
{ok, _K, _V} ->
ok = rocksdb:iterator_close(It),
{ok, Acc};
{error, invalid_iterator} ->
ok = rocksdb:iterator_close(It),
{ok, Acc};
Error ->
ok = rocksdb:iterator_close(It),
Error
end.
%% Functions for dealing with the metadata stored persistently in rocksdb
-define(CURRENT_GEN, <<"current">>).
-define(SCHEMA_WRITE_OPTS, []).
-define(SCHEMA_READ_OPTS, []).
-spec schema_get_gen(rocksdb:db_handle(), gen_id()) -> generation().
schema_get_gen(DBHandle, GenId) ->
{ok, Bin} = rocksdb:get(DBHandle, schema_gen_key(GenId), ?SCHEMA_READ_OPTS),
binary_to_term(Bin).
-spec schema_put_gen(rocksdb:db_handle(), gen_id(), generation()) -> ok | {error, _}.
schema_put_gen(DBHandle, GenId, Gen) ->
rocksdb:put(DBHandle, schema_gen_key(GenId), term_to_binary(Gen), ?SCHEMA_WRITE_OPTS).
-spec schema_get_current(rocksdb:db_handle()) -> gen_id() | undefined.
schema_get_current(DBHandle) ->
case rocksdb:get(DBHandle, ?CURRENT_GEN, ?SCHEMA_READ_OPTS) of
{ok, Bin} ->
binary_to_integer(Bin);
not_found ->
undefined
end.
-spec schema_put_current(rocksdb:db_handle(), gen_id()) -> ok | {error, _}.
schema_put_current(DBHandle, GenId) ->
rocksdb:put(DBHandle, ?CURRENT_GEN, integer_to_binary(GenId), ?SCHEMA_WRITE_OPTS).
-spec schema_gen_key(integer()) -> binary().
schema_gen_key(N) ->
<<"gen", N:32>>.
-undef(CURRENT_GEN).
-undef(SCHEMA_WRITE_OPTS).
-undef(SCHEMA_READ_OPTS).
%% Functions for dealing with the runtime shard metadata:
-define(PERSISTENT_TERM(SHARD, GEN), {emqx_ds_storage_layer, SHARD, GEN}).
-spec meta_register_gen(emqx_ds:shard(), gen_id(), generation()) -> ok.
meta_register_gen(Shard, GenId, Gen) ->
Gs =
case GenId > 0 of
true -> meta_lookup(Shard, GenId - 1);
false -> []
end,
ok = meta_put(Shard, GenId, [Gen | Gs]),
ok = meta_put(Shard, current, GenId).
-spec meta_lookup_gen(emqx_ds:shard(), emqx_ds:time()) -> {gen_id(), generation()}.
meta_lookup_gen(Shard, Time) ->
%% TODO
%% Is cheaper persistent term GC on update here worth extra lookup? I'm leaning
%% towards a "no".
Current = meta_lookup(Shard, current),
Gens = meta_lookup(Shard, Current),
find_gen(Time, Current, Gens).
find_gen(Time, GenId, [Gen = #{since := Since} | _]) when Time >= Since ->
{GenId, Gen};
find_gen(Time, GenId, [_Gen | Rest]) ->
find_gen(Time, GenId - 1, Rest).
-spec meta_get_gen(emqx_ds:shard(), gen_id()) -> generation() | undefined.
meta_get_gen(Shard, GenId) ->
case meta_lookup(Shard, GenId, []) of
[Gen | _Older] -> Gen;
[] -> undefined
end.
-spec meta_get_current(emqx_ds:shard()) -> gen_id() | undefined.
meta_get_current(Shard) ->
meta_lookup(Shard, current, undefined).
-spec meta_lookup(emqx_ds:shard(), _K) -> _V.
meta_lookup(Shard, Key) ->
persistent_term:get(?PERSISTENT_TERM(Shard, Key)).
-spec meta_lookup(emqx_ds:shard(), _K, Default) -> _V | Default.
meta_lookup(Shard, K, Default) ->
persistent_term:get(?PERSISTENT_TERM(Shard, K), Default).
-spec meta_put(emqx_ds:shard(), _K, _V) -> ok.
meta_put(Shard, K, V) ->
persistent_term:put(?PERSISTENT_TERM(Shard, K), V).
-spec meta_erase(emqx_ds:shard()) -> ok.
meta_erase(Shard) ->
[
persistent_term:erase(K)
|| {K = ?PERSISTENT_TERM(Z, _), _} <- persistent_term:get(), Z =:= Shard
],
ok.
-undef(PERSISTENT_TERM).
get_next_id(undefined) -> 0;
get_next_id(GenId) -> GenId + 1.
is_gen_valid(Shard, GenId, Since) when GenId > 0 ->
[GenPrev | _] = meta_lookup(Shard, GenId - 1),
case GenPrev of
#{since := SincePrev} when Since > SincePrev ->
ok;
#{} ->
{error, nonmonotonic}
end;
is_gen_valid(_Shard, 0, 0) ->
ok.
serialize(Msg) ->
%% TODO: remove topic, GUID, etc. from the stored
%% message. Reconstruct it from the metadata.
term_to_binary(emqx_message:to_map(Msg)).
deserialize(Bin) ->
emqx_message:from_map(binary_to_term(Bin)).
%% -spec store_cfs(rocksdb:db_handle(), [{string(), rocksdb:cf_handle()}]) -> ok.
%% store_cfs(DBHandle, CFRefs) ->
%% lists:foreach(
%% fun({CFName, CFRef}) ->
%% persistent_term:put({self(), CFName}, {DBHandle, CFRef})
%% end,
%% CFRefs).

View File

@@ -1,748 +0,0 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_ds_message_storage_bitmask).
%%================================================================================
%% @doc Description of the schema
%%
%% Let us assume that `T' is a topic and `t' is time. These are the two
%% dimensions used to index messages. They can be viewed as
%% "coordinates" of an MQTT message in a 2D space.
%%
%% Oftentimes, when a wildcard subscription is used, keys must be
%% scanned in both dimensions simultaneously.
%%
%% RocksDB allows iterating over sorted keys very quickly. This means
%% we need to map our two-dimensional keys to a single index that is
%% sorted in a way that helps to iterate over both time and topic
%% without having to do a lot of random seeks.
%%
%% == Mapping of 2D keys to rocksdb keys ==
%%
%% We use "zigzag" pattern to store messages, where rocksdb key is
%% composed like like this:
%%
%% |ttttt|TTTTTTTTT|tttt|
%% ^ ^ ^
%% | | |
%% +-------+ | +---------+
%% | | |
%% most significant topic hash least significant
%% bits of timestamp bits of timestamp
%% (a.k.a epoch) (a.k.a time offset)
%%
%% Topic hash is level-aware: each topic level is hashed separately
%% and the resulting hashes are bitwise-concatenated. This allows us
%% to map topics to fixed-length bitstrings while keeping some degree
%% of information about the hierarchy.
%%
%% Next important concept is what we call "epoch". Duration of the
%% epoch is determined by maximum time offset. Epoch is calculated by
%% shifting bits of the timestamp right.
%%
%% The resulting index is a space-filling curve that looks like
%% this in the topic-time 2D space:
%%
%% T ^ ---->------ |---->------ |---->------
%% | --/ / --/ / --/
%% | -<-/ | -<-/ | -<-/
%% | -/ | -/ | -/
%% | ---->------ | ---->------ | ---->------
%% | --/ / --/ / --/
%% | ---/ | ---/ | ---/
%% | -/ ^ -/ ^ -/
%% | ---->------ | ---->------ | ---->------
%% | --/ / --/ / --/
%% | -<-/ | -<-/ | -<-/
%% | -/ | -/ | -/
%% | ---->------| ---->------| ---------->
%% |
%% -+------------+-----------------------------> t
%% epoch
%%
%% This structure allows us to quickly seek to the first message that
%% was recorded in a certain epoch in a certain topic or a group of
%% topics matching a filter like `foo/bar/#`.
%%
%% Due to its structure, for each pair of rocksdb keys K1 and K2, such
%% that K1 > K2 and topic(K1) = topic(K2), timestamp(K1) >
%% timestamp(K2).
%% That is, replay doesn't reorder messages published in each
%% individual topic.
%%
%% This property doesn't hold between different topics, but it's not deemed
%% a problem right now.
%%
%%================================================================================
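%% An illustrative sketch of the key composition described above (the
%% function name and argument list are hypothetical; the real mapping
%% is driven by the configurable bitsources below):
%%
%%   make_key(TopicHash, Timestamp, OffsetBits, HashBits) ->
%%       Epoch = Timestamp bsr OffsetBits,
%%       Offset = Timestamp band ones(OffsetBits),
%%       (Epoch bsl (HashBits + OffsetBits)) bor
%%           (TopicHash bsl OffsetBits) bor
%%           Offset.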
-behaviour(emqx_ds_storage_layer).
%% API:
-export([create_new/3, open/5]).
-export([make_keymapper/1]).
-export([store/5, delete/4]).
-export([get_streams/3, make_iterator/3, next/1]).
-export([preserve_iterator/1, restore_iterator/2, refresh_iterator/1]).
%% Debug/troubleshooting:
%% Keymappers
-export([
keymapper_info/1,
compute_bitstring/3,
compute_topic_bitmask/2,
compute_time_bitmask/1,
hash/2
]).
%% Keyspace filters
-export([
make_keyspace_filter/2,
compute_initial_seek/1,
compute_next_seek/2,
compute_time_seek/3,
compute_topic_seek/4
]).
-export_type([db/0, stream/0, iterator/0, schema/0]).
-export_type([options/0]).
-export_type([iteration_options/0]).
-compile(
{inline, [
bitwise_concat/3,
ones/1,
successor/1,
topic_hash_matches/3,
time_matches/3
]}
).
%%================================================================================
%% Type declarations
%%================================================================================
-opaque stream() :: emqx_ds:topic_filter().
-type topic() :: emqx_ds:topic().
-type topic_filter() :: emqx_ds:topic_filter().
-type time() :: emqx_ds:time().
%% Number of bits
-type bits() :: non_neg_integer().
%% Key of a RocksDB record.
-type key() :: binary().
%% Distribution of entropy among topic levels.
%% Example: [4, 8, 16] means that level 1 gets 4 bits, level 2 gets 8 bits,
%% and _rest of levels_ (if any) get 16 bits.
-type bits_per_level() :: [bits(), ...].
-type options() :: #{
%% Number of bits in a message timestamp.
timestamp_bits := bits(),
%% Number of bits in a key allocated to each level in a message topic.
topic_bits_per_level := bits_per_level(),
%% Maximum granularity of iteration over time.
epoch := time(),
iteration => iteration_options(),
cf_options => emqx_ds_storage_layer:db_cf_options()
}.
-type iteration_options() :: #{
%% Request periodic iterator refresh.
%% This might be helpful during replays taking a lot of time (e.g. tens of seconds).
%% Note that `{every, 1000}` means 1000 _operations_ with the iterator which is not
%% the same as 1000 replayed messages.
iterator_refresh => {every, _NumOperations :: pos_integer()}
}.
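%% Example options (illustrative values, not defaults):
%%   #{timestamp_bits => 64,
%%     topic_bits_per_level => [8, 8, 8, 16],
%%     epoch => 1000,
%%     iteration => #{iterator_refresh => {every, 100}}}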
%% Persistent configuration of the generation; it is used to create the
%% db record when the database is reopened.
-record(schema, {keymapper :: keymapper()}).
-opaque schema() :: #schema{}.
-record(db, {
shard :: emqx_ds:shard(),
handle :: rocksdb:db_handle(),
cf :: rocksdb:cf_handle(),
keymapper :: keymapper(),
write_options = [{sync, true}] :: emqx_ds_storage_layer:db_write_options(),
read_options = [] :: emqx_ds_storage_layer:db_read_options()
}).
-record(it, {
handle :: rocksdb:itr_handle(),
filter :: keyspace_filter(),
cursor :: binary() | undefined,
next_action :: {seek, binary()} | next,
refresh_counter :: {non_neg_integer(), pos_integer()} | undefined
}).
-record(filter, {
keymapper :: keymapper(),
topic_filter :: topic_filter(),
start_time :: integer(),
hash_bitfilter :: integer(),
hash_bitmask :: integer(),
time_bitfilter :: integer(),
time_bitmask :: integer()
}).
% NOTE
% Keymapper decides how to map messages into the RocksDB column family keyspace.
-record(keymapper, {
source :: [bitsource(), ...],
bitsize :: bits(),
epoch :: non_neg_integer()
}).
-type bitsource() ::
%% Consume `_Size` bits from timestamp starting at `_Offset`th bit.
%% TODO consistency
{timestamp, _Offset :: bits(), _Size :: bits()}
%% Consume next topic level (either one or all of them) and compute `_Size` bits-wide hash.
| {hash, level | levels, _Size :: bits()}.
-opaque db() :: #db{}.
-opaque iterator() :: #it{}.
-type serialized_iterator() :: binary().
-type keymapper() :: #keymapper{}.
-type keyspace_filter() :: #filter{}.
%%================================================================================
%% API functions
%%================================================================================
%% Create a new column family for the generation and a serializable representation of the schema
-spec create_new(rocksdb:db_handle(), emqx_ds_storage_layer:gen_id(), options()) ->
{schema(), emqx_ds_storage_layer:cf_refs()}.
create_new(DBHandle, GenId, Options) ->
CFName = data_cf(GenId),
CFOptions = maps:get(cf_options, Options, []),
{ok, CFHandle} = rocksdb:create_column_family(DBHandle, CFName, CFOptions),
Schema = #schema{keymapper = make_keymapper(Options)},
{Schema, [{CFName, CFHandle}]}.
%% Reopen the database
-spec open(
emqx_ds:shard(),
rocksdb:db_handle(),
emqx_ds_storage_layer:gen_id(),
emqx_ds_storage_layer:cf_refs(),
schema()
) ->
db().
open(Shard, DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) ->
{value, {_, CFHandle}} = lists:keysearch(data_cf(GenId), 1, CFs),
#db{
shard = Shard,
handle = DBHandle,
cf = CFHandle,
keymapper = Keymapper
}.
-spec make_keymapper(options()) -> keymapper().
make_keymapper(#{
timestamp_bits := TimestampBits,
topic_bits_per_level := BitsPerLevel,
epoch := MaxEpoch
}) ->
TimestampLSBs = min(TimestampBits, floor(math:log2(MaxEpoch))),
TimestampMSBs = TimestampBits - TimestampLSBs,
NLevels = length(BitsPerLevel),
{LevelBits, [TailLevelsBits]} = lists:split(NLevels - 1, BitsPerLevel),
Source = lists:flatten([
[{timestamp, TimestampLSBs, TimestampMSBs} || TimestampMSBs > 0],
[{hash, level, Bits} || Bits <- LevelBits],
{hash, levels, TailLevelsBits},
[{timestamp, 0, TimestampLSBs} || TimestampLSBs > 0]
]),
#keymapper{
source = Source,
bitsize = lists:sum([S || {_, _, S} <- Source]),
epoch = 1 bsl TimestampLSBs
}.
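%% Worked example (illustrative, not part of the original module): given
%% #{timestamp_bits => 64, topic_bits_per_level => [8, 8, 16], epoch => 1024},
%% TimestampLSBs = min(64, floor(log2(1024))) = 10, TimestampMSBs = 54, and
%% the source becomes
%%   [{timestamp, 10, 54},
%%    {hash, level, 8}, {hash, level, 8}, {hash, levels, 16},
%%    {timestamp, 0, 10}]
%% i.e. a 96-bit key laid out as |54 epoch bits|32 topic hash bits|10 LSBs|.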
-spec store(db(), emqx_guid:guid(), emqx_ds:time(), topic(), binary()) ->
ok | {error, _TODO}.
store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, MessagePayload) ->
Key = make_message_key(Topic, PublishedAt, MessageID, DB#db.keymapper),
Value = make_message_value(Topic, MessagePayload),
rocksdb:put(DBHandle, CFHandle, Key, Value, DB#db.write_options).
-spec delete(db(), emqx_guid:guid(), emqx_ds:time(), topic()) ->
ok | {error, _TODO}.
delete(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic) ->
Key = make_message_key(Topic, PublishedAt, MessageID, DB#db.keymapper),
rocksdb:delete(DBHandle, CFHandle, Key, DB#db.write_options).
-spec get_streams(db(), emqx_ds:topic_filter(), emqx_ds:time()) ->
[stream()].
get_streams(_, TopicFilter, _) ->
[{0, TopicFilter}].
-spec make_iterator(db(), emqx_ds:replay(), iteration_options()) ->
% {error, invalid_start_time}? might just start from the beginning of time
% and call it a day: client violated the contract anyway.
{ok, iterator()} | {error, _TODO}.
make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, Replay, Options) ->
case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of
{ok, ITHandle} ->
Filter = make_keyspace_filter(Replay, DB#db.keymapper),
InitialSeek = combine(compute_initial_seek(Filter), <<>>, DB#db.keymapper),
RefreshCounter = make_refresh_counter(maps:get(iterator_refresh, Options, undefined)),
{ok, #it{
handle = ITHandle,
filter = Filter,
next_action = {seek, InitialSeek},
refresh_counter = RefreshCounter
}};
Err ->
Err
end.
-spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}.
next(It0 = #it{filter = #filter{keymapper = Keymapper}}) ->
It = maybe_refresh_iterator(It0),
case rocksdb:iterator_move(It#it.handle, It#it.next_action) of
% spec says `{ok, Key}` is also possible but the implementation says it's not
{ok, Key, Value} ->
% Preserve last seen key in the iterator so it could be restored / refreshed later.
ItNext = It#it{cursor = Key},
Bitstring = extract(Key, Keymapper),
case match_next(Bitstring, Value, It#it.filter) of
{_Topic, Payload} ->
{value, Payload, ItNext#it{next_action = next}};
next ->
next(ItNext#it{next_action = next});
NextBitstring when is_integer(NextBitstring) ->
NextSeek = combine(NextBitstring, <<>>, Keymapper),
next(ItNext#it{next_action = {seek, NextSeek}});
none ->
stop_iteration(ItNext)
end;
{error, invalid_iterator} ->
stop_iteration(It);
{error, iterator_closed} ->
{error, closed}
end.
-spec preserve_iterator(iterator()) -> serialized_iterator().
preserve_iterator(#it{
cursor = Cursor,
filter = #filter{
topic_filter = TopicFilter,
start_time = StartTime
}
}) ->
State = #{
v => 1,
cursor => Cursor,
replay => {TopicFilter, StartTime}
},
term_to_binary(State).
-spec restore_iterator(db(), serialized_iterator()) ->
{ok, iterator()} | {error, _TODO}.
restore_iterator(DB, Serial) when is_binary(Serial) ->
State = binary_to_term(Serial),
restore_iterator(DB, State);
restore_iterator(DB, #{
v := 1,
cursor := Cursor,
replay := Replay = {_TopicFilter, _StartTime}
}) ->
Options = #{}, % TODO: passthrough options
case make_iterator(DB, Replay, Options) of
{ok, It} when Cursor == undefined ->
% Iterator was preserved right after it was made.
{ok, It};
{ok, It} ->
% Iterator was preserved mid-replay, seek right past the last seen key.
{ok, It#it{cursor = Cursor, next_action = {seek, successor(Cursor)}}};
Err ->
Err
end.
-spec refresh_iterator(iterator()) -> iterator().
refresh_iterator(It = #it{handle = Handle, cursor = Cursor, next_action = Action}) ->
case rocksdb:iterator_refresh(Handle) of
ok when Action =:= next ->
% Now the underlying iterator is invalid, need to seek instead.
It#it{next_action = {seek, successor(Cursor)}};
ok ->
% Now the underlying iterator is invalid, but will seek soon anyway.
It;
{error, _} ->
% Implementation could in theory return an {error, ...} tuple.
% Supposedly our best bet is to ignore it.
% TODO logging?
It
end.
%%================================================================================
%% Internal exports
%%================================================================================
-spec keymapper_info(keymapper()) ->
#{source := [bitsource()], bitsize := bits(), epoch := time()}.
keymapper_info(#keymapper{source = Source, bitsize = Bitsize, epoch = Epoch}) ->
#{source => Source, bitsize => Bitsize, epoch => Epoch}.
make_message_key(Topic, PublishedAt, MessageID, Keymapper) ->
combine(compute_bitstring(Topic, PublishedAt, Keymapper), MessageID, Keymapper).
make_message_value(Topic, MessagePayload) ->
term_to_binary({Topic, MessagePayload}).
unwrap_message_value(Binary) ->
binary_to_term(Binary).
-spec combine(_Bitstring :: integer(), emqx_guid:guid() | <<>>, keymapper()) ->
key().
combine(Bitstring, MessageID, #keymapper{bitsize = Size}) ->
<<Bitstring:Size/integer, MessageID/binary>>.
-spec extract(key(), keymapper()) ->
_Bitstring :: integer().
extract(Key, #keymapper{bitsize = Size}) ->
<<Bitstring:Size/integer, _MessageID/binary>> = Key,
Bitstring.
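%% Illustration (assuming a keymapper KM with bitsize = 16):
%%   combine(16#ABCD, <<>>, KM) =:= <<16#ABCD:16>>,
%%   extract(<<16#ABCD:16, "id">>, KM) =:= 16#ABCD.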
-spec compute_bitstring(topic_filter(), time(), keymapper()) -> integer().
compute_bitstring(TopicFilter, Timestamp, #keymapper{source = Source}) ->
compute_bitstring(TopicFilter, Timestamp, Source, 0).
-spec compute_topic_bitmask(topic_filter(), keymapper()) -> integer().
compute_topic_bitmask(TopicFilter, #keymapper{source = Source}) ->
compute_topic_bitmask(TopicFilter, Source, 0).
-spec compute_time_bitmask(keymapper()) -> integer().
compute_time_bitmask(#keymapper{source = Source}) ->
compute_time_bitmask(Source, 0).
-spec hash(term(), bits()) -> integer().
hash(Input, Bits) ->
% at most 32 bits
erlang:phash2(Input, 1 bsl Bits).
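%% E.g. hash(<<"foo">>, 8) falls in 0..255; the range argument of
%% erlang:phash2/2 caps `Bits` at 32.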
-spec make_keyspace_filter(emqx_ds:replay(), keymapper()) -> keyspace_filter().
make_keyspace_filter({TopicFilter, StartTime}, Keymapper) ->
Bitstring = compute_bitstring(TopicFilter, StartTime, Keymapper),
HashBitmask = compute_topic_bitmask(TopicFilter, Keymapper),
TimeBitmask = compute_time_bitmask(Keymapper),
HashBitfilter = Bitstring band HashBitmask,
TimeBitfilter = Bitstring band TimeBitmask,
#filter{
keymapper = Keymapper,
topic_filter = TopicFilter,
start_time = StartTime,
hash_bitfilter = HashBitfilter,
hash_bitmask = HashBitmask,
time_bitfilter = TimeBitfilter,
time_bitmask = TimeBitmask
}.
-spec compute_initial_seek(keyspace_filter()) -> integer().
compute_initial_seek(#filter{hash_bitfilter = HashBitfilter, time_bitfilter = TimeBitfilter}) ->
% Should be the same as `compute_next_seek(0, Filter)`.
HashBitfilter bor TimeBitfilter.
-spec compute_next_seek(integer(), keyspace_filter()) -> integer().
compute_next_seek(
Bitstring,
Filter = #filter{
hash_bitfilter = HashBitfilter,
hash_bitmask = HashBitmask,
time_bitfilter = TimeBitfilter,
time_bitmask = TimeBitmask
}
) ->
HashMatches = topic_hash_matches(Bitstring, HashBitfilter, HashBitmask),
TimeMatches = time_matches(Bitstring, TimeBitfilter, TimeBitmask),
compute_next_seek(HashMatches, TimeMatches, Bitstring, Filter).
%%================================================================================
%% Internal functions
%%================================================================================
compute_bitstring(Topic, Timestamp, [{timestamp, Offset, Size} | Rest], Acc) ->
I = (Timestamp bsr Offset) band ones(Size),
compute_bitstring(Topic, Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring([], Timestamp, [{hash, level, Size} | Rest], Acc) ->
I = hash(<<"/">>, Size),
compute_bitstring([], Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring([Level | Tail], Timestamp, [{hash, level, Size} | Rest], Acc) ->
I = hash(Level, Size),
compute_bitstring(Tail, Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring(Tail, Timestamp, [{hash, levels, Size} | Rest], Acc) ->
I = hash(Tail, Size),
compute_bitstring(Tail, Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring(_, _, [], Acc) ->
Acc.
compute_topic_bitmask(Filter, [{timestamp, _, Size} | Rest], Acc) ->
compute_topic_bitmask(Filter, Rest, bitwise_concat(Acc, 0, Size));
compute_topic_bitmask(['#'], [{hash, _, Size} | Rest], Acc) ->
compute_topic_bitmask(['#'], Rest, bitwise_concat(Acc, 0, Size));
compute_topic_bitmask(['+' | Tail], [{hash, _, Size} | Rest], Acc) ->
compute_topic_bitmask(Tail, Rest, bitwise_concat(Acc, 0, Size));
compute_topic_bitmask([], [{hash, level, Size} | Rest], Acc) ->
compute_topic_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size));
compute_topic_bitmask([_ | Tail], [{hash, level, Size} | Rest], Acc) ->
compute_topic_bitmask(Tail, Rest, bitwise_concat(Acc, ones(Size), Size));
compute_topic_bitmask(Tail, [{hash, levels, Size} | Rest], Acc) ->
Mask =
case lists:member('+', Tail) orelse lists:member('#', Tail) of
true -> 0;
false -> ones(Size)
end,
compute_topic_bitmask([], Rest, bitwise_concat(Acc, Mask, Size));
compute_topic_bitmask(_, [], Acc) ->
Acc.
compute_time_bitmask([{timestamp, _, Size} | Rest], Acc) ->
compute_time_bitmask(Rest, bitwise_concat(Acc, ones(Size), Size));
compute_time_bitmask([{hash, _, Size} | Rest], Acc) ->
compute_time_bitmask(Rest, bitwise_concat(Acc, 0, Size));
compute_time_bitmask([], Acc) ->
Acc.
bitwise_concat(Acc, Item, ItemSize) ->
(Acc bsl ItemSize) bor Item.
ones(Bits) ->
1 bsl Bits - 1.
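%% Illustration:
%%   ones(4) =:= 2#1111,
%%   bitwise_concat(2#101, 2#11, 2) =:= 2#10111.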
%% Append a zero byte: the smallest binary strictly greater than `Key'
%% in lexicographic order.
-spec successor(key()) -> key().
successor(Key) ->
<<Key/binary, 0:8>>.
%% Example: suppose the topic `foo/bar/baz' hashes to |123|345|678|
%% (one "digit" per topic level). The filter `foo/+/baz' then covers
%% the key range |123|000|678| .. |123|fff|678|, which gives
%%   HashBitmask   = |fff|000|fff|,
%%   HashBitfilter = |123|000|678|,
%% and a key matches iff Key band HashBitmask =:= HashBitfilter, e.g.
%%   |123|056|678| band |fff|000|fff| =:= |123|000|678|.
match_next(
Bitstring,
Value,
Filter = #filter{
topic_filter = TopicFilter,
hash_bitfilter = HashBitfilter,
hash_bitmask = HashBitmask,
time_bitfilter = TimeBitfilter,
time_bitmask = TimeBitmask
}
) ->
HashMatches = topic_hash_matches(Bitstring, HashBitfilter, HashBitmask),
TimeMatches = time_matches(Bitstring, TimeBitfilter, TimeBitmask),
case HashMatches and TimeMatches of
true ->
Message = {Topic, _Payload} = unwrap_message_value(Value),
case emqx_topic:match(Topic, TopicFilter) of
true ->
Message;
false ->
next
end;
false ->
compute_next_seek(HashMatches, TimeMatches, Bitstring, Filter)
end.
%% `Bitstring` is out of the hash space defined by `HashBitfilter`.
compute_next_seek(
_HashMatches = false,
_TimeMatches,
Bitstring,
Filter = #filter{
keymapper = Keymapper,
hash_bitfilter = HashBitfilter,
hash_bitmask = HashBitmask,
time_bitfilter = TimeBitfilter,
time_bitmask = TimeBitmask
}
) ->
NextBitstring = compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper),
case NextBitstring of
none ->
none;
_ ->
TimeMatches = time_matches(NextBitstring, TimeBitfilter, TimeBitmask),
compute_next_seek(true, TimeMatches, NextBitstring, Filter)
end;
%% `Bitstring` is out of the time range defined by `TimeBitfilter`.
compute_next_seek(
_HashMatches = true,
_TimeMatches = false,
Bitstring,
#filter{
time_bitfilter = TimeBitfilter,
time_bitmask = TimeBitmask
}
) ->
compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask);
compute_next_seek(true, true, Bitstring, _It) ->
Bitstring.
topic_hash_matches(Bitstring, HashBitfilter, HashBitmask) ->
(Bitstring band HashBitmask) == HashBitfilter.
time_matches(Bitstring, TimeBitfilter, TimeBitmask) ->
(Bitstring band TimeBitmask) >= TimeBitfilter.
compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask) ->
% Replace the bits of the timestamp in `Bitstring` with bits from `TimeBitfilter`.
(Bitstring band (bnot TimeBitmask)) bor TimeBitfilter.
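%% Illustration (assumed values): with TimeBitmask = 2#0001111 and
%% TimeBitfilter = 2#0001000,
%%   compute_time_seek(2#1010110, TimeBitfilter, TimeBitmask) =:= 2#1011000,
%% i.e. the timestamp bits are replaced with the filter's bits.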
%% Find the closest bitstring which is:
%% * greater than `Bitstring`,
%% * and falls into the hash space defined by `HashBitfilter`.
%% Note that the result can end up "back" in time and out of the time range.
compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) ->
Sources = Keymapper#keymapper.source,
Size = Keymapper#keymapper.bitsize,
compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size).
compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) ->
% NOTE
% We're iterating through `Bitstring` here, in lockstep with `HashBitfilter`
% and `HashBitmask`, starting from the least significant bits. Each bitsource in
% `Sources` has a bitsize `S` and, accordingly, gives us a sub-bitstring `S`
% bits long which we interpret as a "digit". There are 2 flavors of those
% "digits":
% * regular digits with 2^S possible values,
% * degenerate digits with exactly 1 possible value U (represented with 0).
% Our goal here is to find a successor of `Bitstring` and perform a kind of
% digit-by-digit addition operation with carry propagation.
NextSeek = zipfoldr3(
fun(Source, Substring, Filter, LBitmask, Offset, Acc) ->
case Source of
{hash, _, S} when LBitmask =:= 0 ->
% Regular case
bitwise_add_digit(Substring, Acc, S, Offset);
{hash, _, _} when LBitmask =/= 0, Substring < Filter ->
% Degenerate case, I_digit < U, no overflow.
% Successor is `U bsl Offset` which is equivalent to 0.
0;
{hash, _, S} when LBitmask =/= 0, Substring > Filter ->
% Degenerate case, I_digit > U, overflow.
% Successor is `(1 bsl S + U) bsl Offset`.
overflow_digit(S, Offset);
{hash, _, S} when LBitmask =/= 0 ->
% Degenerate case, I_digit = U
% Perform digit addition with I_digit = 0, assuming "digit" has
% 0 bits of information (but is `S` bits long at the same time).
% This will overflow only if the result of previous iteration
% was an overflow.
bitwise_add_digit(0, Acc, 0, S, Offset);
{timestamp, _, S} ->
% Regular case
bitwise_add_digit(Substring, Acc, S, Offset)
end
end,
0,
Bitstring,
HashBitfilter,
HashBitmask,
Size,
Sources
),
case NextSeek bsr Size of
_Carry = 0 ->
% Found the successor.
% We need to recover values of those degenerate digits which we
% represented with 0 during digit-by-digit iteration.
NextSeek bor (HashBitfilter band HashBitmask);
_Carry = 1 ->
% We got "carried away" past the range, time to stop iteration.
none
end.
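%% Illustration (assuming a keymapper KM with two 4-bit hash levels and a
%% topic filter constraining only the first level, so HashBitfilter = 16#A0
%% and HashBitmask = 16#F0):
%%   compute_topic_seek(16#93, 16#A0, 16#F0, KM) =:= 16#A0  (seek into range),
%%   compute_topic_seek(16#B3, 16#A0, 16#F0, KM) =:= none   (carried past it).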
bitwise_add_digit(Digit, Number, Width, Offset) ->
bitwise_add_digit(Digit, Number, Width, Width, Offset).
%% Add "digit" (represented with integer `Digit`) to the `Number` assuming
%% this digit starts at `Offset` bits in `Number` and is `Width` bits long.
%% Perform an overflow if the result of addition would not fit into `Bits`
%% bits.
bitwise_add_digit(Digit, Number, Bits, Width, Offset) ->
Sum = (Digit bsl Offset) + Number,
case (Sum bsr Offset) < (1 bsl Bits) of
true -> Sum;
false -> overflow_digit(Width, Offset)
end.
%% Construct a number which denotes an overflow of a digit that starts at
%% `Offset` bits and is `Width` bits long.
overflow_digit(Width, Offset) ->
(1 bsl Width) bsl Offset.
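%% Illustration (assumed values): adding 1 to the 2-bit digit at offset 2:
%%   bitwise_add_digit(1, 2#0100, 2, 2) =:= 2#1000,
%% while an addition that does not fit into the digit's width overflows
%% and clears the bits below it:
%%   bitwise_add_digit(1, 2#1101, 2, 2) =:= 2#10000.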
%% Iterate through sub-bitstrings of 3 integers in lockstep, starting from the
%% least significant bits.
%%
%% Each integer is assumed to be `Size` bits long. Lengths of the sub-bitstrings
%% are specified in the `Sources` list, in order from most significant bits to
%% least significant. Each iteration calls `FoldFun` with:
%% * bitsource that was used to extract sub-bitstrings,
%% * 3 sub-bitstrings in integer representation,
%% * bit offset into integers,
%% * current accumulator.
-spec zipfoldr3(FoldFun, Acc, integer(), integer(), integer(), _Size :: bits(), [bitsource()]) ->
Acc
when
FoldFun :: fun((bitsource(), integer(), integer(), integer(), _Offset :: bits(), Acc) -> Acc).
zipfoldr3(_FoldFun, Acc, _, _, _, 0, []) ->
Acc;
zipfoldr3(FoldFun, Acc, I1, I2, I3, Offset, [Source = {_, _, S} | Rest]) ->
OffsetNext = Offset - S,
AccNext = zipfoldr3(FoldFun, Acc, I1, I2, I3, OffsetNext, Rest),
FoldFun(
Source,
substring(I1, OffsetNext, S),
substring(I2, OffsetNext, S),
substring(I3, OffsetNext, S),
OffsetNext,
AccNext
).
substring(I, Offset, Size) ->
(I bsr Offset) band ones(Size).
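%% Illustration: substring(2#101100, 2, 3) =:= 2#011.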
%% @doc Generate a column family ID for the MQTT messages
-spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()].
data_cf(GenId) ->
?MODULE_STRING ++ integer_to_list(GenId).
make_refresh_counter({every, N}) when is_integer(N), N > 0 ->
{0, N};
make_refresh_counter(undefined) ->
undefined.
maybe_refresh_iterator(It = #it{refresh_counter = {N, N}}) ->
refresh_iterator(It#it{refresh_counter = {0, N}});
maybe_refresh_iterator(It = #it{refresh_counter = {M, N}}) ->
It#it{refresh_counter = {M + 1, N}};
maybe_refresh_iterator(It = #it{refresh_counter = undefined}) ->
It.
stop_iteration(It) ->
ok = rocksdb:iterator_close(It#it.handle),
none.

View File

@ -129,7 +129,7 @@ t_get_streams(_Config) ->
t_replay(_Config) ->
%% Create concrete topics:
Topics = [<<"foo/bar">>, <<"foo/bar/baz">>],
Timestamps = lists:seq(1, 10),
Timestamps = lists:seq(1, 10_000, 100),
Batch1 = [
make_message(PublishedAt, Topic, integer_to_binary(PublishedAt))
|| Topic <- Topics, PublishedAt <- Timestamps
@ -140,10 +140,10 @@ t_replay(_Config) ->
begin
B = integer_to_binary(I),
make_message(
TS, <<"wildcard/", B/binary, "/suffix/", Suffix/binary>>, integer_to_binary(TS)
TS, <<"wildcard/", B/binary, "/suffix/", Suffix/binary>>, integer_to_binary(TS)
)
end
|| I <- lists:seq(1, 200), TS <- lists:seq(1, 10), Suffix <- [<<"foo">>, <<"bar">>]
|| I <- lists:seq(1, 200), TS <- Timestamps, Suffix <- [<<"foo">>, <<"bar">>]
],
ok = emqx_ds_storage_layer:store_batch(?SHARD, Batch2, []),
%% Check various topic filters:
@ -158,6 +158,9 @@ t_replay(_Config) ->
?assert(check(?SHARD, <<"foo/+/+">>, 0, Messages)),
?assert(check(?SHARD, <<"+/+/+">>, 0, Messages)),
?assert(check(?SHARD, <<"+/+/baz">>, 0, Messages)),
%% Restart shard to make sure trie is persisted and restored:
ok = emqx_ds_storage_layer_sup:stop_shard(?SHARD),
{ok, _} = emqx_ds_storage_layer_sup:start_shard(?SHARD, #{}),
%% Learned wildcard topics:
?assertNot(check(?SHARD, <<"wildcard/1000/suffix/foo">>, 0, [])),
?assert(check(?SHARD, <<"wildcard/1/suffix/foo">>, 0, Messages)),
@ -179,23 +182,24 @@ check(Shard, TopicFilter, StartTime, ExpectedMessages) ->
ExpectedMessages
),
?check_trace(
#{timetrap => 10_000},
begin
Dump = dump_messages(Shard, TopicFilter, StartTime),
verify_dump(TopicFilter, StartTime, Dump),
Missing = ExpectedFiltered -- Dump,
Extras = Dump -- ExpectedFiltered,
?assertMatch(
#{missing := [], unexpected := []},
#{
missing => Missing,
unexpected => Extras,
topic_filter => TopicFilter,
start_time => StartTime
}
)
end,
[]),
#{timetrap => 10_000},
begin
Dump = dump_messages(Shard, TopicFilter, StartTime),
verify_dump(TopicFilter, StartTime, Dump),
Missing = ExpectedFiltered -- Dump,
Extras = Dump -- ExpectedFiltered,
?assertMatch(
#{missing := [], unexpected := []},
#{
missing => Missing,
unexpected => Extras,
topic_filter => TopicFilter,
start_time => StartTime
}
)
end,
[]
),
length(ExpectedFiltered) > 0.
verify_dump(TopicFilter, StartTime, Dump) ->
@ -227,78 +231,26 @@ dump_messages(Shard, TopicFilter, StartTime) ->
).
dump_stream(Shard, Stream, TopicFilter, StartTime) ->
BatchSize = 3,
BatchSize = 100,
{ok, Iterator} = emqx_ds_storage_layer:make_iterator(
Shard, Stream, parse_topic(TopicFilter), StartTime
),
Loop = fun F(It, 0) ->
error({too_many_iterations, It});
F(It, N) ->
case emqx_ds_storage_layer:next(Shard, It, BatchSize) of
end_of_stream ->
[];
{ok, _NextIt, []} ->
[];
{ok, NextIt, Batch} ->
Batch ++ F(NextIt, N - 1)
end
Loop = fun
F(It, 0) ->
error({too_many_iterations, It});
F(It, N) ->
case emqx_ds_storage_layer:next(Shard, It, BatchSize) of
end_of_stream ->
[];
{ok, _NextIt, []} ->
[];
{ok, NextIt, Batch} ->
Batch ++ F(NextIt, N - 1)
end
end,
MaxIterations = 1000,
MaxIterations = 1000000,
Loop(Iterator, MaxIterations).
%% Smoke test for iteration with wildcard topic filter
%% t_iterate_wildcard(_Config) ->
%% %% Prepare data:
%% Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"],
%% Timestamps = lists:seq(1, 10),
%% _ = [
%% store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt}))
%% || Topic <- Topics, PublishedAt <- Timestamps
%% ],
%% ?assertEqual(
%% lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- Timestamps]),
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 0)])
%% ),
%% ?assertEqual(
%% [],
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 10 + 1)])
%% ),
%% ?assertEqual(
%% lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- lists:seq(5, 10)]),
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 5)])
%% ),
%% ?assertEqual(
%% lists:sort([
%% {Topic, PublishedAt}
%% || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps
%% ]),
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)])
%% ),
%% ?assertEqual(
%% lists:sort([{"foo/bar", PublishedAt} || PublishedAt <- Timestamps]),
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/+", 0)])
%% ),
%% ?assertEqual(
%% [],
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/+/bar", 0)])
%% ),
%% ?assertEqual(
%% lists:sort([
%% {Topic, PublishedAt}
%% || Topic <- ["foo/bar", "foo/bar/baz", "a/bar"], PublishedAt <- Timestamps
%% ]),
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "+/bar/#", 0)])
%% ),
%% ?assertEqual(
%% lists:sort([{Topic, PublishedAt} || Topic <- ["a", "a/bar"], PublishedAt <- Timestamps]),
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 0)])
%% ),
%% ?assertEqual(
%% [],
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/+/+", 0)])
%% ),
%% ok.
%% t_create_gen(_Config) ->
%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 5, ?DEFAULT_CONFIG),
%% ?assertEqual(