From 691a27883dd03b9218607fdc8998fb1d41189598 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 21 Dec 2022 18:23:26 +0300 Subject: [PATCH 01/49] feat(ds): Initial commit --- apps/emqx_replay/src/emqx_replay.app.src | 11 ++ apps/emqx_replay/src/emqx_replay_app.erl | 19 ++ .../src/emqx_replay_message_storage.erl | 169 ++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 apps/emqx_replay/src/emqx_replay.app.src create mode 100644 apps/emqx_replay/src/emqx_replay_app.erl create mode 100644 apps/emqx_replay/src/emqx_replay_message_storage.erl diff --git a/apps/emqx_replay/src/emqx_replay.app.src b/apps/emqx_replay/src/emqx_replay.app.src new file mode 100644 index 000000000..7769e82e9 --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay.app.src @@ -0,0 +1,11 @@ +%% -*- mode: erlang -*- +{application, emqx_replay, [ + {description, "Message persistence and subscription replays for EMQX"}, + % strict semver, bump manually! + {vsn, "0.1.0"}, + {modules, []}, + {registered, []}, + {applications, [kernel, stdlib, rocksdb]}, + {mod, {emqx_replay_app, []}}, + {env, []} +]}. diff --git a/apps/emqx_replay/src/emqx_replay_app.erl b/apps/emqx_replay/src/emqx_replay_app.erl new file mode 100644 index 000000000..090299150 --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay_app.erl @@ -0,0 +1,19 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_replay_app). + +-export([]). diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl new file mode 100644 index 000000000..45ad2beab --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -0,0 +1,169 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_replay_message_storage). + +%% API: +-export([open/2, close/1]). + +-export([store/5]). +-export([make_iterator/3]). + +%% Debug/troubleshooting: +-export([make_message_key/3, compute_topic_hash/1, hash/2, combine/3]). + +-export_type([db/0, iterator/0]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +%% see rocksdb:db_options() +-type options() :: proplists:proplist(). + +%% parsed +-type topic() :: list(binary()). + +%% TODO granularity? +-type time() :: integer(). + +-record(db, { + db :: rocksdb:db_handle() +}). 
+ +-record(it, { + handle :: rocksdb:itr_handle(), + topic_filter :: emqx_topic:words(), + bitmask :: integer(), + start_time :: time() +}). + +-opaque db() :: #db{}. + +-opaque iterator() :: #it{}. + +%%================================================================================ +%% API funcions +%%================================================================================ + +-spec open(file:filename_all(), options()) -> + {ok, db()} | {error, _TODO}. +open(Filename, Options) -> + case rocksdb:open(Filename, [{create_if_missing, true}, Options]) of + {ok, Handle} -> + {ok, #db{db = Handle}}; + Error -> + Error + end. + +-spec close(db()) -> ok | {error, _}. +close(#db{db = DB}) -> + rocksdb:close(DB). + +-spec store(db(), emqx_guid:guid(), time(), topic(), binary()) -> + ok. +store(#db{db = DB}, MessageID, PublishedAt, Topic, MessagePayload) -> + Key = make_message_key(MessageID, Topic, PublishedAt), + Value = make_message_value(Topic, MessagePayload), + rocksdb:put(DB, Key, Value, [{sync, true}]). + +-spec make_iterator(db(), emqx_topic:words(), time() | earliest) -> + {ok, iterator()} | {error, invalid_start_time}. +make_iterator(#db{db = DBHandle}, TopicFilter, StartTime) -> + case rocksdb:iterator(DBHandle, []) of + {ok, ITHandle} -> + #it{ + handle = ITHandle, + topic_filter = TopicFilter, + start_time = StartTime, + bitmask = make_bitmask(TopicFilter) + }; + Err -> + Err + end. + +-spec next(iterator()) -> {value, binary()} | none. +next(It) -> + error(noimpl). + +%%================================================================================ +%% Internal exports +%%================================================================================ + +-define(TOPIC_LEVELS_ENTROPY_BITS, [8, 8, 32, 16]). + +make_message_key(MessageID, Topic, PublishedAt) -> + combine(compute_topic_hash(Topic), PublishedAt, MessageID). + +make_message_value(Topic, MessagePayload) -> + term_to_binary({Topic, MessagePayload}). 
+ +combine(TopicHash, PublishedAt, MessageID) -> + <>. + +compute_topic_hash(Topic) -> + compute_topic_hash(Topic, ?TOPIC_LEVELS_ENTROPY_BITS, 0). + +hash(Input, Bits) -> + % at most 32 bits + erlang:phash2(Input, 1 bsl Bits). + +-spec make_bitmask(emqx_topic:words()) -> integer(). +make_bitmask(TopicFilter) -> + make_bitmask(TopicFilter, ?TOPIC_LEVELS_ENTROPY_BITS). + +%%================================================================================ +%% Internal functions +%%================================================================================ + +compute_topic_hash(LevelsRest, [Bits], Acc) -> + Hash = hash(LevelsRest, Bits), + Acc bsl Bits + Hash; +compute_topic_hash([], [Bits | BitsRest], Acc) -> + Hash = hash(<<"/">>, Bits), + compute_topic_hash([], BitsRest, Acc bsl Bits + Hash); +compute_topic_hash([Level | LevelsRest], [Bits | BitsRest], Acc) -> + Hash = hash(Level, Bits), + compute_topic_hash(LevelsRest, BitsRest, Acc bsl Bits + Hash). + +make_bitmask(LevelsRest, [Bits], Acc) -> + Hash = hash(LevelsRest, Bits), + Acc bsl Bits + Hash; +make_bitmask([], [Bits | BitsRest], Acc) -> + Hash = hash(<<"/">>, Bits), + make_bitmask([], BitsRest, Acc bsl Bits + Hash); +make_bitmask([Level | LevelsRest], [Bits | BitsRest], Acc) -> + Hash = case Level of + '+' -> + 0; + Bin when is_binary(Bin) -> + 1 bsl Bits - 1; + + Hash = hash(Level, Bits), + make_bitmask(LevelsRest, BitsRest, Acc bsl Bits + Hash). + +%% |123|345|678| +%% foo bar baz + +%% |123|000|678| - |123|fff|678| + +%% foo + baz + +%% |fff|000|fff| + +%% |123|000|678| + +%% |123|056|678| & |fff|000|fff| = |123|000|678|. 
From 27b925405bbdeb1bc6557eced1918f08eca10c9b Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 22 Dec 2022 15:02:15 +0100 Subject: [PATCH 02/49] test(ds): Add testcase --- .../test/emqx_replay_storage_SUITE.erl | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 apps/emqx_replay/test/emqx_replay_storage_SUITE.erl diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl new file mode 100644 index 000000000..583665eba --- /dev/null +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -0,0 +1,83 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_replay_storage_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("proper/include/proper.hrl"). + +-define(DB_FILE, ?MODULE_STRING). + +%% Smoke test of store function +t_store(Config) -> + DB = ?config(handle, Config), + MessageID = emqx_guid:gen(), + PublishedAt = 1000, + Topic = [<<"foo">>, <<"bar">>], + Payload = <<"message">>, + ?assertMatch(ok, emqx_replay_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload)). 
+ +%% Smoke test for iteration through a concrete topic +t_iterate(Config) -> + DB = ?config(handle, Config), + %% Prepare data: + Topics = [[<<"foo">>, <<"bar">>], [<<"foo">>, <<"bar">>, <<"baz">>], [<<"a">>]], + Timestamps = lists:seq(1, 10), + [ + emqx_replay_message_storage:store( + DB, + emqx_guid:gen(), + PublishedAt, + Topic, + integer_to_binary(PublishedAt) + ) + || Topic <- Topics, PublishedAt <- Timestamps + ], + %% Iterate through individual topics: + [ + begin + {ok, It} = emqx_replay_message_storage:make_iterator(DB, Topic, 0), + Values = iterate(It), + ?assertEqual(Values, lists:map(fun integer_to_binary/1, Timestamps)) + end + || Topic <- Topics + ], + ok. + +iterate(It) -> + case emqx_replay_message_storage:next(It) of + {value, Val} -> + [Val | iterate(It)]; + none -> + [] + end. + +%% CT callbacks + +all() -> emqx_common_test_helpers:all(?MODULE). + +init_per_testcase(TC, Config) -> + Filename = filename:join(?MODULE_STRING, atom_to_list(TC)), + ok = filelib:ensure_dir(Filename), + {ok, DB} = emqx_replay_message_storage:open(Filename, []), + [{handle, DB} | Config]. + +end_per_testcase(_TC, Config) -> + DB = ?config(handle, Config), + catch emqx_replay_message_storage:close(DB). From 9c1cd4911d974e01d937174330d6b43e28538482 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 23 Dec 2022 17:37:25 +0300 Subject: [PATCH 03/49] feat(ds): Implement iterator next --- .../src/emqx_replay_message_storage.erl | 297 ++++++++++++++++-- 1 file changed, 271 insertions(+), 26 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 45ad2beab..e867ad850 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -21,6 +21,7 @@ -export([store/5]). -export([make_iterator/3]). +-export([next/1]). %% Debug/troubleshooting: -export([make_message_key/3, compute_topic_hash/1, hash/2, combine/3]). 
@@ -41,13 +42,15 @@ -type time() :: integer(). -record(db, { - db :: rocksdb:db_handle() + handle :: rocksdb:db_handle() }). -record(it, { handle :: rocksdb:itr_handle(), + next_action :: {seek, binary()} | next, topic_filter :: emqx_topic:words(), - bitmask :: integer(), + hash_filter :: integer(), + hash_bitmask :: integer(), start_time :: time() }). @@ -64,40 +67,56 @@ open(Filename, Options) -> case rocksdb:open(Filename, [{create_if_missing, true}, Options]) of {ok, Handle} -> - {ok, #db{db = Handle}}; + {ok, #db{handle = Handle}}; Error -> Error end. -spec close(db()) -> ok | {error, _}. -close(#db{db = DB}) -> +close(#db{handle = DB}) -> rocksdb:close(DB). -spec store(db(), emqx_guid:guid(), time(), topic(), binary()) -> - ok. -store(#db{db = DB}, MessageID, PublishedAt, Topic, MessagePayload) -> + ok | {error, _TODO}. +store(#db{handle = DB}, MessageID, PublishedAt, Topic, MessagePayload) -> Key = make_message_key(MessageID, Topic, PublishedAt), Value = make_message_value(Topic, MessagePayload), rocksdb:put(DB, Key, Value, [{sync, true}]). -spec make_iterator(db(), emqx_topic:words(), time() | earliest) -> - {ok, iterator()} | {error, invalid_start_time}. -make_iterator(#db{db = DBHandle}, TopicFilter, StartTime) -> + % {error, invalid_start_time}? might just start from the beginning of time + % and call it a day: client violated the contract anyway. + {ok, iterator()} | {error, _TODO}. +make_iterator(#db{handle = DBHandle}, TopicFilter, StartTime) -> case rocksdb:iterator(DBHandle, []) of {ok, ITHandle} -> + Hash = compute_topic_hash(TopicFilter), + HashBitmask = make_bitmask(TopicFilter), + HashFilter = Hash band HashBitmask, #it{ handle = ITHandle, + next_action = {seek, combine(HashFilter, StartTime, <<>>)}, topic_filter = TopicFilter, start_time = StartTime, - bitmask = make_bitmask(TopicFilter) + hash_filter = HashFilter, + hash_bitmask = HashBitmask }; Err -> Err end. --spec next(iterator()) -> {value, binary()} | none. -next(It) -> - error(noimpl). 
+-spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}. +next(It = #it{next_action = Action}) -> + case rocksdb:iterator_move(It#it.handle, Action) of + % spec says `{ok, Key}` is also possible but the implementation says it's not + {ok, Key, Value} -> + {TopicHash, PublishedAt} = extract(Key), + match_next(It, TopicHash, PublishedAt, Value); + {error, invalid_iterator} -> + stop_iteration(It); + {error, iterator_closed} -> + {error, closed} + end. %%================================================================================ %% Internal exports @@ -111,9 +130,15 @@ make_message_key(MessageID, Topic, PublishedAt) -> make_message_value(Topic, MessagePayload) -> term_to_binary({Topic, MessagePayload}). +unwrap_message_value(Binary) -> + binary_to_term(Binary). + combine(TopicHash, PublishedAt, MessageID) -> <>. +extract(<>) -> + {TopicHash, PublishedAt}. + compute_topic_hash(Topic) -> compute_topic_hash(Topic, ?TOPIC_LEVELS_ENTROPY_BITS, 0). @@ -123,7 +148,7 @@ hash(Input, Bits) -> -spec make_bitmask(emqx_topic:words()) -> integer(). make_bitmask(TopicFilter) -> - make_bitmask(TopicFilter, ?TOPIC_LEVELS_ENTROPY_BITS). + make_bitmask(TopicFilter, ?TOPIC_LEVELS_ENTROPY_BITS, 0). %%================================================================================ %% Internal functions @@ -139,21 +164,21 @@ compute_topic_hash([Level | LevelsRest], [Bits | BitsRest], Acc) -> Hash = hash(Level, Bits), compute_topic_hash(LevelsRest, BitsRest, Acc bsl Bits + Hash). 
-make_bitmask(LevelsRest, [Bits], Acc) -> - Hash = hash(LevelsRest, Bits), - Acc bsl Bits + Hash; +make_bitmask(['#'], BitsPerLevel, Acc) -> + Acc bsl lists:sum(BitsPerLevel) + 0; +make_bitmask(['+' | LevelsRest], [Bits | BitsRest], Acc) -> + make_bitmask(LevelsRest, BitsRest, Acc bsl Bits + 0); +make_bitmask(_, [Bits], Acc) -> + Acc bsl Bits + ones(Bits); make_bitmask([], [Bits | BitsRest], Acc) -> - Hash = hash(<<"/">>, Bits), - make_bitmask([], BitsRest, Acc bsl Bits + Hash); -make_bitmask([Level | LevelsRest], [Bits | BitsRest], Acc) -> - Hash = case Level of - '+' -> - 0; - Bin when is_binary(Bin) -> - 1 bsl Bits - 1; + make_bitmask([], BitsRest, Acc bsl Bits + ones(Bits)); +make_bitmask([_ | LevelsRest], [Bits | BitsRest], Acc) -> + make_bitmask(LevelsRest, BitsRest, Acc bsl Bits + ones(Bits)); +make_bitmask(_, [], Acc) -> + Acc. - Hash = hash(Level, Bits), - make_bitmask(LevelsRest, BitsRest, Acc bsl Bits + Hash). +ones(Bits) -> + 1 bsl Bits - 1. %% |123|345|678| %% foo bar baz @@ -167,3 +192,223 @@ make_bitmask([Level | LevelsRest], [Bits | BitsRest], Acc) -> %% |123|000|678| %% |123|056|678| & |fff|000|fff| = |123|000|678|. 
+ +%% Filter = |123|***|678| +%% Key1 = |123|011|108| → Seek = |123|011|678| +%% Key1 = |123|011|679| → Seek = |123|012|678| +%% Key1 = |123|999|679| → Seek = 1|123|000|678| → eos + +%% Filter = |123|***|678|***| +%% Key1 = |123|011|108|121| → Seek = |123|011|678|000| +%% Key1 = |123|011|679|919| → Seek = |123|012|678|000| +%% Key1 = |123|999|679|001| → Seek = 1|123|000|678|000| → eos +%% Key1 = |125|999|179|017| → Seek = 1|123|000|678|000| → eos + +match_next( + It = #it{ + topic_filter = TopicFilter, + hash_filter = HashFilter, + hash_bitmask = HashBitmask, + start_time = StartTime + }, + TopicHash, + PublishedAt, + Value +) -> + HashMatches = (TopicHash band It#it.hash_bitmask) == It#it.hash_filter, + TimeMatches = PublishedAt >= It#it.start_time, + case HashMatches of + true when TimeMatches -> + {Topic, MessagePayload} = unwrap_message_value(Value), + case emqx_topic:match(Topic, TopicFilter) of + true -> + {value, MessagePayload, It#it{next_action = next}}; + false -> + next(It#it{next_action = next}) + end; + true -> + NextAction = {seek, combine(TopicHash, StartTime, <<>>)}, + next(It#it{next_action = NextAction}); + false -> + case compute_next_seek(TopicHash, HashFilter, HashBitmask) of + NextHash when is_integer(NextHash) -> + NextAction = {seek, combine(NextHash, StartTime, <<>>)}, + next(It#it{next_action = NextAction}); + none -> + stop_iteration(It) + end + end. + +stop_iteration(It) -> + ok = rocksdb:iterator_close(It#it.handle), + none. + +compute_next_seek(TopicHash, HashFilter, HashBitmask) -> + compute_next_seek(TopicHash, HashFilter, HashBitmask, ?TOPIC_LEVELS_ENTROPY_BITS). + +compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel) -> + % NOTE + % Ok, this convoluted mess implements a sort of _increment operation_ for some + % strange number in variable bit-width base. 
There are `Levels` "digits", those + % with `0` level bitmask have `BitsPerLevel` bit-width and those with `111...` + % level bitmask have in some sense 0 bits (because they are fixed "digits" + % with exacly one possible value). + % TODO make at least remotely readable / optimize later + Result = zipfoldr3( + fun(LevelHash, Filter, LevelMask, Bits, Shift, {Carry, Acc}) -> + % io:format(user, "~n *** LH: ~.16B / F: ~.16B / M: ~.16B / Bs: ~B / Sh: ~B~n", [LevelHash, Filter, LevelMask, Bits, Shift]), + % io:format(user, "~n *** Carry: ~B / Acc: ~.16B~n", [Carry, Acc]), + case LevelMask of + 0 when Carry == 0 -> + {0, Acc + (LevelHash bsl Shift)}; + 0 -> + LevelHash1 = LevelHash + Carry, + NextCarry = LevelHash1 bsr Bits, + NextAcc = (LevelHash1 band ones(Bits)) bsl Shift, + {NextCarry, NextAcc}; + _ when (LevelHash + Carry) == Filter -> + {0, Acc + (Filter bsl Shift)}; + _ when (LevelHash + Carry) > Filter -> + {1, Filter bsl Shift}; + _ -> + {0, Filter bsl Shift} + end + end, + {1, 0}, + TopicHash, + HashFilter, + HashBitmask, + BitsPerLevel + ), + case Result of + {_, {_Carry = 0, Next}} -> + Next bor HashFilter; + {_, {_Carry = 1, _}} -> + % we got "carried away" past the range, time to stop iteration + none + end. + +% zipfoldr3(FoldFun, Acc, I1, I2, I3, Shift, [Bits]) -> +% { Shift + Bits +% , FoldFun( I1 band ones(Bits) +% , I2 band ones(Bits) +% , I3 band ones(Bits) +% , Bits, Acc ) }; +zipfoldr3(_FoldFun, Acc, _, _, _, []) -> + {0, Acc}; +zipfoldr3(FoldFun, Acc, I1, I2, I3, [Bits | Rest]) -> + {Shift, AccNext} = zipfoldr3( + FoldFun, + Acc, + I1, + I2, + I3, + Rest + ), + % { FoldFun(I1 band ones(Bits), I2 band ones(Bits), I3 band ones(Bits), Bits, AccNext). + { + Shift + Bits, + FoldFun( + (I1 bsr Shift) band ones(Bits), + (I2 bsr Shift) band ones(Bits), + (I3 bsr Shift) band ones(Bits), + Bits, + Shift, + AccNext + ) + }. + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). 
+ +make_test_bitmask(TopicFilter) -> + make_bitmask(TopicFilter, [3, 4, 5, 2], 0). + +bitmask_test_() -> + [ + ?_assertEqual( + 2#111_1111_11111_11, + make_test_bitmask([<<"foo">>, <<"bar">>]) + ), + ?_assertEqual( + 2#111_0000_11111_11, + make_test_bitmask([<<"foo">>, '+']) + ), + ?_assertEqual( + 2#111_0000_00000_11, + make_test_bitmask([<<"foo">>, '+', '+']) + ), + ?_assertEqual( + 2#111_0000_11111_00, + make_test_bitmask([<<"foo">>, '+', <<"bar">>, '+']) + ) + ]. + +wildcard_bitmask_test_() -> + [ + ?_assertEqual( + 2#000_0000_00000_00, + make_test_bitmask(['#']) + ), + ?_assertEqual( + 2#111_0000_00000_00, + make_test_bitmask([<<"foo">>, '#']) + ), + ?_assertEqual( + 2#111_1111_11111_00, + make_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, '#']) + ), + ?_assertEqual( + 2#111_1111_11111_11, + make_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, '#']) + ) + ]. + +%% Filter = |123|***|678|***| +%% Mask = |123|***|678|***| +%% Key1 = |123|011|108|121| → Seek = 0 |123|011|678|000| +%% Key2 = |123|011|679|919| → Seek = 0 |123|012|678|000| +%% Key3 = |123|999|679|001| → Seek = 1 |123|000|678|000| → eos +%% Key4 = |125|011|179|017| → Seek = 1 |123|000|678|000| → eos + +compute_test_next_seek(TopicHash, HashFilter, HashBitmask) -> + compute_next_seek(TopicHash, HashFilter, HashBitmask, [8, 8, 16, 12]). + +next_seek_test_() -> + [ + ?_assertMatch( + 16#FD_11_0678_000, + compute_test_next_seek( + 16#FD_11_0108_121, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000 + ) + ), + ?_assertMatch( + 16#FD_12_0678_000, + compute_test_next_seek( + 16#FD_11_0679_919, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000 + ) + ), + ?_assertMatch( + none, + compute_test_next_seek( + 16#FD_FF_0679_001, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000 + ) + ), + ?_assertMatch( + none, + compute_test_next_seek( + 16#FE_11_0179_017, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000 + ) + ) + ]. + +-endif. 
From cd12338c3f2219c6778ac090412b06e35a0485c2 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 23 Dec 2022 19:21:04 +0300 Subject: [PATCH 04/49] feat(ds): Smoke tests for wildcard iterator scans --- .../src/emqx_replay_message_storage.erl | 32 +++------ .../test/emqx_replay_storage_SUITE.erl | 69 +++++++++++++++++-- 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index e867ad850..2e5fb95f2 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -93,14 +93,14 @@ make_iterator(#db{handle = DBHandle}, TopicFilter, StartTime) -> Hash = compute_topic_hash(TopicFilter), HashBitmask = make_bitmask(TopicFilter), HashFilter = Hash band HashBitmask, - #it{ + {ok, #it{ handle = ITHandle, next_action = {seek, combine(HashFilter, StartTime, <<>>)}, topic_filter = TopicFilter, start_time = StartTime, hash_filter = HashFilter, hash_bitmask = HashBitmask - }; + }}; Err -> Err end. @@ -193,17 +193,6 @@ ones(Bits) -> %% |123|056|678| & |fff|000|fff| = |123|000|678|. 
-%% Filter = |123|***|678| -%% Key1 = |123|011|108| → Seek = |123|011|678| -%% Key1 = |123|011|679| → Seek = |123|012|678| -%% Key1 = |123|999|679| → Seek = 1|123|000|678| → eos - -%% Filter = |123|***|678|***| -%% Key1 = |123|011|108|121| → Seek = |123|011|678|000| -%% Key1 = |123|011|679|919| → Seek = |123|012|678|000| -%% Key1 = |123|999|679|001| → Seek = 1|123|000|678|000| → eos -%% Key1 = |125|999|179|017| → Seek = 1|123|000|678|000| → eos - match_next( It = #it{ topic_filter = TopicFilter, @@ -256,8 +245,6 @@ compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel) -> % TODO make at least remotely readable / optimize later Result = zipfoldr3( fun(LevelHash, Filter, LevelMask, Bits, Shift, {Carry, Acc}) -> - % io:format(user, "~n *** LH: ~.16B / F: ~.16B / M: ~.16B / Bs: ~B / Sh: ~B~n", [LevelHash, Filter, LevelMask, Bits, Shift]), - % io:format(user, "~n *** Carry: ~B / Acc: ~.16B~n", [Carry, Acc]), case LevelMask of 0 when Carry == 0 -> {0, Acc + (LevelHash bsl Shift)}; @@ -288,12 +275,6 @@ compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel) -> none end. -% zipfoldr3(FoldFun, Acc, I1, I2, I3, Shift, [Bits]) -> -% { Shift + Bits -% , FoldFun( I1 band ones(Bits) -% , I2 band ones(Bits) -% , I3 band ones(Bits) -% , Bits, Acc ) }; zipfoldr3(_FoldFun, Acc, _, _, _, []) -> {0, Acc}; zipfoldr3(FoldFun, Acc, I1, I2, I3, [Bits | Rest]) -> @@ -305,7 +286,6 @@ zipfoldr3(FoldFun, Acc, I1, I2, I3, [Bits | Rest]) -> I3, Rest ), - % { FoldFun(I1 band ones(Bits), I2 band ones(Bits), I3 band ones(Bits), Bits, AccNext). 
{ Shift + Bits, FoldFun( @@ -377,6 +357,14 @@ compute_test_next_seek(TopicHash, HashFilter, HashBitmask) -> next_seek_test_() -> [ + ?_assertMatch( + none, + compute_test_next_seek( + 16#FD_42_4242_043, + 16#FD_42_4242_042, + 16#FF_FF_FFFF_FFF + ) + ), ?_assertMatch( 16#FD_11_0678_000, compute_test_next_seek( diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index 583665eba..e565df455 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -22,8 +22,6 @@ -include_lib("stdlib/include/assert.hrl"). -include_lib("proper/include/proper.hrl"). --define(DB_FILE, ?MODULE_STRING). - %% Smoke test of store function t_store(Config) -> DB = ?config(handle, Config), @@ -60,14 +58,77 @@ t_iterate(Config) -> ], ok. +%% Smoke test for iteration with wildcard topic filter +t_iterate_wildcard(Config) -> + DB = ?config(handle, Config), + %% Prepare data: + Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], + Timestamps = lists:seq(1, 10), + _ = [ + store(DB, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) + || Topic <- Topics, PublishedAt <- Timestamps + ], + ?assertEqual( + lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- Timestamps]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "#", 0)]) + ), + ?assertEqual( + [], + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "#", 10 + 1)]) + ), + ?assertEqual( + lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- lists:seq(5, 10)]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "#", 5)]) + ), + ?assertEqual( + lists:sort([ + {Topic, PublishedAt} + || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps + ]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "foo/#", 0)]) + ), + ?assertEqual( + lists:sort([{"foo/bar", PublishedAt} || PublishedAt <- Timestamps]), + 
lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "foo/+", 0)]) + ), + ?assertEqual( + [], + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "foo/+/bar", 0)]) + ), + ?assertEqual( + lists:sort([ + {Topic, PublishedAt} + || Topic <- ["foo/bar", "foo/bar/baz", "a/bar"], PublishedAt <- Timestamps + ]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "+/bar/#", 0)]) + ), + ?assertEqual( + lists:sort([{Topic, PublishedAt} || Topic <- ["a", "a/bar"], PublishedAt <- Timestamps]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "a/#", 0)]) + ), + ok. + +store(DB, PublishedAt, Topic, Payload) -> + ID = emqx_guid:gen(), + emqx_replay_message_storage:store(DB, ID, PublishedAt, parse_topic(Topic), Payload). + +iterate(DB, TopicFilter, StartTime) -> + {ok, It} = emqx_replay_message_storage:make_iterator(DB, parse_topic(TopicFilter), StartTime), + iterate(It). + iterate(It) -> case emqx_replay_message_storage:next(It) of - {value, Val} -> - [Val | iterate(It)]; + {value, Payload, ItNext} -> + [Payload | iterate(ItNext)]; none -> [] end. +parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) -> + Topic; +parse_topic(Topic) -> + emqx_topic:words(iolist_to_binary(Topic)). + %% CT callbacks all() -> emqx_common_test_helpers:all(?MODULE). From a1e29e4e24d5a42b6912381fc5ebe0309e85e3d5 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 27 Dec 2022 13:46:37 +0300 Subject: [PATCH 05/49] refactor(ds): Improve function naming --- .../src/emqx_replay_message_storage.erl | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 2e5fb95f2..30a9859b8 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -24,7 +24,13 @@ -export([next/1]). 
%% Debug/troubleshooting: --export([make_message_key/3, compute_topic_hash/1, hash/2, combine/3]). +-export([ + make_message_key/3, + compute_topic_hash/1, + compute_hash_bitmask/1, + hash/2, + combine/3 +]). -export_type([db/0, iterator/0]). @@ -91,7 +97,7 @@ make_iterator(#db{handle = DBHandle}, TopicFilter, StartTime) -> case rocksdb:iterator(DBHandle, []) of {ok, ITHandle} -> Hash = compute_topic_hash(TopicFilter), - HashBitmask = make_bitmask(TopicFilter), + HashBitmask = compute_hash_bitmask(TopicFilter), HashFilter = Hash band HashBitmask, {ok, #it{ handle = ITHandle, @@ -146,9 +152,9 @@ hash(Input, Bits) -> % at most 32 bits erlang:phash2(Input, 1 bsl Bits). --spec make_bitmask(emqx_topic:words()) -> integer(). -make_bitmask(TopicFilter) -> - make_bitmask(TopicFilter, ?TOPIC_LEVELS_ENTROPY_BITS, 0). +-spec compute_hash_bitmask(emqx_topic:words()) -> integer(). +compute_hash_bitmask(TopicFilter) -> + compute_hash_bitmask(TopicFilter, ?TOPIC_LEVELS_ENTROPY_BITS, 0). %%================================================================================ %% Internal functions @@ -164,17 +170,17 @@ compute_topic_hash([Level | LevelsRest], [Bits | BitsRest], Acc) -> Hash = hash(Level, Bits), compute_topic_hash(LevelsRest, BitsRest, Acc bsl Bits + Hash). 
-make_bitmask(['#'], BitsPerLevel, Acc) -> +compute_hash_bitmask(['#'], BitsPerLevel, Acc) -> Acc bsl lists:sum(BitsPerLevel) + 0; -make_bitmask(['+' | LevelsRest], [Bits | BitsRest], Acc) -> - make_bitmask(LevelsRest, BitsRest, Acc bsl Bits + 0); -make_bitmask(_, [Bits], Acc) -> +compute_hash_bitmask(['+' | LevelsRest], [Bits | BitsRest], Acc) -> + compute_hash_bitmask(LevelsRest, BitsRest, Acc bsl Bits + 0); +compute_hash_bitmask(_, [Bits], Acc) -> Acc bsl Bits + ones(Bits); -make_bitmask([], [Bits | BitsRest], Acc) -> - make_bitmask([], BitsRest, Acc bsl Bits + ones(Bits)); -make_bitmask([_ | LevelsRest], [Bits | BitsRest], Acc) -> - make_bitmask(LevelsRest, BitsRest, Acc bsl Bits + ones(Bits)); -make_bitmask(_, [], Acc) -> +compute_hash_bitmask([], [Bits | BitsRest], Acc) -> + compute_hash_bitmask([], BitsRest, Acc bsl Bits + ones(Bits)); +compute_hash_bitmask([_ | LevelsRest], [Bits | BitsRest], Acc) -> + compute_hash_bitmask(LevelsRest, BitsRest, Acc bsl Bits + ones(Bits)); +compute_hash_bitmask(_, [], Acc) -> Acc. ones(Bits) -> @@ -302,26 +308,26 @@ zipfoldr3(FoldFun, Acc, I1, I2, I3, [Bits | Rest]) -> -include_lib("eunit/include/eunit.hrl"). -make_test_bitmask(TopicFilter) -> - make_bitmask(TopicFilter, [3, 4, 5, 2], 0). +compute_test_bitmask(TopicFilter) -> + compute_hash_bitmask(TopicFilter, [3, 4, 5, 2], 0). bitmask_test_() -> [ ?_assertEqual( 2#111_1111_11111_11, - make_test_bitmask([<<"foo">>, <<"bar">>]) + compute_test_bitmask([<<"foo">>, <<"bar">>]) ), ?_assertEqual( 2#111_0000_11111_11, - make_test_bitmask([<<"foo">>, '+']) + compute_test_bitmask([<<"foo">>, '+']) ), ?_assertEqual( 2#111_0000_00000_11, - make_test_bitmask([<<"foo">>, '+', '+']) + compute_test_bitmask([<<"foo">>, '+', '+']) ), ?_assertEqual( 2#111_0000_11111_00, - make_test_bitmask([<<"foo">>, '+', <<"bar">>, '+']) + compute_test_bitmask([<<"foo">>, '+', <<"bar">>, '+']) ) ]. 
@@ -329,19 +335,19 @@ wildcard_bitmask_test_() -> [ ?_assertEqual( 2#000_0000_00000_00, - make_test_bitmask(['#']) + compute_test_bitmask(['#']) ), ?_assertEqual( 2#111_0000_00000_00, - make_test_bitmask([<<"foo">>, '#']) + compute_test_bitmask([<<"foo">>, '#']) ), ?_assertEqual( 2#111_1111_11111_00, - make_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, '#']) + compute_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, '#']) ), ?_assertEqual( 2#111_1111_11111_11, - make_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, '#']) + compute_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, '#']) ) ]. From 5b6c4d766bb6d0643b8690e0c6d7902fdd27d796 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 27 Dec 2022 13:47:38 +0300 Subject: [PATCH 06/49] test(ds): Add basic property tests --- .../test/emqx_replay_storage_SUITE.erl | 106 +++++ apps/emqx_replay/test/payload_gen.erl | 377 ++++++++++++++++++ 2 files changed, 483 insertions(+) create mode 100644 apps/emqx_replay/test/payload_gen.erl diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index e565df455..761ac041a 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -106,6 +106,10 @@ t_iterate_wildcard(Config) -> lists:sort([{Topic, PublishedAt} || Topic <- ["a", "a/bar"], PublishedAt <- Timestamps]), lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "a/#", 0)]) ), + ?assertEqual( + [], + lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "a/+/+", 0)]) + ), ok. store(DB, PublishedAt, Topic, Payload) -> @@ -129,6 +133,108 @@ parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) -> parse_topic(Topic) -> emqx_topic:words(iolist_to_binary(Topic)). 
+%% + +t_prop_topic_hash_computes(_) -> + ?assert( + proper:quickcheck( + ?FORALL(Topic, topic(), begin + Hash = emqx_replay_message_storage:compute_topic_hash(Topic), + is_integer(Hash) andalso (byte_size(binary:encode_unsigned(Hash)) =< 8) + end) + ) + ). + +t_prop_hash_bitmask_computes(_) -> + ?assert( + proper:quickcheck( + ?FORALL(TopicFilter, topic_filter(), begin + Hash = emqx_replay_message_storage:compute_hash_bitmask(TopicFilter), + is_integer(Hash) andalso (byte_size(binary:encode_unsigned(Hash)) =< 8) + end) + ) + ). + +t_prop_iterate_stored_messages(Config) -> + DB = ?config(handle, Config), + ?assertEqual( + true, + proper:quickcheck( + ?FORALL( + Streams, + messages(), + begin + Stream = payload_gen:interleave_streams(Streams), + ok = store_message_stream(DB, Stream) + % TODO actually verify some property + end + ) + ) + ). + +store_message_stream(DB, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> + MessageID = <>, + PublishedAt = rand:uniform(ChunkNum), + ok = emqx_replay_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload), + store_message_stream(DB, payload_gen:next(Rest)); +store_message_stream(_DB, []) -> + ok. + +messages() -> + ?LET(Topics, list(topic()), begin + [{Topic, payload_gen:binary_stream_gen(64)} || Topic <- Topics] + end). + +topic() -> + % TODO + % Somehow generate topic levels with variance according to the entropy distribution? + non_empty(list(topic_level())). + +topic(EntropyWeights) -> + ?LET( + L, + list(1), + % ?SIZED(S, [topic(S * nth(I, EntropyWeights, 1)) || I <- lists:seq(1, Len)]) + % [topic(10 * nth(I, EntropyWeights, 1)) || I <- lists:seq(1, Len)] + ?SIZED(S, [topic_level(S * EW) || EW <- lists:sublist(EntropyWeights ++ L, length(L))]) + ). + +topic_filter() -> + ?SUCHTHAT( + L, + non_empty( + list( + frequency([ + {5, topic_level()}, + {2, '+'}, + {1, '#'} + ]) + ) + ), + not lists:member('#', L) orelse lists:last(L) == '#' + ). 
+ +% topic() -> +% ?LAZY(?SIZED(S, frequency([ +% {S, [topic_level() | topic()]}, +% {1, []} +% ]))). + +% topic_filter() -> +% ?LAZY(?SIZED(S, frequency([ +% {round(S / 3 * 2), [topic_level() | topic_filter()]}, +% {round(S / 3 * 1), ['+' | topic_filter()]}, +% {1, []}, +% {1, ['#']} +% ]))). + +topic_level() -> + ?LET(L, list(oneof([range($a, $z), range($0, $9)])), iolist_to_binary(L)). + +topic_level(Entropy) -> + S = floor(1 + math:log2(Entropy) / 4), + ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))). + %% CT callbacks all() -> emqx_common_test_helpers:all(?MODULE). diff --git a/apps/emqx_replay/test/payload_gen.erl b/apps/emqx_replay/test/payload_gen.erl new file mode 100644 index 000000000..17e68f8d5 --- /dev/null +++ b/apps/emqx_replay/test/payload_gen.erl @@ -0,0 +1,377 @@ +%% @doc This module provides lazy, composable producer streams that +%% can be considered counterparts to Archiver's consumer pipes and +%% therefore can facilitate testing +%% +%% Also it comes with an implementation of binary data stream which is +%% able to produce sufficiently large amounts of plausibly +%% pseudorandom binary payload in a deterministic way. It also +%% contains routines to check binary blobs via sampling +-module(payload_gen). + +-define(end_of_stream, []). + +-dialyzer(no_improper_lists). + +%% Generic stream API: +-export([ + interleave_streams/1, + retransmits/2, + next/1, + consume/2, + consume/1 +]). + +%% Binary payload generator API: +-export([ + interleave_chunks/2, + interleave_chunks/1, + + mb/1, + + generator_fun/2, + generate_chunks/3, + generate_chunk/2, + check_consistency/3, + check_file_consistency/3, + get_byte/2 +]). + +%% List to stream generator API: +-export([list_to_stream/1]). + +%% Proper generators: +-export([ + binary_stream_gen/1, + interleaved_streams_gen/1, + interleaved_binary_gen/1, + interleaved_list_gen/1 +]). + +-export_type([payload/0, binary_payload/0]). + +-define(hash_size, 16). 
+ +-include_lib("proper/include/proper.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-type payload() :: {Seed :: term(), Size :: integer()}. + +-type binary_payload() :: { + binary(), _ChunkNum :: non_neg_integer(), _ChunkCnt :: non_neg_integer() +}. + +%% For performance reasons we treat regular lists as streams, see `next/1' +-opaque cont(Data) :: + fun(() -> stream(Data)) + | stream(Data). + +-type stream(Data) :: + maybe_improper_list(Data, cont(Data)) + | ?end_of_stream. + +-type tagged_binstream() :: stream({Tag :: term(), Payload :: chunk_state()}). + +-record(chunk_state, { + seed :: term(), + payload_size :: non_neg_integer(), + offset :: non_neg_integer(), + chunk_size :: non_neg_integer() +}). + +-opaque chunk_state() :: #chunk_state{}. + +-record(interleave_state, {streams :: [{Tag :: term(), Stream :: term()}]}). + +-opaque interleave_state() :: #interleave_state{}. + +%% ============================================================================= +%% API functions +%% ============================================================================= + +%% ----------------------------------------------------------------------------- +%% Proper generators +%% ----------------------------------------------------------------------------- + +%% @doc Proper generator that creates a binary stream +-spec binary_stream_gen(_ChunkSize :: non_neg_integer()) -> proper_types:type(). +binary_stream_gen(ChunkSize) when ChunkSize rem ?hash_size =:= 0 -> + ?LET( + {Seed, Size}, + {nat(), range(1, 16#100000)}, + generate_chunk({Seed, Size}, ChunkSize) + ). + +%% @equiv interleaved_streams_gen(10, Type) +-spec interleaved_streams_gen(proper_types:type()) -> proper_types:type(). +interleaved_streams_gen(Type) -> + interleaved_streams_gen(10, Type). 
+ +%% @doc Proper generator that creates a term of type +%% ```[{_Tag :: binary(), stream()}]''' that is ready to be fed +%% into `interleave_streams/1' function +-spec interleaved_streams_gen(non_neg_integer(), proper_types:type()) -> + proper_types:type(). +interleaved_streams_gen(MaxNStreams, StreamType) -> + ?LET( + NStreams, + range(1, MaxNStreams), + ?LET( + Streams, + vector(NStreams, StreamType), + begin + Tags = [<> || I <- lists:seq(1, length(Streams))], + lists:zip(Tags, Streams) + end + ) + ). + +-spec interleaved_binary_gen(non_neg_integer()) -> proper_types:type(). +interleaved_binary_gen(ChunkSize) -> + interleaved_streams_gen(binary_stream_gen(ChunkSize)). + +-spec interleaved_list_gen(proper_types:type()) -> proper_types:type(). +interleaved_list_gen(Type) -> + interleaved_streams_gen(non_empty(list(Type))). + +%% ----------------------------------------------------------------------------- +%% Generic streams +%% ----------------------------------------------------------------------------- + +%% @doc Consume one element from the stream. +-spec next(cont(A)) -> stream(A). +next(Fun) when is_function(Fun, 0) -> + Fun(); +next(L) -> + L. + +%% @doc Take a list of tagged streams and return a stream where +%% elements of the streams are tagged and randomly interleaved. +%% +%% Note: this function is more or less generic and it's compatible +%% with this module's `generate_chunks' function family, as well as +%% `ets:next', lists and what not +%% +%% Consider using simplified versions of this function +-spec interleave_streams([{Tag, stream(Data)}]) -> stream({Tag, Data}). +interleave_streams(Streams) -> + do_interleave_streams( + #interleave_state{streams = Streams} + ). + +%% @doc Take an arbitrary stream and add repetitions of the elements +%% TODO: Make retransmissions of arbitrary length +-spec retransmits(stream(Data), float()) -> stream(Data). 
+retransmits(Stream, Probability) -> + case Stream of + [Data | Cont0] -> + Cont = fun() -> retransmits(next(Cont0), Probability) end, + case rand:uniform() < Probability of + true -> [Data, Data | Cont]; + false -> [Data | Cont] + end; + ?end_of_stream -> + ?end_of_stream + end. + +%% @doc Consume all elements of the stream and feed them into a +%% callback (e.g. brod:produce) +-spec consume( + stream(A), + fun((A) -> Ret) +) -> [Ret]. +consume(Stream, Callback) -> + case Stream of + [Data | Cont] -> [Callback(Data) | consume(next(Cont), Callback)]; + ?end_of_stream -> [] + end. + +%% @equiv consume(Stream, fun(A) -> A end) +-spec consume(stream(A)) -> [A]. +consume(Stream) -> + consume(Stream, fun(A) -> A end). + +%% ----------------------------------------------------------------------------- +%% Misc functions +%% ----------------------------------------------------------------------------- + +%% @doc Return number of bytes in `N' megabytes +-spec mb(integer()) -> integer(). +mb(N) -> + N * 1048576. + +%% ----------------------------------------------------------------------------- +%% List streams +%% ----------------------------------------------------------------------------- +-spec list_to_stream([A]) -> stream(A). +list_to_stream(L) -> L. + +%% ----------------------------------------------------------------------------- +%% Binary streams +%% ----------------------------------------------------------------------------- + +%% @doc First argument is a chunk number, the second one is a seed. +%% This implementation is hardly efficient, but it was chosen for +%% clarity reasons +-spec generator_fun(integer(), binary()) -> binary(). +generator_fun(N, Seed) -> + crypto:hash(md5, <>). + +%% @doc Get byte at offset `N' +-spec get_byte(integer(), term()) -> byte(). +get_byte(N, Seed) -> + do_get_byte(N, seed_hash(Seed)). + +%% @doc Stream of binary chunks. 
Limitation: both payload size and +%% `ChunkSize' should be dividable by `?hash_size' +-spec generate_chunk(payload(), integer()) -> stream(binary_payload()). +generate_chunk({Seed, Size}, ChunkSize) when + ChunkSize rem ?hash_size =:= 0 +-> + State = #chunk_state{ + seed = Seed, + payload_size = Size, + chunk_size = ChunkSize, + offset = 0 + }, + generate_chunk(State). + +%% @doc Take a list of `payload()'s and a callback function, and start +%% producing the payloads in random order. Seed is used as a tag +%% @see interleave_streams/4 +-spec interleave_chunks([{payload(), ChunkSize :: non_neg_integer()}]) -> + tagged_binstream(). +interleave_chunks(Streams0) -> + Streams = [ + {Tag, generate_chunk(Payload, ChunkSize)} + || {Payload = {Tag, _}, ChunkSize} <- Streams0 + ], + interleave_streams(Streams). + +%% @doc Take a list of `payload()'s and a callback function, and start +%% consuming the payloads in a random order. Seed is used as a +%% tag. All streams use the same chunk size +%% @see interleave_streams/2 +-spec interleave_chunks( + [payload()], + non_neg_integer() +) -> tagged_binstream(). +interleave_chunks(Streams0, ChunkSize) -> + Streams = [ + {Seed, generate_chunk({Seed, Size}, ChunkSize)} + || {Seed, Size} <- Streams0 + ], + interleave_streams(Streams). + +%% @doc Generate chunks of data and feed them into +%% `Callback' +-spec generate_chunks( + payload(), + integer(), + fun((binary()) -> A) +) -> [A]. +generate_chunks(Payload, ChunkSize, Callback) -> + consume(generate_chunk(Payload, ChunkSize), Callback). + +-spec check_consistency( + payload(), + integer(), + fun((integer()) -> {ok, binary()} | undefined) +) -> ok. 
+check_consistency({Seed, Size}, SampleSize, Callback) -> + SeedHash = seed_hash(Seed), + Random = [rand:uniform(Size) - 1 || _ <- lists:seq(1, SampleSize)], + %% Always check first and last bytes, and one that should not exist: + Samples = [0, Size - 1, Size | Random], + lists:foreach( + fun + (N) when N < Size -> + Expected = do_get_byte(N, SeedHash), + ?assertEqual( + {N, {ok, Expected}}, + {N, Callback(N)} + ); + (N) -> + ?assertMatch(undefined, Callback(N)) + end, + Samples + ). + +-spec check_file_consistency( + payload(), + integer(), + file:filename() +) -> ok. +check_file_consistency(Payload, SampleSize, FileName) -> + {ok, FD} = file:open(FileName, [read, raw]), + try + Fun = fun(N) -> + case file:pread(FD, [{N, 1}]) of + {ok, [[X]]} -> {ok, X}; + {ok, [eof]} -> undefined + end + end, + check_consistency(Payload, SampleSize, Fun) + after + file:close(FD) + end. + +%% ============================================================================= +%% Internal functions +%% ============================================================================= + +-spec do_interleave_streams(interleave_state()) -> stream(_Data). +do_interleave_streams(#interleave_state{streams = []}) -> + ?end_of_stream; +do_interleave_streams(#interleave_state{streams = Streams} = State0) -> + %% Not the most efficient implementation (lots of avoidable list + %% traversals), but we don't expect the number of streams to be the + %% bottleneck + N = rand:uniform(length(Streams)), + {Hd, [{Tag, SC} | Tl]} = lists:split(N - 1, Streams), + case SC of + [Payload | SC1] -> + State = State0#interleave_state{streams = Hd ++ [{Tag, next(SC1)} | Tl]}, + Cont = fun() -> do_interleave_streams(State) end, + [{Tag, Payload} | Cont]; + ?end_of_stream -> + State = State0#interleave_state{streams = Hd ++ Tl}, + do_interleave_streams(State) + end. + +%% @doc Continue generating chunks +-spec generate_chunk(chunk_state()) -> stream(binary()). 
+generate_chunk(#chunk_state{offset = Offset, payload_size = Size}) when + Offset >= Size +-> + ?end_of_stream; +generate_chunk(State0 = #chunk_state{offset = Offset, chunk_size = ChunkSize}) -> + State = State0#chunk_state{offset = Offset + ChunkSize}, + Payload = generate_chunk( + State#chunk_state.seed, + Offset, + ChunkSize, + State#chunk_state.payload_size + ), + [Payload | fun() -> generate_chunk(State) end]. + +generate_chunk(Seed, Offset, ChunkSize, Size) -> + SeedHash = seed_hash(Seed), + To = min(Offset + ChunkSize, Size) - 1, + Payload = iolist_to_binary([ + generator_fun(I, SeedHash) + || I <- lists:seq(Offset div 16, To div 16) + ]), + ChunkNum = Offset div ChunkSize + 1, + ChunkCnt = ceil(Size / ChunkSize), + {Payload, ChunkNum, ChunkCnt}. + +%% @doc Hash any term +-spec seed_hash(term()) -> binary(). +seed_hash(Seed) -> + crypto:hash(md5, term_to_binary(Seed)). + +%% @private Get byte at offset `N' +-spec do_get_byte(integer(), binary()) -> byte(). +do_get_byte(N, Seed) -> + Chunk = generator_fun(N div ?hash_size, Seed), + binary:at(Chunk, N rem ?hash_size). From 8c9c9cc66953f3231222bc82af2d8b62a3c4bd35 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 28 Dec 2022 19:02:05 +0300 Subject: [PATCH 07/49] feat(ds): Allow to specify message store options * Keymapper * Column family name + DB options * DB write / read options --- .../src/emqx_replay_message_storage.erl | 190 ++++++++++++++---- .../test/emqx_replay_storage_SUITE.erl | 25 ++- 2 files changed, 167 insertions(+), 48 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 30a9859b8..b591157f4 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -18,6 +18,7 @@ %% API: -export([open/2, close/1]). +-export([make_keymapper/1]). -export([store/5]). -export([make_iterator/3]). 
@@ -25,11 +26,11 @@ %% Debug/troubleshooting: -export([ - make_message_key/3, - compute_topic_hash/1, - compute_hash_bitmask/1, - hash/2, - combine/3 + make_message_key/4, + compute_topic_hash/2, + compute_hash_bitmask/2, + combine/4, + hash/2 ]). -export_type([db/0, iterator/0]). @@ -38,21 +39,69 @@ %% Type declarations %%================================================================================ -%% see rocksdb:db_options() --type options() :: proplists:proplist(). - %% parsed -type topic() :: list(binary()). %% TODO granularity? -type time() :: integer(). +%% Number of bits +-type bits() :: non_neg_integer(). + +%% Key of a RocksDB record. +-type key() :: binary(). + +%% Distribution of entropy among topic levels. +%% Example: [4, 8, 16] means that level 1 gets 4 bits, level 2 gets 8 bits, +%% and _rest of levels_ (if any) get 16 bits. +-type bits_per_level() :: [bits(), ...]. + +%% see rocksdb:db_options() +-type db_options() :: proplists:proplist(). + +%% see rocksdb:cf_options() +-type db_cf_options() :: proplists:proplist(). + +%% see rocksdb:write_options() +-type db_write_options() :: proplists:proplist(). + +%% see rocksdb:read_options() +-type db_read_options() :: proplists:proplist(). + +-type options() :: #{ + %% Keymapper. + keymapper := keymapper(), + %% Name and options to use to open specific column family. + column_family => {_Name :: string(), db_cf_options()}, + %% Options to use when opening the DB. + open_options => db_options(), + %% Options to use when writing a message to the DB. + write_options => db_write_options(), + %% Options to use when iterating over messages in the DB. + read_options => db_read_options() +}. + +-define(DEFAULT_COLUMN_FAMILY, {"default", []}). + +-define(DEFAULT_OPEN_OPTIONS, [ + {create_if_missing, true}, + {create_missing_column_families, true} +]). + +-define(DEFAULT_WRITE_OPTIONS, [{sync, true}]). +-define(DEFAULT_READ_OPTIONS, []). 
+ -record(db, { - handle :: rocksdb:db_handle() + handle :: rocksdb:db_handle(), + cf :: rocksdb:cf_handle(), + keymapper :: keymapper(), + write_options = [{sync, true}] :: db_write_options(), + read_options = [] :: db_write_options() }). -record(it, { handle :: rocksdb:itr_handle(), + keymapper :: keymapper(), next_action :: {seek, binary()} | next, topic_filter :: emqx_topic:words(), hash_filter :: integer(), @@ -60,9 +109,17 @@ start_time :: time() }). --opaque db() :: #db{}. +% NOTE +% Keymapper decides how to map messages into RocksDB column family keyspace. +-record(keymapper, { + topic_bits :: bits(), + topic_bits_per_level :: bits_per_level(), + timestamp_bits :: bits() +}). +-opaque db() :: #db{}. -opaque iterator() :: #it{}. +-type keymapper() :: #keymapper{}. %%================================================================================ %% API funcions @@ -71,9 +128,30 @@ -spec open(file:filename_all(), options()) -> {ok, db()} | {error, _TODO}. open(Filename, Options) -> - case rocksdb:open(Filename, [{create_if_missing, true}, Options]) of - {ok, Handle} -> - {ok, #db{handle = Handle}}; + CFDescriptors = + case maps:get(column_family, Options, undefined) of + CF = {_Name, _} -> + % TODO + % > When opening a DB in a read-write mode, you need to specify all + % > Column Families that currently exist in a DB. If that's not the case, + % > DB::Open call will return Status::InvalidArgument(). + % This probably means that we need the _manager_ (the thing which knows + % about all the column families there is) to hold the responsibility to + % open the database and hold all the handles. 
+ [CF, ?DEFAULT_COLUMN_FAMILY]; + undefined -> + [?DEFAULT_COLUMN_FAMILY] + end, + DBOptions = maps:get(open_options, Options, ?DEFAULT_OPEN_OPTIONS), + case rocksdb:open(Filename, DBOptions, CFDescriptors) of + {ok, Handle, [CFHandle | _]} -> + {ok, #db{ + handle = Handle, + cf = CFHandle, + keymapper = maps:get(keymapper, Options), + write_options = maps:get(write_options, Options, ?DEFAULT_WRITE_OPTIONS), + read_options = maps:get(read_options, Options, ?DEFAULT_READ_OPTIONS) + }}; Error -> Error end. @@ -82,26 +160,44 @@ open(Filename, Options) -> close(#db{handle = DB}) -> rocksdb:close(DB). +-spec make_keymapper(Options) -> keymapper() when + Options :: #{ + %% Number of bits in a key allocated to a message timestamp. + timestamp_bits := bits(), + %% Number of bits in a key allocated to each level in a message topic. + topic_bits_per_level := bits_per_level() + }. +make_keymapper(Options) -> + TimestampBits = maps:get(timestamp_bits, Options), + TopicBitsPerLevel = maps:get(topic_bits_per_level, Options), + #keymapper{ + timestamp_bits = TimestampBits, + topic_bits = lists:sum(TopicBitsPerLevel), + topic_bits_per_level = TopicBitsPerLevel + }. + -spec store(db(), emqx_guid:guid(), time(), topic(), binary()) -> ok | {error, _TODO}. -store(#db{handle = DB}, MessageID, PublishedAt, Topic, MessagePayload) -> - Key = make_message_key(MessageID, Topic, PublishedAt), +store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, MessagePayload) -> + Key = make_message_key(Topic, PublishedAt, MessageID, DB#db.keymapper), Value = make_message_value(Topic, MessagePayload), - rocksdb:put(DB, Key, Value, [{sync, true}]). + rocksdb:put(DBHandle, CFHandle, Key, Value, DB#db.write_options). -spec make_iterator(db(), emqx_topic:words(), time() | earliest) -> % {error, invalid_start_time}? might just start from the beginning of time % and call it a day: client violated the contract anyway. {ok, iterator()} | {error, _TODO}. 
-make_iterator(#db{handle = DBHandle}, TopicFilter, StartTime) -> - case rocksdb:iterator(DBHandle, []) of +make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime) -> + case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of {ok, ITHandle} -> - Hash = compute_topic_hash(TopicFilter), - HashBitmask = compute_hash_bitmask(TopicFilter), + Hash = compute_topic_hash(TopicFilter, DB#db.keymapper), + HashBitmask = compute_hash_bitmask(TopicFilter, DB#db.keymapper), HashFilter = Hash band HashBitmask, + InitialSeek = combine(HashFilter, StartTime, <<>>, DB#db.keymapper), {ok, #it{ handle = ITHandle, - next_action = {seek, combine(HashFilter, StartTime, <<>>)}, + keymapper = DB#db.keymapper, + next_action = {seek, InitialSeek}, topic_filter = TopicFilter, start_time = StartTime, hash_filter = HashFilter, @@ -116,7 +212,7 @@ next(It = #it{next_action = Action}) -> case rocksdb:iterator_move(It#it.handle, Action) of % spec says `{ok, Key}` is also possible but the implementation says it's not {ok, Key, Value} -> - {TopicHash, PublishedAt} = extract(Key), + {TopicHash, PublishedAt} = extract(Key, It#it.keymapper), match_next(It, TopicHash, PublishedAt, Value); {error, invalid_iterator} -> stop_iteration(It); @@ -128,10 +224,8 @@ next(It = #it{next_action = Action}) -> %% Internal exports %%================================================================================ --define(TOPIC_LEVELS_ENTROPY_BITS, [8, 8, 32, 16]). - -make_message_key(MessageID, Topic, PublishedAt) -> - combine(compute_topic_hash(Topic), PublishedAt, MessageID). +make_message_key(Topic, PublishedAt, MessageID, Keymapper) -> + combine(compute_topic_hash(Topic, Keymapper), PublishedAt, MessageID, Keymapper). make_message_value(Topic, MessagePayload) -> term_to_binary({Topic, MessagePayload}). @@ -139,22 +233,33 @@ make_message_value(Topic, MessagePayload) -> unwrap_message_value(Binary) -> binary_to_term(Binary). -combine(TopicHash, PublishedAt, MessageID) -> - <>. 
+-spec combine(_TopicHash :: integer(), time(), emqx_guid:guid(), keymapper()) -> + key(). +combine(TopicHash, PublishedAt, MessageID, #keymapper{ + timestamp_bits = TimestampBits, + topic_bits = TopicBits +}) -> + <>. -extract(<>) -> +-spec extract(key(), keymapper()) -> + {_TopicHash :: integer(), time()}. +extract(Key, #keymapper{ + timestamp_bits = TimestampBits, + topic_bits = TopicBits +}) -> + <> = Key, {TopicHash, PublishedAt}. -compute_topic_hash(Topic) -> - compute_topic_hash(Topic, ?TOPIC_LEVELS_ENTROPY_BITS, 0). +compute_topic_hash(Topic, Keymapper) -> + compute_topic_hash(Topic, Keymapper#keymapper.topic_bits_per_level, 0). hash(Input, Bits) -> % at most 32 bits erlang:phash2(Input, 1 bsl Bits). --spec compute_hash_bitmask(emqx_topic:words()) -> integer(). -compute_hash_bitmask(TopicFilter) -> - compute_hash_bitmask(TopicFilter, ?TOPIC_LEVELS_ENTROPY_BITS, 0). +-spec compute_hash_bitmask(emqx_topic:words(), keymapper()) -> integer(). +compute_hash_bitmask(TopicFilter, Keymapper) -> + compute_hash_bitmask(TopicFilter, Keymapper#keymapper.topic_bits_per_level, 0). 
%%================================================================================ %% Internal functions @@ -201,6 +306,7 @@ ones(Bits) -> match_next( It = #it{ + keymapper = Keymapper, topic_filter = TopicFilter, hash_filter = HashFilter, hash_bitmask = HashBitmask, @@ -222,13 +328,13 @@ match_next( next(It#it{next_action = next}) end; true -> - NextAction = {seek, combine(TopicHash, StartTime, <<>>)}, - next(It#it{next_action = NextAction}); + NextSeek = combine(TopicHash, StartTime, <<>>, Keymapper), + next(It#it{next_action = {seek, NextSeek}}); false -> - case compute_next_seek(TopicHash, HashFilter, HashBitmask) of + case compute_next_seek(TopicHash, HashFilter, HashBitmask, Keymapper) of NextHash when is_integer(NextHash) -> - NextAction = {seek, combine(NextHash, StartTime, <<>>)}, - next(It#it{next_action = NextAction}); + NextSeek = combine(NextHash, StartTime, <<>>, Keymapper), + next(It#it{next_action = {seek, NextSeek}}); none -> stop_iteration(It) end @@ -238,9 +344,9 @@ stop_iteration(It) -> ok = rocksdb:iterator_close(It#it.handle), none. -compute_next_seek(TopicHash, HashFilter, HashBitmask) -> - compute_next_seek(TopicHash, HashFilter, HashBitmask, ?TOPIC_LEVELS_ENTROPY_BITS). 
- +compute_next_seek(TopicHash, HashFilter, HashBitmask, Keymapper = #keymapper{}) -> + BitsPerLevel = Keymapper#keymapper.topic_bits_per_level, + compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel); compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel) -> % NOTE % Ok, this convoluted mess implements a sort of _increment operation_ for some diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index 761ac041a..b1a8a396b 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -136,20 +136,28 @@ parse_topic(Topic) -> %% t_prop_topic_hash_computes(_) -> + Keymapper = emqx_replay_message_storage:make_keymapper(#{ + topic_bits_per_level => [8, 12, 16, 24], + timestamp_bits => 0 + }), ?assert( proper:quickcheck( ?FORALL(Topic, topic(), begin - Hash = emqx_replay_message_storage:compute_topic_hash(Topic), + Hash = emqx_replay_message_storage:compute_topic_hash(Topic, Keymapper), is_integer(Hash) andalso (byte_size(binary:encode_unsigned(Hash)) =< 8) end) ) ). 
t_prop_hash_bitmask_computes(_) -> + Keymapper = emqx_replay_message_storage:make_keymapper(#{ + topic_bits_per_level => [8, 12, 16, 24], + timestamp_bits => 0 + }), ?assert( proper:quickcheck( ?FORALL(TopicFilter, topic_filter(), begin - Hash = emqx_replay_message_storage:compute_hash_bitmask(TopicFilter), + Hash = emqx_replay_message_storage:compute_hash_bitmask(TopicFilter, Keymapper), is_integer(Hash) andalso (byte_size(binary:encode_unsigned(Hash)) =< 8) end) ) @@ -165,8 +173,9 @@ t_prop_iterate_stored_messages(Config) -> messages(), begin Stream = payload_gen:interleave_streams(Streams), - ok = store_message_stream(DB, Stream) + ok = store_message_stream(DB, Stream), % TODO actually verify some property + true end ) ) @@ -194,8 +203,6 @@ topic(EntropyWeights) -> ?LET( L, list(1), - % ?SIZED(S, [topic(S * nth(I, EntropyWeights, 1)) || I <- lists:seq(1, Len)]) - % [topic(10 * nth(I, EntropyWeights, 1)) || I <- lists:seq(1, Len)] ?SIZED(S, [topic_level(S * EW) || EW <- lists:sublist(EntropyWeights ++ L, length(L))]) ). @@ -242,7 +249,13 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_testcase(TC, Config) -> Filename = filename:join(?MODULE_STRING, atom_to_list(TC)), ok = filelib:ensure_dir(Filename), - {ok, DB} = emqx_replay_message_storage:open(Filename, []), + {ok, DB} = emqx_replay_message_storage:open(Filename, #{ + column_family => {atom_to_list(TC), []}, + keymapper => emqx_replay_message_storage:make_keymapper(#{ + topic_bits_per_level => [8, 8, 32, 16], + timestamp_bits => 64 + }) + }), [{handle, DB} | Config]. 
end_per_testcase(_TC, Config) -> From 5e30a5d3dd2ffa6a770f51e1acc3bb96f36e17c4 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 30 Dec 2022 13:35:04 +0300 Subject: [PATCH 08/49] feat(ds): Implement keyspace partitioning across time --- .../src/emqx_replay_message_storage.erl | 354 ++++++++++++------ .../test/emqx_replay_storage_SUITE.erl | 23 +- 2 files changed, 244 insertions(+), 133 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index b591157f4..66668b23b 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -27,9 +27,8 @@ %% Debug/troubleshooting: -export([ make_message_key/4, - compute_topic_hash/2, + compute_bitstring/3, compute_hash_bitmask/2, - combine/4, hash/2 ]). @@ -104,19 +103,27 @@ keymapper :: keymapper(), next_action :: {seek, binary()} | next, topic_filter :: emqx_topic:words(), - hash_filter :: integer(), + hash_bitfilter :: integer(), hash_bitmask :: integer(), - start_time :: time() + time_bitfilter :: integer(), + time_bitmask :: integer() }). % NOTE % Keymapper decides how to map messages into RocksDB column family keyspace. -record(keymapper, { - topic_bits :: bits(), - topic_bits_per_level :: bits_per_level(), - timestamp_bits :: bits() + source :: [bitsource(), ...], + bitsize :: bits(), + tau :: non_neg_integer() }). +-type bitsource() :: + %% Consume `_Size` bits from timestamp starting at `_Offset`th bit. + %% TODO consistency + {timestamp, _Offset :: bits(), _Size :: bits()} + %% Consume next topic level (either one or all of them) and compute `_Size` bits-wide hash. + | {hash, level | levels, _Size :: bits()}. + -opaque db() :: #db{}. -opaque iterator() :: #it{}. -type keymapper() :: #keymapper{}. @@ -162,18 +169,32 @@ close(#db{handle = DB}) -> -spec make_keymapper(Options) -> keymapper() when Options :: #{ - %% Number of bits in a key allocated to a message timestamp. 
+ %% Number of bits in a message timestamp. timestamp_bits := bits(), %% Number of bits in a key allocated to each level in a message topic. - topic_bits_per_level := bits_per_level() + topic_bits_per_level := bits_per_level(), + %% Maximum granularity of iteration over time. + max_tau := time() }. -make_keymapper(Options) -> - TimestampBits = maps:get(timestamp_bits, Options), - TopicBitsPerLevel = maps:get(topic_bits_per_level, Options), +make_keymapper(#{ + timestamp_bits := TimestampBits, + topic_bits_per_level := BitsPerLevel, + max_tau := MaxTau +}) -> + TimestampLSBs = floor(math:log2(MaxTau)), + TimestampMSBs = TimestampBits - TimestampLSBs, + NLevels = length(BitsPerLevel), + {LevelBits, [TailLevelsBits]} = lists:split(NLevels - 1, BitsPerLevel), + Source = lists:flatten([ + {timestamp, TimestampLSBs, TimestampMSBs}, + [{hash, level, Bits} || Bits <- LevelBits], + {hash, levels, TailLevelsBits}, + [{timestamp, 0, TimestampLSBs} || TimestampLSBs > 0] + ]), #keymapper{ - timestamp_bits = TimestampBits, - topic_bits = lists:sum(TopicBitsPerLevel), - topic_bits_per_level = TopicBitsPerLevel + source = Source, + bitsize = lists:sum([S || {_, _, S} <- Source]), + tau = 1 bsl TimestampLSBs }. 
-spec store(db(), emqx_guid:guid(), time(), topic(), binary()) -> @@ -190,18 +211,21 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime) -> case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of {ok, ITHandle} -> - Hash = compute_topic_hash(TopicFilter, DB#db.keymapper), + Bitstring = compute_bitstring(TopicFilter, StartTime, DB#db.keymapper), HashBitmask = compute_hash_bitmask(TopicFilter, DB#db.keymapper), - HashFilter = Hash band HashBitmask, - InitialSeek = combine(HashFilter, StartTime, <<>>, DB#db.keymapper), + TimeBitmask = compute_time_bitmask(DB#db.keymapper), + HashBitfilter = Bitstring band HashBitmask, + TimeBitfilter = Bitstring band TimeBitmask, + InitialSeek = combine(HashBitfilter bor TimeBitfilter, <<>>, DB#db.keymapper), {ok, #it{ handle = ITHandle, keymapper = DB#db.keymapper, next_action = {seek, InitialSeek}, topic_filter = TopicFilter, - start_time = StartTime, - hash_filter = HashFilter, - hash_bitmask = HashBitmask + hash_bitfilter = HashBitfilter, + hash_bitmask = HashBitmask, + time_bitfilter = TimeBitfilter, + time_bitmask = TimeBitmask }}; Err -> Err @@ -212,8 +236,8 @@ next(It = #it{next_action = Action}) -> case rocksdb:iterator_move(It#it.handle, Action) of % spec says `{ok, Key}` is also possible but the implementation says it's not {ok, Key, Value} -> - {TopicHash, PublishedAt} = extract(Key, It#it.keymapper), - match_next(It, TopicHash, PublishedAt, Value); + Bitstring = extract(Key, It#it.keymapper), + match_next(It, Bitstring, Value); {error, invalid_iterator} -> stop_iteration(It); {error, iterator_closed} -> @@ -225,7 +249,7 @@ next(It = #it{next_action = Action}) -> %%================================================================================ make_message_key(Topic, PublishedAt, MessageID, Keymapper) -> - combine(compute_topic_hash(Topic, Keymapper), PublishedAt, MessageID, Keymapper). 
+ combine(compute_bitstring(Topic, PublishedAt, Keymapper), MessageID, Keymapper). make_message_value(Topic, MessagePayload) -> term_to_binary({Topic, MessagePayload}). @@ -233,61 +257,74 @@ make_message_value(Topic, MessagePayload) -> unwrap_message_value(Binary) -> binary_to_term(Binary). --spec combine(_TopicHash :: integer(), time(), emqx_guid:guid(), keymapper()) -> +-spec combine(_Bitstring :: integer(), emqx_guid:guid(), keymapper()) -> key(). -combine(TopicHash, PublishedAt, MessageID, #keymapper{ - timestamp_bits = TimestampBits, - topic_bits = TopicBits -}) -> - <>. +combine(Bitstring, MessageID, #keymapper{bitsize = Size}) -> + <>. -spec extract(key(), keymapper()) -> - {_TopicHash :: integer(), time()}. -extract(Key, #keymapper{ - timestamp_bits = TimestampBits, - topic_bits = TopicBits -}) -> - <> = Key, - {TopicHash, PublishedAt}. + _Bitstring :: integer(). +extract(Key, #keymapper{bitsize = Size}) -> + <> = Key, + Bitstring. -compute_topic_hash(Topic, Keymapper) -> - compute_topic_hash(Topic, Keymapper#keymapper.topic_bits_per_level, 0). +-spec compute_bitstring(topic(), time(), keymapper()) -> integer(). +compute_bitstring(Topic, Timestamp, #keymapper{source = Source}) -> + compute_bitstring(Topic, Timestamp, Source, 0). + +-spec compute_hash_bitmask(emqx_topic:words(), keymapper()) -> integer(). +compute_hash_bitmask(TopicFilter, #keymapper{source = Source}) -> + compute_hash_bitmask(TopicFilter, Source, 0). + +-spec compute_time_bitmask(keymapper()) -> integer(). +compute_time_bitmask(#keymapper{source = Source}) -> + compute_time_bitmask(Source, 0). hash(Input, Bits) -> % at most 32 bits erlang:phash2(Input, 1 bsl Bits). --spec compute_hash_bitmask(emqx_topic:words(), keymapper()) -> integer(). -compute_hash_bitmask(TopicFilter, Keymapper) -> - compute_hash_bitmask(TopicFilter, Keymapper#keymapper.topic_bits_per_level, 0). 
- %%================================================================================ %% Internal functions %%================================================================================ -compute_topic_hash(LevelsRest, [Bits], Acc) -> - Hash = hash(LevelsRest, Bits), - Acc bsl Bits + Hash; -compute_topic_hash([], [Bits | BitsRest], Acc) -> - Hash = hash(<<"/">>, Bits), - compute_topic_hash([], BitsRest, Acc bsl Bits + Hash); -compute_topic_hash([Level | LevelsRest], [Bits | BitsRest], Acc) -> - Hash = hash(Level, Bits), - compute_topic_hash(LevelsRest, BitsRest, Acc bsl Bits + Hash). +compute_bitstring(Topic, Timestamp, [{timestamp, Offset, Size} | Rest], Acc) -> + I = (Timestamp bsr Offset) band ones(Size), + compute_bitstring(Topic, Timestamp, Rest, (Acc bsl Size) + I); +compute_bitstring([], Timestamp, [{hash, level, Size} | Rest], Acc) -> + I = hash(<<"/">>, Size), + compute_bitstring([], Timestamp, Rest, (Acc bsl Size) + I); +compute_bitstring([Level | Tail], Timestamp, [{hash, level, Size} | Rest], Acc) -> + I = hash(Level, Size), + compute_bitstring(Tail, Timestamp, Rest, (Acc bsl Size) + I); +compute_bitstring(Tail, Timestamp, [{hash, levels, Size} | Rest], Acc) -> + I = hash(Tail, Size), + compute_bitstring(Tail, Timestamp, Rest, (Acc bsl Size) + I); +compute_bitstring(_, _, [], Acc) -> + Acc. 
-compute_hash_bitmask(['#'], BitsPerLevel, Acc) -> - Acc bsl lists:sum(BitsPerLevel) + 0; -compute_hash_bitmask(['+' | LevelsRest], [Bits | BitsRest], Acc) -> - compute_hash_bitmask(LevelsRest, BitsRest, Acc bsl Bits + 0); -compute_hash_bitmask(_, [Bits], Acc) -> - Acc bsl Bits + ones(Bits); -compute_hash_bitmask([], [Bits | BitsRest], Acc) -> - compute_hash_bitmask([], BitsRest, Acc bsl Bits + ones(Bits)); -compute_hash_bitmask([_ | LevelsRest], [Bits | BitsRest], Acc) -> - compute_hash_bitmask(LevelsRest, BitsRest, Acc bsl Bits + ones(Bits)); +compute_hash_bitmask(Filter, [{timestamp, _, Size} | Rest], Acc) -> + compute_hash_bitmask(Filter, Rest, (Acc bsl Size) + 0); +compute_hash_bitmask(['#'], [{hash, _, Size} | Rest], Acc) -> + compute_hash_bitmask(['#'], Rest, (Acc bsl Size) + 0); +compute_hash_bitmask(['+' | Tail], [{hash, _, Size} | Rest], Acc) -> + compute_hash_bitmask(Tail, Rest, (Acc bsl Size) + 0); +compute_hash_bitmask([], [{hash, level, Size} | Rest], Acc) -> + compute_hash_bitmask([], Rest, (Acc bsl Size) + ones(Size)); +compute_hash_bitmask([_ | Tail], [{hash, level, Size} | Rest], Acc) -> + compute_hash_bitmask(Tail, Rest, (Acc bsl Size) + ones(Size)); +compute_hash_bitmask(_, [{hash, levels, Size} | Rest], Acc) -> + compute_hash_bitmask([], Rest, (Acc bsl Size) + ones(Size)); compute_hash_bitmask(_, [], Acc) -> Acc. +compute_time_bitmask([{timestamp, _, Size} | Rest], Acc) -> + compute_time_bitmask(Rest, (Acc bsl Size) + ones(Size)); +compute_time_bitmask([{hash, _, Size} | Rest], Acc) -> + compute_time_bitmask(Rest, (Acc bsl Size) + 0); +compute_time_bitmask([], Acc) -> + Acc. + ones(Bits) -> 1 bsl Bits - 1. 
@@ -308,16 +345,16 @@ match_next( It = #it{ keymapper = Keymapper, topic_filter = TopicFilter, - hash_filter = HashFilter, + hash_bitfilter = HashBitfilter, hash_bitmask = HashBitmask, - start_time = StartTime + time_bitfilter = TimeBitfilter, + time_bitmask = TimeBitmask }, - TopicHash, - PublishedAt, + Bitstring, Value ) -> - HashMatches = (TopicHash band It#it.hash_bitmask) == It#it.hash_filter, - TimeMatches = PublishedAt >= It#it.start_time, + HashMatches = (Bitstring band HashBitmask) == HashBitfilter, + TimeMatches = (Bitstring band TimeBitmask) >= TimeBitfilter, case HashMatches of true when TimeMatches -> {Topic, MessagePayload} = unwrap_message_value(Value), @@ -327,13 +364,20 @@ match_next( false -> next(It#it{next_action = next}) end; - true -> - NextSeek = combine(TopicHash, StartTime, <<>>, Keymapper), + true when not TimeMatches -> + NextBitstring = (Bitstring band (bnot TimeBitmask)) bor TimeBitfilter, + NextSeek = combine(NextBitstring, <<>>, Keymapper), next(It#it{next_action = {seek, NextSeek}}); false -> - case compute_next_seek(TopicHash, HashFilter, HashBitmask, Keymapper) of - NextHash when is_integer(NextHash) -> - NextSeek = combine(NextHash, StartTime, <<>>, Keymapper), + % _ -> + case compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) of + NextBitstring when is_integer(NextBitstring) -> + % ct:pal("Bitstring = ~32.16.0B", [Bitstring]), + % ct:pal("Bitfilter = ~32.16.0B", [Bitfilter]), + % ct:pal("HBitmask = ~32.16.0B", [HashBitmask]), + % ct:pal("TBitmask = ~32.16.0B", [TimeBitmask]), + % ct:pal("NextBitstring = ~32.16.0B", [NextBitstring]), + NextSeek = combine(NextBitstring, <<>>, Keymapper), next(It#it{next_action = {seek, NextSeek}}); none -> stop_iteration(It) @@ -344,10 +388,12 @@ stop_iteration(It) -> ok = rocksdb:iterator_close(It#it.handle), none. 
-compute_next_seek(TopicHash, HashFilter, HashBitmask, Keymapper = #keymapper{}) -> - BitsPerLevel = Keymapper#keymapper.topic_bits_per_level, - compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel); -compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel) -> +compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) -> + Sources = Keymapper#keymapper.source, + Size = Keymapper#keymapper.bitsize, + compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size). + +compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) -> % NOTE % Ok, this convoluted mess implements a sort of _increment operation_ for some % strange number in variable bit-width base. There are `Levels` "digits", those @@ -356,66 +402,117 @@ compute_next_seek(TopicHash, HashFilter, HashBitmask, BitsPerLevel) -> % with exacly one possible value). % TODO make at least remotely readable / optimize later Result = zipfoldr3( - fun(LevelHash, Filter, LevelMask, Bits, Shift, {Carry, Acc}) -> - case LevelMask of - 0 when Carry == 0 -> - {0, Acc + (LevelHash bsl Shift)}; - 0 -> - LevelHash1 = LevelHash + Carry, - NextCarry = LevelHash1 bsr Bits, - NextAcc = (LevelHash1 band ones(Bits)) bsl Shift, - {NextCarry, NextAcc}; - _ when (LevelHash + Carry) == Filter -> - {0, Acc + (Filter bsl Shift)}; - _ when (LevelHash + Carry) > Filter -> - {1, Filter bsl Shift}; - _ -> - {0, Filter bsl Shift} + fun(Source, Substring, Filter, LBitmask, Offset, {Carry, Acc}) -> + case Source of + {hash, _, _} when LBitmask =:= 0, Carry =:= 0 -> + {0, Acc + (Substring bsl Offset)}; + {hash, _, S} when LBitmask =:= 0 -> + Substring1 = Substring + Carry, + Carry1 = Substring1 bsr S, + Acc1 = (Substring1 band ones(S)) bsl Offset, + {Carry1, Acc1}; + {hash, _, _} when LBitmask =/= 0, (Substring + Carry) =:= Filter -> + {0, Acc + (Filter bsl Offset)}; + {hash, _, _} when LBitmask =/= 0, (Substring + Carry) > Filter -> + {1, Filter bsl Offset}; + {hash, _, _} when LBitmask =/= 
0 -> + {0, Filter bsl Offset}; + {timestamp, _, _} when Carry =:= 0 -> + {0, Acc + (Substring bsl Offset)}; + {timestamp, _, S} -> + Substring1 = Substring + Carry, + Carry1 = Substring1 bsr S, + Acc1 = (Substring1 band ones(S)) bsl Offset, + {Carry1, Acc1} end end, + % TODO + % We can put carry bit into the `Acc`'s MSB instead of wrapping it into a tuple. + % This could save us a heap alloc which might be imporatant in a hot path. {1, 0}, - TopicHash, - HashFilter, + Bitstring, + HashBitfilter, HashBitmask, - BitsPerLevel + Size, + Sources ), case Result of - {_, {_Carry = 0, Next}} -> - Next bor HashFilter; - {_, {_Carry = 1, _}} -> + {_Carry = 0, Next} -> + Next bor (HashBitfilter band HashBitmask); + {_Carry = 1, _} -> % we got "carried away" past the range, time to stop iteration none end. -zipfoldr3(_FoldFun, Acc, _, _, _, []) -> - {0, Acc}; -zipfoldr3(FoldFun, Acc, I1, I2, I3, [Bits | Rest]) -> - {Shift, AccNext} = zipfoldr3( - FoldFun, - Acc, - I1, - I2, - I3, - Rest - ), - { - Shift + Bits, - FoldFun( - (I1 bsr Shift) band ones(Bits), - (I2 bsr Shift) band ones(Bits), - (I3 bsr Shift) band ones(Bits), - Bits, - Shift, - AccNext - ) - }. +zipfoldr3(_FoldFun, Acc, _, _, _, 0, []) -> + Acc; +zipfoldr3(FoldFun, Acc, I1, I2, I3, Offset, [Source = {_, _, S} | Rest]) -> + OffsetNext = Offset - S, + AccNext = zipfoldr3(FoldFun, Acc, I1, I2, I3, OffsetNext, Rest), + FoldFun( + Source, + substring(I1, OffsetNext, S), + substring(I2, OffsetNext, S), + substring(I3, OffsetNext, S), + OffsetNext, + AccNext + ). + +substring(I, Offset, Size) -> + (I bsr Offset) band ones(Size). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). 
+make_keymapper_test_() -> + [ + ?_assertEqual( + #keymapper{ + source = [ + {timestamp, 9, 23}, + {hash, level, 2}, + {hash, level, 4}, + {hash, levels, 8}, + {timestamp, 0, 9} + ], + bitsize = 46, + tau = 512 + }, + make_keymapper(#{ + timestamp_bits => 32, + topic_bits_per_level => [2, 4, 8], + max_tau => 1000 + }) + ), + ?_assertEqual( + #keymapper{ + source = [ + {timestamp, 0, 32}, + {hash, levels, 16} + ], + bitsize = 48, + tau = 1 + }, + make_keymapper(#{ + timestamp_bits => 32, + topic_bits_per_level => [16], + max_tau => 1 + }) + ) + ]. + compute_test_bitmask(TopicFilter) -> - compute_hash_bitmask(TopicFilter, [3, 4, 5, 2], 0). + compute_hash_bitmask( + TopicFilter, + [ + {hash, level, 3}, + {hash, level, 4}, + {hash, level, 5}, + {hash, levels, 2} + ], + 0 + ). bitmask_test_() -> [ @@ -464,8 +561,19 @@ wildcard_bitmask_test_() -> %% Key3 = |123|999|679|001| → Seek = 1 |123|000|678|000| → eos %% Key4 = |125|011|179|017| → Seek = 1 |123|000|678|000| → eos -compute_test_next_seek(TopicHash, HashFilter, HashBitmask) -> - compute_next_seek(TopicHash, HashFilter, HashBitmask, [8, 8, 16, 12]). +compute_test_next_seek(Bitstring, Bitfilter, HBitmask) -> + compute_next_seek( + Bitstring, + Bitfilter, + HBitmask, + [ + {hash, level, 8}, + {hash, level, 8}, + {hash, level, 16}, + {hash, levels, 12} + ], + 8 + 8 + 16 + 12 + ). 
next_seek_test_() -> [ diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index b1a8a396b..5608f6008 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -52,7 +52,7 @@ t_iterate(Config) -> begin {ok, It} = emqx_replay_message_storage:make_iterator(DB, Topic, 0), Values = iterate(It), - ?assertEqual(Values, lists:map(fun integer_to_binary/1, Timestamps)) + ?assertEqual(lists:map(fun integer_to_binary/1, Timestamps), Values) end || Topic <- Topics ], @@ -137,28 +137,30 @@ parse_topic(Topic) -> t_prop_topic_hash_computes(_) -> Keymapper = emqx_replay_message_storage:make_keymapper(#{ + timestamp_bits => 32, topic_bits_per_level => [8, 12, 16, 24], - timestamp_bits => 0 + max_tau => 10000 }), ?assert( proper:quickcheck( - ?FORALL(Topic, topic(), begin - Hash = emqx_replay_message_storage:compute_topic_hash(Topic, Keymapper), - is_integer(Hash) andalso (byte_size(binary:encode_unsigned(Hash)) =< 8) + ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin + BS = emqx_replay_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), + is_integer(BS) andalso (BS < (1 bsl 92)) end) ) ). t_prop_hash_bitmask_computes(_) -> Keymapper = emqx_replay_message_storage:make_keymapper(#{ - topic_bits_per_level => [8, 12, 16, 24], - timestamp_bits => 0 + timestamp_bits => 16, + topic_bits_per_level => [8, 12, 16], + max_tau => 100 }), ?assert( proper:quickcheck( ?FORALL(TopicFilter, topic_filter(), begin - Hash = emqx_replay_message_storage:compute_hash_bitmask(TopicFilter, Keymapper), - is_integer(Hash) andalso (byte_size(binary:encode_unsigned(Hash)) =< 8) + Mask = emqx_replay_message_storage:compute_hash_bitmask(TopicFilter, Keymapper), + is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) end) ) ). 
@@ -252,8 +254,9 @@ init_per_testcase(TC, Config) -> {ok, DB} = emqx_replay_message_storage:open(Filename, #{ column_family => {atom_to_list(TC), []}, keymapper => emqx_replay_message_storage:make_keymapper(#{ + timestamp_bits => 64, topic_bits_per_level => [8, 8, 32, 16], - timestamp_bits => 64 + max_tau => 5 }) }), [{handle, DB} | Config]. From b5bb77dd581e0afa59a9e55a38e789e905b66d58 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 1 Jan 2023 14:34:25 +0100 Subject: [PATCH 09/49] feat(ds): Add metadata storage and supervision tree --- apps/emqx_replay/src/emqx_replay.erl | 47 +++ apps/emqx_replay/src/emqx_replay_app.erl | 7 +- .../src/emqx_replay_local_store.erl | 278 ++++++++++++++++++ .../src/emqx_replay_local_store_sup.erl | 74 +++++ .../src/emqx_replay_message_storage.erl | 170 +++++++---- apps/emqx_replay/src/emqx_replay_sup.erl | 64 ++++ .../test/emqx_replay_storage_SUITE.erl | 79 +++-- scripts/check-elixir-applications.exs | 2 +- scripts/check-elixir-deps-discrepancies.exs | 2 +- ...elixir-emqx-machine-boot-discrepancies.exs | 2 +- 10 files changed, 623 insertions(+), 102 deletions(-) create mode 100644 apps/emqx_replay/src/emqx_replay.erl create mode 100644 apps/emqx_replay/src/emqx_replay_local_store.erl create mode 100644 apps/emqx_replay/src/emqx_replay_local_store_sup.erl create mode 100644 apps/emqx_replay/src/emqx_replay_sup.erl diff --git a/apps/emqx_replay/src/emqx_replay.erl b/apps/emqx_replay/src/emqx_replay.erl new file mode 100644 index 000000000..ee83e35d9 --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay.erl @@ -0,0 +1,47 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_replay). + +%% API: +-export([]). + +-export_type([topic/0, time/0]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +%% parsed +-type topic() :: list(binary()). + +%% TODO granularity? +-type time() :: integer(). + +%%================================================================================ +%% API funcions +%%================================================================================ + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +%%================================================================================ +%% Internal exports +%%================================================================================ + +%%================================================================================ +%% Internal functions +%%================================================================================ diff --git a/apps/emqx_replay/src/emqx_replay_app.erl b/apps/emqx_replay/src/emqx_replay_app.erl index 090299150..bf6fd0b55 100644 --- a/apps/emqx_replay/src/emqx_replay_app.erl +++ b/apps/emqx_replay/src/emqx_replay_app.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2020-2022 EMQ Technologies Co., 
Ltd. All Rights Reserved. +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -16,4 +16,7 @@ -module(emqx_replay_app). --export([]). +-export([start/2]). + +start(_Type, _Args) -> + emqx_replay_sup:start_link(). diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl new file mode 100644 index 000000000..0c1eb4171 --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -0,0 +1,278 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_replay_local_store). + +-behavior(gen_server). + +%% API: +-export([start_link/1]). + +-export([make_iterator/3, store/5, next/1]). + +%% behavior callbacks: +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). + +-export_type([cf_refs/0, gen_id/0, db_write_options/0]). + +-compile({inline, [meta_lookup/2]}). 
+ +%%================================================================================ +%% Type declarations +%%================================================================================ + +%% see rocksdb:db_options() +-type options() :: proplists:proplist(). + +-type db_write_options() :: proplists:proplist(). + +-type cf_refs() :: [{_CFName :: string(), _CFRef :: reference()}]. + +-record(generation, { + %% Module that handles data for the generation + module :: module(), + %% Module-specific attributes + data :: term() + % time_range :: {emqx_replay:time(), emqx_replay:time()} +}). + +-record(s, { + zone :: emqx_types:zone(), + db :: rocksdb:db_handle(), + column_families :: cf_refs() +}). + +-record(it, { + module :: module(), + data :: term() +}). + +-type gen_id() :: 0..16#ffff. + +-opaque iterator() :: #it{}. + +%% Contents of the default column family: +%% +%% [{<<"genNN">>, #generation{}}, ..., +%% {<<"current">>, GenID}] + +-define(DEFAULT_CF_OPTS, []). + +%%================================================================================ +%% API funcions +%%================================================================================ + +-spec start_link(emqx_types:zone()) -> {ok, pid()}. +start_link(Zone) -> + gen_server:start_link(?MODULE, [Zone], []). + +-spec make_iterator(emqx_types:zone(), emqx_topic:words(), emqx_replay_message_storage:time()) -> + {ok, _TODO} | {error, _TODO}. +make_iterator(Zone, TopicFilter, StartTime) -> + %% TODO: this is not supposed to work like this. Just a mock-up + #generation{module = Mod, data = Data} = meta_lookup(Zone, 0), + case Mod:make_iterator(Data, TopicFilter, StartTime) of + {ok, It} -> + {ok, #it{ + module = Mod, + data = It + }}; + Err -> + Err + end. + +-spec store(emqx_types:zone(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary()) -> + ok | {error, _TODO}. +store(Zone, GUID, Time, Topic, Msg) -> + %% TODO: this is not supposed to work like this. 
Just a mock-up + #generation{module = Mod, data = Data} = meta_lookup(Zone, 0), + Mod:store(Data, GUID, Time, Topic, Msg). + +-spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}. +next(#it{module = Mod, data = It0}) -> + case Mod:next(It0) of + {value, Val, It} -> + {value, Val, #it{module = Mod, data = It}}; + Other -> + Other + end. + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +init([Zone]) -> + process_flag(trap_exit, true), + {ok, DBHandle, CFRefs} = open_db(Zone), + S0 = #s{ + zone = Zone, + db = DBHandle, + column_families = CFRefs + }, + S = ensure_current_generation(S0), + read_metadata(S), + {ok, S}. + +handle_call(_Call, _From, S) -> + {reply, {error, unknown_call}, S}. + +handle_cast(_Cast, S) -> + {noreply, S}. + +handle_info(_Info, S) -> + {noreply, S}. + +terminate(_Reason, #s{db = DB, zone = Zone}) -> + meta_erase(Zone), + ok = rocksdb:close(DB). + +%%================================================================================ +%% Internal functions +%%================================================================================ + +-spec read_metadata(#s{}) -> #s{}. +read_metadata(S) -> + %% TODO: just a mockup to make the existing tests pass + read_metadata(0, S). + +-spec read_metadata(gen_id(), #s{}) -> #s{}. +read_metadata(GenId, S = #s{zone = Zone, db = DBHandle, column_families = CFs}) -> + Gen = #generation{module = Mod, data = Data} = schema_get_gen(DBHandle, GenId), + DB = Mod:open(DBHandle, GenId, CFs, Data), + meta_put(Zone, GenId, Gen#generation{data = DB}). + +-spec ensure_current_generation(#s{}) -> #s{}. 
+ensure_current_generation(S = #s{zone = Zone, db = DBHandle, column_families = CFs}) -> + case schema_get_current(DBHandle) of + undefined -> + GenId = 0, + ok = schema_put_current(DBHandle, GenId), + create_new_generation_schema(GenId, S); + _GenId -> + S + end. + +-spec create_new_generation_schema(gen_id(), #s{}) -> #s{}. +create_new_generation_schema( + GenId, S = #s{zone = Zone, db = DBHandle, column_families = CFs} +) -> + {Module, Options} = new_generation_config(Zone), + {NewGenData, NewCFs} = Module:create_new(DBHandle, GenId, Options), + NewGen = #generation{ + module = Module, + data = NewGenData + }, + %% TODO: Transaction? Column family creation can't be transactional, anyway. + ok = schema_put_gen(DBHandle, GenId, NewGen), + S#s{column_families = NewCFs ++ CFs}. + +-spec new_generation_config(emqx_types:zone()) -> + {module(), term()}. +new_generation_config(Zone) -> + %% TODO: make a proper HOCON schema and all... + Zones = application:get_env(emqx_replay, zone_config, #{}), + DefaultConf = + #{ + timestamp_bits => 64, + topic_bits_per_level => [8, 8, 8, 32, 16], + max_tau => 5 + }, + maps:get(Zone, Zones, {emqx_replay_message_storage, DefaultConf}). + +-spec open_db(emqx_types:zone()) -> {ok, rocksdb:db_handle(), cf_refs()} | {error, _TODO}. +open_db(Zone) -> + Filename = atom_to_list(Zone), + DBOptions = application:get_env(emqx_replay, db_options, []), + ColumnFamiles = + case rocksdb:list_column_families(Filename, DBOptions) of + {ok, ColumnFamiles0} -> + [{I, []} || I <- ColumnFamiles0]; + % DB is not present. First start + {error, {db_open, _}} -> + [{"default", ?DEFAULT_CF_OPTS}] + end, + case rocksdb:open(Filename, [{create_if_missing, true} | DBOptions], ColumnFamiles) of + {ok, Handle, CFRefs} -> + {CFNames, _} = lists:unzip(ColumnFamiles), + {ok, Handle, lists:zip(CFNames, CFRefs)}; + Error -> + Error + end. + +%% Functions for dealing with the metadata stored persistently in rocksdb + +-define(CURRENT_GEN, <<"current">>). 
+-define(SCHEMA_WRITE_OPTS, []). +-define(SCHEMA_READ_OPTS, []). + +-spec schema_get_gen(rocksdb:db_handle(), gen_id()) -> #generation{}. +schema_get_gen(DBHandle, GenId) -> + {ok, Bin} = rocksdb:get(DBHandle, gen_rocksdb_key(GenId), ?SCHEMA_READ_OPTS), + binary_to_term(Bin). + +-spec schema_put_gen(rocksdb:db_handle(), gen_id(), #generation{}) -> ok | {error, _}. +schema_put_gen(DBHandle, GenId, Gen) -> + rocksdb:put(DBHandle, gen_rocksdb_key(GenId), term_to_binary(Gen), ?SCHEMA_WRITE_OPTS). + +-spec schema_get_current(rocksdb:db_handle()) -> gen_id() | undefined. +schema_get_current(DBHandle) -> + case rocksdb:get(DBHandle, ?CURRENT_GEN, ?SCHEMA_READ_OPTS) of + {ok, Bin} -> + binary_to_integer(Bin); + not_found -> + undefined + end. + +-spec schema_put_current(rocksdb:db_handle(), gen_id()) -> ok | {error, _}. +schema_put_current(DBHandle, GenId) -> + rocksdb:put(DBHandle, ?CURRENT_GEN, integer_to_binary(GenId), ?SCHEMA_WRITE_OPTS). + +-spec gen_rocksdb_key(integer()) -> string(). +gen_rocksdb_key(N) -> + <<"gen", N:32>>. + +-undef(CURRENT_GEN). +-undef(SCHEMA_WRITE_OPTS). +-undef(SCHEMA_READ_OPTS). + +%% Functions for dealing with the runtime zone metadata: + +-define(PERSISTENT_TERM(ZONE, GEN), {?MODULE, ZONE, GEN}). + +-spec meta_lookup(emqx_types:zone(), gen_id()) -> #generation{}. +meta_lookup(Zone, GenId) -> + persistent_term:get(?PERSISTENT_TERM(Zone, GenId)). + +-spec meta_put(emqx_types:zone(), gen_id(), #generation{}) -> ok. +meta_put(Zone, GenId, Gen) -> + persistent_term:put(?PERSISTENT_TERM(Zone, GenId), Gen). + +-spec meta_erase(emqx_types:zone()) -> ok. +meta_erase(Zone) -> + [ + persistent_term:erase(K) + || {K = ?PERSISTENT_TERM(Z, _), _} <- persistent_term:get(), Z =:= Zone + ], + ok. + +-undef(PERSISTENT_TERM). + +%% -spec store_cfs(rocksdb:db_handle(), [{string(), rocksdb:cf_handle()}]) -> ok. 
+%% store_cfs(DBHandle, CFRefs) -> +%% lists:foreach( +%% fun({CFName, CFRef}) -> +%% persistent_term:put({self(), CFName}, {DBHandle, CFRef}) +%% end, +%% CFRefs). diff --git a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl new file mode 100644 index 000000000..fb88ef212 --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl @@ -0,0 +1,74 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_replay_local_store_sup). + +-behavior(supervisor). + +%% API: +-export([start_link/0, start_zone/1, stop_zone/1]). + +%% behavior callbacks: +-export([init/1]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(SUP, ?MODULE). + +%%================================================================================ +%% API funcions +%%================================================================================ + +-spec start_link() -> {ok, pid()}. +start_link() -> + supervisor:start_link({local, ?SUP}, ?MODULE, []). + +-spec start_zone(emqx_types:zone()) -> supervisor:startchild_ret(). 
+start_zone(Zone) -> + supervisor:start_child(?SUP, zone_child_spec(Zone)). + +-spec stop_zone(emqx_types:zone()) -> ok | {error, _}. +stop_zone(Zone) -> + ok = supervisor:terminate_child(?SUP, Zone), + ok = supervisor:delete_child(?SUP, Zone). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +init([]) -> + Children = [], + SupFlags = #{ + strategy => one_for_one, + intensity => 10, + period => 10 + }, + {ok, {SupFlags, Children}}. + +%%================================================================================ +%% Internal functions +%%================================================================================ + +-spec zone_child_spec(emqx_types:zone()) -> supervisor:child_spec(). +zone_child_spec(Zone) -> + #{ + id => Zone, + start => {emqx_replay_local_store, start_link, [Zone]}, + shutdown => 5_000, + restart => permanent, + type => worker + }. diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 66668b23b..94f297750 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -16,8 +16,80 @@ -module(emqx_replay_message_storage). +%%================================================================================ +%% @doc Description of the schema +%% +%% Let us assume that `T' is a topic and `t' is time. These are the two +%% dimensions used to index messages. 
They can be viewed as +%% "coordinates" of an MQTT message in a 2D space. +%% +%% Oftentimes, when wildcard subscription is used, keys must be +%% scanned in both dimensions simultaneously. +%% +%% Rocksdb allows to iterate over sorted keys very fast. This means we +%% need to map our two-dimentional keys to a single index that is +%% sorted in a way that helps to iterate over both time and topic +%% without having to do a lot of random seeks. +%% +%% == Mapping of 2D keys to rocksdb keys == +%% +%% We use "zigzag" pattern to store messages, where rocksdb key is +%% composed like like this: +%% +%% |ttttt|TTTTTTTTT|tttt| +%% ^ ^ ^ +%% | | | +%% +-------+ | +---------+ +%% | | | +%% most significant topic hash least significant +%% bits of timestamp bits of timestamp +%% +%% Topic hash is level-aware: each topic level is hashed separately +%% and the resulting hashes are bitwise-concatentated. This allows us +%% to map topics to fixed-length bitstrings while keeping some degree +%% of information about the hierarchy. +%% +%% Next important concept is what we call "tau-interval". It is time +%% interval determined by the number of least significant bits of the +%% timestamp found at the tail of the rocksdb key. +%% +%% The resulting index is a space-filling curve that looks like +%% this in the topic-time 2D space: +%% +%% T ^ ---->------ |---->------ |---->------ +%% | --/ / --/ / --/ +%% | -<-/ | -<-/ | -<-/ +%% | -/ | -/ | -/ +%% | ---->------ | ---->------ | ---->------ +%% | --/ / --/ / --/ +%% | ---/ | ---/ | ---/ +%% | -/ ^ -/ ^ -/ +%% | ---->------ | ---->------ | ---->------ +%% | --/ / --/ / --/ +%% | -<-/ | -<-/ | -<-/ +%% | -/ | -/ | -/ +%% | ---->------| ---->------| ----------> +%% | +%% -+------------+-----------------------------> t +%% tau +%% +%% This structure allows to quickly seek to a the first message that +%% was recorded in a certain tau-interval in a certain topic or a +%% group of topics matching filter like `foo/bar/+/+' or `foo/bar/#`. 
+%% +%% Due to its structure, for each pair of rocksdb keys K1 and K2, such +%% that K1 > K2 and topic(K1) = topic(K2), timestamp(K1) > +%% timestamp(K2). +%% That is, replay doesn't reorder messages published in each +%% individual topic. +%% +%% This property doesn't hold between different topics, but it's not deemed +%% a problem right now. +%% +%%================================================================================ + %% API: --export([open/2, close/1]). +-export([create_new/3, open/4]). -export([make_keymapper/1]). -export([store/5]). @@ -55,29 +127,11 @@ %% and _rest of levels_ (if any) get 16 bits. -type bits_per_level() :: [bits(), ...]. -%% see rocksdb:db_options() --type db_options() :: proplists:proplist(). - -%% see rocksdb:cf_options() --type db_cf_options() :: proplists:proplist(). - -%% see rocksdb:write_options() --type db_write_options() :: proplists:proplist(). - -%% see rocksdb:read_options() --type db_read_options() :: proplists:proplist(). - -type options() :: #{ %% Keymapper. keymapper := keymapper(), %% Name and options to use to open specific column family. - column_family => {_Name :: string(), db_cf_options()}, - %% Options to use when opening the DB. - open_options => db_options(), - %% Options to use when writing a message to the DB. - write_options => db_write_options(), - %% Options to use when iterating over messages in the DB. - read_options => db_read_options() + cf_options => emqx_replay_local_store:db_cf_options() }. -define(DEFAULT_COLUMN_FAMILY, {"default", []}). @@ -90,12 +144,18 @@ -define(DEFAULT_WRITE_OPTIONS, [{sync, true}]). -define(DEFAULT_READ_OPTIONS, []). +%% Persistent configuration of the generation, it is used to create db +%% record when the database is reopened +-record(schema, {keymapper :: keymapper()}). + +-type schema() :: #schema{}. 
+ -record(db, { handle :: rocksdb:db_handle(), cf :: rocksdb:cf_handle(), keymapper :: keymapper(), - write_options = [{sync, true}] :: db_write_options(), - read_options = [] :: db_write_options() + write_options = [{sync, true}] :: emqx_replay_local_store:db_write_options(), + read_options = [] :: emqx_replay_local_store:db_write_options() }). -record(it, { @@ -132,40 +192,33 @@ %% API funcions %%================================================================================ --spec open(file:filename_all(), options()) -> - {ok, db()} | {error, _TODO}. -open(Filename, Options) -> - CFDescriptors = - case maps:get(column_family, Options, undefined) of - CF = {_Name, _} -> - % TODO - % > When opening a DB in a read-write mode, you need to specify all - % > Column Families that currently exist in a DB. If that's not the case, - % > DB::Open call will return Status::InvalidArgument(). - % This probably means that we need the _manager_ (the thing which knows - % about all the column families there is) to hold the responsibility to - % open the database and hold all the handles. - [CF, ?DEFAULT_COLUMN_FAMILY]; - undefined -> - [?DEFAULT_COLUMN_FAMILY] - end, - DBOptions = maps:get(open_options, Options, ?DEFAULT_OPEN_OPTIONS), - case rocksdb:open(Filename, DBOptions, CFDescriptors) of - {ok, Handle, [CFHandle | _]} -> - {ok, #db{ - handle = Handle, - cf = CFHandle, - keymapper = maps:get(keymapper, Options), - write_options = maps:get(write_options, Options, ?DEFAULT_WRITE_OPTIONS), - read_options = maps:get(read_options, Options, ?DEFAULT_READ_OPTIONS) - }}; - Error -> - Error - end. +%% Create a new column family for the generation and a serializable representation of the schema +-spec create_new(rocksdb:db_handle(), emqx_replay_local_store:generation_id(), options()) -> + {schema(), emqx_replay_local_store:cf_refs()}. 
+create_new(DBHandle, GenId, Options) -> + CFName = data_cf(GenId), + CFOptions = maps:get(cf_options, Options, []), + {ok, CFHandle} = rocksdb:create_column_family(DBHandle, CFName, CFOptions), + Schema = #schema{keymapper = make_keymapper(Options)}, + {Schema, [{CFName, CFHandle}]}. --spec close(db()) -> ok | {error, _}. -close(#db{handle = DB}) -> - rocksdb:close(DB). +%% Reopen the database +-spec open( + rocksdb:db_handle(), + emqx_replay_local_store:generation_id(), + [{_CFName :: string(), _CFHandle :: reference()}], + schema() +) -> + db(). +open(DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) -> + CFHandle = proplists:get_value(data_cf(GenId), CFs), + % assert + true = is_reference(CFHandle), + #db{ + handle = DBHandle, + cf = CFHandle, + keymapper = Keymapper + }. -spec make_keymapper(Options) -> keymapper() when Options :: #{ @@ -461,6 +514,11 @@ zipfoldr3(FoldFun, Acc, I1, I2, I3, Offset, [Source = {_, _, S} | Rest]) -> substring(I, Offset, Size) -> (I bsr Offset) band ones(Size). +%% @doc Generate a column family ID for the MQTT messages +-spec data_cf(emqx_replay_local_store:gen_id()) -> string(). +data_cf(GenId) -> + ?MODULE_STRING ++ integer_to_list(GenId). + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/apps/emqx_replay/src/emqx_replay_sup.erl b/apps/emqx_replay/src/emqx_replay_sup.erl new file mode 100644 index 000000000..a5da13c7a --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay_sup.erl @@ -0,0 +1,64 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_replay_sup). + +-behavior(supervisor). + +%% API: +-export([start_link/0]). + +%% behavior callbacks: +-export([init/1]). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-define(SUP, ?MODULE). + +%%================================================================================ +%% API funcions +%%================================================================================ + +-spec start_link() -> {ok, pid()}. +start_link() -> + supervisor:start_link({local, ?SUP}, ?MODULE, []). + +%%================================================================================ +%% behavior callbacks +%%================================================================================ + +init([]) -> + Children = [zone_sup()], + SupFlags = #{ + strategy => one_for_all, + intensity => 0, + period => 1 + }, + {ok, {SupFlags, Children}}. + +%%================================================================================ +%% Internal functions +%%================================================================================ + +zone_sup() -> + #{ + id => local_store_zone_sup, + start => {emqx_replay_local_store_sup, start_link, []}, + restart => permanent, + type => supervisor, + shutdown => infinity + }. 
diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index 5608f6008..237823014 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -22,24 +22,29 @@ -include_lib("stdlib/include/assert.hrl"). -include_lib("proper/include/proper.hrl"). +-define(ZONE, zone(?FUNCTION_NAME)). + +%% Smoke test for opening and reopening the database +t_open(Config) -> + ok = emqx_replay_local_store_sup:stop_zone(?ZONE), + {ok, _} = emqx_replay_local_store_sup:start_zone(?ZONE). + %% Smoke test of store function t_store(Config) -> - DB = ?config(handle, Config), MessageID = emqx_guid:gen(), PublishedAt = 1000, Topic = [<<"foo">>, <<"bar">>], Payload = <<"message">>, - ?assertMatch(ok, emqx_replay_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload)). + ?assertMatch(ok, emqx_replay_local_store:store(?ZONE, MessageID, PublishedAt, Topic, Payload)). 
%% Smoke test for iteration through a concrete topic t_iterate(Config) -> - DB = ?config(handle, Config), %% Prepare data: Topics = [[<<"foo">>, <<"bar">>], [<<"foo">>, <<"bar">>, <<"baz">>], [<<"a">>]], Timestamps = lists:seq(1, 10), [ - emqx_replay_message_storage:store( - DB, + emqx_replay_local_store:store( + ?ZONE, emqx_guid:gen(), PublishedAt, Topic, @@ -50,7 +55,7 @@ t_iterate(Config) -> %% Iterate through individual topics: [ begin - {ok, It} = emqx_replay_message_storage:make_iterator(DB, Topic, 0), + {ok, It} = emqx_replay_local_store:make_iterator(?ZONE, Topic, 0), Values = iterate(It), ?assertEqual(lists:map(fun integer_to_binary/1, Timestamps), Values) end @@ -60,68 +65,67 @@ t_iterate(Config) -> %% Smoke test for iteration with wildcard topic filter t_iterate_wildcard(Config) -> - DB = ?config(handle, Config), %% Prepare data: Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], Timestamps = lists:seq(1, 10), _ = [ - store(DB, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) + store(?ZONE, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) || Topic <- Topics, PublishedAt <- Timestamps ], ?assertEqual( lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- Timestamps]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "#", 0)]) ), ?assertEqual( [], - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "#", 10 + 1)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "#", 10 + 1)]) ), ?assertEqual( lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- lists:seq(5, 10)]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "#", 5)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "#", 5)]) ), ?assertEqual( lists:sort([ {Topic, PublishedAt} || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps ]), - lists:sort([binary_to_term(Payload) || Payload <- 
iterate(DB, "foo/#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/#", 0)]) ), ?assertEqual( lists:sort([{"foo/bar", PublishedAt} || PublishedAt <- Timestamps]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "foo/+", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/+", 0)]) ), ?assertEqual( [], - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "foo/+/bar", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/+/bar", 0)]) ), ?assertEqual( lists:sort([ {Topic, PublishedAt} || Topic <- ["foo/bar", "foo/bar/baz", "a/bar"], PublishedAt <- Timestamps ]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "+/bar/#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "+/bar/#", 0)]) ), ?assertEqual( lists:sort([{Topic, PublishedAt} || Topic <- ["a", "a/bar"], PublishedAt <- Timestamps]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "a/#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "a/#", 0)]) ), ?assertEqual( [], - lists:sort([binary_to_term(Payload) || Payload <- iterate(DB, "a/+/+", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "a/+/+", 0)]) ), ok. -store(DB, PublishedAt, Topic, Payload) -> +store(Zone, PublishedAt, Topic, Payload) -> ID = emqx_guid:gen(), - emqx_replay_message_storage:store(DB, ID, PublishedAt, parse_topic(Topic), Payload). + emqx_replay_local_store:store(Zone, ID, PublishedAt, parse_topic(Topic), Payload). iterate(DB, TopicFilter, StartTime) -> - {ok, It} = emqx_replay_message_storage:make_iterator(DB, parse_topic(TopicFilter), StartTime), + {ok, It} = emqx_replay_local_store:make_iterator(DB, parse_topic(TopicFilter), StartTime), iterate(It). 
iterate(It) -> - case emqx_replay_message_storage:next(It) of + case emqx_replay_local_store:next(It) of {value, Payload, ItNext} -> [Payload | iterate(ItNext)]; none -> @@ -166,7 +170,6 @@ t_prop_hash_bitmask_computes(_) -> ). t_prop_iterate_stored_messages(Config) -> - DB = ?config(handle, Config), ?assertEqual( true, proper:quickcheck( @@ -175,7 +178,7 @@ t_prop_iterate_stored_messages(Config) -> messages(), begin Stream = payload_gen:interleave_streams(Streams), - ok = store_message_stream(DB, Stream), + ok = store_message_stream(?ZONE, Stream), % TODO actually verify some property true end @@ -183,12 +186,12 @@ t_prop_iterate_stored_messages(Config) -> ) ). -store_message_stream(DB, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> +store_message_stream(Zone, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> MessageID = <>, PublishedAt = rand:uniform(ChunkNum), - ok = emqx_replay_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload), - store_message_stream(DB, payload_gen:next(Rest)); -store_message_stream(_DB, []) -> + ok = emqx_replay_local_store:store(Zone, MessageID, PublishedAt, Topic, Payload), + store_message_stream(Zone, payload_gen:next(Rest)); +store_message_stream(_Zone, []) -> ok. messages() -> @@ -249,18 +252,12 @@ topic_level(Entropy) -> all() -> emqx_common_test_helpers:all(?MODULE). init_per_testcase(TC, Config) -> - Filename = filename:join(?MODULE_STRING, atom_to_list(TC)), - ok = filelib:ensure_dir(Filename), - {ok, DB} = emqx_replay_message_storage:open(Filename, #{ - column_family => {atom_to_list(TC), []}, - keymapper => emqx_replay_message_storage:make_keymapper(#{ - timestamp_bits => 64, - topic_bits_per_level => [8, 8, 32, 16], - max_tau => 5 - }) - }), - [{handle, DB} | Config]. + {ok, _} = application:ensure_all_started(emqx_replay), + {ok, _} = emqx_replay_local_store_sup:start_zone(zone(TC)), + Config. 
end_per_testcase(_TC, Config) -> - DB = ?config(handle, Config), - catch emqx_replay_message_storage:close(DB). + ok = application:stop(emqx_replay). + +zone(TC) -> + list_to_atom(?MODULE_STRING ++ atom_to_list(TC)). diff --git a/scripts/check-elixir-applications.exs b/scripts/check-elixir-applications.exs index 42c838199..1e604c69f 100755 --- a/scripts/check-elixir-applications.exs +++ b/scripts/check-elixir-applications.exs @@ -1,4 +1,4 @@ -#!/usr/bin/env elixir +#! /usr/bin/env elixir defmodule CheckElixirApplications do alias EMQXUmbrella.MixProject diff --git a/scripts/check-elixir-deps-discrepancies.exs b/scripts/check-elixir-deps-discrepancies.exs index 408079d7d..1363219ed 100755 --- a/scripts/check-elixir-deps-discrepancies.exs +++ b/scripts/check-elixir-deps-discrepancies.exs @@ -1,4 +1,4 @@ -#!/usr/bin/env elixir +#! /usr/bin/env elixir # ensure we have a fresh rebar.lock diff --git a/scripts/check-elixir-emqx-machine-boot-discrepancies.exs b/scripts/check-elixir-emqx-machine-boot-discrepancies.exs index d07e6978f..9ffdc47bf 100755 --- a/scripts/check-elixir-emqx-machine-boot-discrepancies.exs +++ b/scripts/check-elixir-emqx-machine-boot-discrepancies.exs @@ -1,4 +1,4 @@ -#!/usr/bin/env elixir +#! 
/usr/bin/env elixir defmodule CheckElixirEMQXMachineBootDiscrepancies do alias EMQXUmbrella.MixProject From 0bbc5ecb321c05b1d8ad67fc5aa7f30f570e9508 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 1 Jan 2023 14:42:04 +0100 Subject: [PATCH 10/49] refactor(ds): Introduce bitwise_concat function --- .../src/emqx_replay_message_storage.erl | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 94f297750..f2b45d221 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -106,6 +106,8 @@ -export_type([db/0, iterator/0]). +-compile({inline, [ones/1, bitwise_concat/3]}). + %%================================================================================ %% Type declarations %%================================================================================ @@ -343,41 +345,44 @@ hash(Input, Bits) -> compute_bitstring(Topic, Timestamp, [{timestamp, Offset, Size} | Rest], Acc) -> I = (Timestamp bsr Offset) band ones(Size), - compute_bitstring(Topic, Timestamp, Rest, (Acc bsl Size) + I); + compute_bitstring(Topic, Timestamp, Rest, bitwise_concat(Acc, I, Size)); compute_bitstring([], Timestamp, [{hash, level, Size} | Rest], Acc) -> I = hash(<<"/">>, Size), - compute_bitstring([], Timestamp, Rest, (Acc bsl Size) + I); + compute_bitstring([], Timestamp, Rest, bitwise_concat(Acc, I, Size)); compute_bitstring([Level | Tail], Timestamp, [{hash, level, Size} | Rest], Acc) -> I = hash(Level, Size), - compute_bitstring(Tail, Timestamp, Rest, (Acc bsl Size) + I); + compute_bitstring(Tail, Timestamp, Rest, bitwise_concat(Acc, I, Size)); compute_bitstring(Tail, Timestamp, [{hash, levels, Size} | Rest], Acc) -> I = hash(Tail, Size), - compute_bitstring(Tail, Timestamp, Rest, (Acc bsl Size) + I); + compute_bitstring(Tail, Timestamp, 
Rest, bitwise_concat(Acc, I, Size)); compute_bitstring(_, _, [], Acc) -> Acc. compute_hash_bitmask(Filter, [{timestamp, _, Size} | Rest], Acc) -> - compute_hash_bitmask(Filter, Rest, (Acc bsl Size) + 0); + compute_hash_bitmask(Filter, Rest, bitwise_concat(Acc, 0, Size)); compute_hash_bitmask(['#'], [{hash, _, Size} | Rest], Acc) -> - compute_hash_bitmask(['#'], Rest, (Acc bsl Size) + 0); + compute_hash_bitmask(['#'], Rest, bitwise_concat(Acc, 0, Size)); compute_hash_bitmask(['+' | Tail], [{hash, _, Size} | Rest], Acc) -> - compute_hash_bitmask(Tail, Rest, (Acc bsl Size) + 0); + compute_hash_bitmask(Tail, Rest, bitwise_concat(Acc, 0, Size)); compute_hash_bitmask([], [{hash, level, Size} | Rest], Acc) -> - compute_hash_bitmask([], Rest, (Acc bsl Size) + ones(Size)); + compute_hash_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); compute_hash_bitmask([_ | Tail], [{hash, level, Size} | Rest], Acc) -> - compute_hash_bitmask(Tail, Rest, (Acc bsl Size) + ones(Size)); + compute_hash_bitmask(Tail, Rest, bitwise_concat(Acc, ones(Size), Size)); compute_hash_bitmask(_, [{hash, levels, Size} | Rest], Acc) -> - compute_hash_bitmask([], Rest, (Acc bsl Size) + ones(Size)); + compute_hash_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); compute_hash_bitmask(_, [], Acc) -> Acc. compute_time_bitmask([{timestamp, _, Size} | Rest], Acc) -> - compute_time_bitmask(Rest, (Acc bsl Size) + ones(Size)); + compute_time_bitmask(Rest, bitwise_concat(Acc, ones(Size), Size)); compute_time_bitmask([{hash, _, Size} | Rest], Acc) -> - compute_time_bitmask(Rest, (Acc bsl Size) + 0); + compute_time_bitmask(Rest, bitwise_concat(Acc, 0, Size)); compute_time_bitmask([], Acc) -> Acc. +bitwise_concat(Acc, Item, ItemSize) -> + (Acc bsl ItemSize) bor Item. + ones(Bits) -> 1 bsl Bits - 1. 
From 120d4e66ae878776d3e0bf7e53185af220c1e2a0 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 1 Jan 2023 15:04:59 +0100 Subject: [PATCH 11/49] refactor(ds): Factor out configuration to a separate module --- apps/emqx_replay/src/emqx_replay_conf.erl | 46 +++++++++++++++++++ .../src/emqx_replay_local_store.erl | 17 +------ 2 files changed, 48 insertions(+), 15 deletions(-) create mode 100644 apps/emqx_replay/src/emqx_replay_conf.erl diff --git a/apps/emqx_replay/src/emqx_replay_conf.erl b/apps/emqx_replay/src/emqx_replay_conf.erl new file mode 100644 index 000000000..b7d472918 --- /dev/null +++ b/apps/emqx_replay/src/emqx_replay_conf.erl @@ -0,0 +1,46 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_replay_conf). + +%% TODO: make a proper HOCON schema and all... + +%% API: +-export([zone_config/1, db_options/0]). + +%%================================================================================ +%% API funcions +%%================================================================================ + +-define(APP, emqx_replay). + +-spec zone_config(emqx_types:zone()) -> + {module(), term()}. 
+zone_config(Zone) -> + DefaultConf = + #{ + timestamp_bits => 64, + topic_bits_per_level => [8, 8, 8, 32, 16], + max_tau => 5 + }, + DefaultZoneConfig = application:get_env( + ?APP, default_zone_config, {emqx_replay_message_storage, DefaultConf} + ), + Zones = application:get_env(?APP, zone_config, #{}), + maps:get(Zone, Zones, DefaultZoneConfig). + +-spec db_options() -> emqx_replay_local_store:db_options(). +db_options() -> + application:get_env(?APP, db_options, []). diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 0c1eb4171..78dd941be 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -168,7 +168,7 @@ ensure_current_generation(S = #s{zone = Zone, db = DBHandle, column_families = C create_new_generation_schema( GenId, S = #s{zone = Zone, db = DBHandle, column_families = CFs} ) -> - {Module, Options} = new_generation_config(Zone), + {Module, Options} = emqx_replay_conf:zone_config(Zone), {NewGenData, NewCFs} = Module:create_new(DBHandle, GenId, Options), NewGen = #generation{ module = Module, @@ -178,23 +178,10 @@ create_new_generation_schema( ok = schema_put_gen(DBHandle, GenId, NewGen), S#s{column_families = NewCFs ++ CFs}. --spec new_generation_config(emqx_types:zone()) -> - {module(), term()}. -new_generation_config(Zone) -> - %% TODO: make a proper HOCON schema and all... - Zones = application:get_env(emqx_replay, zone_config, #{}), - DefaultConf = - #{ - timestamp_bits => 64, - topic_bits_per_level => [8, 8, 8, 32, 16], - max_tau => 5 - }, - maps:get(Zone, Zones, {emqx_replay_message_storage, DefaultConf}). - -spec open_db(emqx_types:zone()) -> {ok, rocksdb:db_handle(), cf_refs()} | {error, _TODO}. 
open_db(Zone) -> Filename = atom_to_list(Zone), - DBOptions = application:get_env(emqx_replay, db_options, []), + DBOptions = emqx_replay_conf:db_options(), ColumnFamiles = case rocksdb:list_column_families(Filename, DBOptions) of {ok, ColumnFamiles0} -> From adcbf40d27b4abc479e8aa2fbac398a82580d20b Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 2 Jan 2023 15:15:28 +0100 Subject: [PATCH 12/49] refactor(ds): tau -> epoch --- apps/emqx_replay/src/emqx_replay_conf.erl | 2 +- .../src/emqx_replay_message_storage.erl | 27 ++++++++++--------- .../test/emqx_replay_storage_SUITE.erl | 4 +-- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_conf.erl b/apps/emqx_replay/src/emqx_replay_conf.erl index b7d472918..8f7105312 100644 --- a/apps/emqx_replay/src/emqx_replay_conf.erl +++ b/apps/emqx_replay/src/emqx_replay_conf.erl @@ -33,7 +33,7 @@ zone_config(Zone) -> #{ timestamp_bits => 64, topic_bits_per_level => [8, 8, 8, 32, 16], - max_tau => 5 + epoch => 5 }, DefaultZoneConfig = application:get_env( ?APP, default_zone_config, {emqx_replay_message_storage, DefaultConf} diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index f2b45d221..484cd7ae6 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -43,13 +43,14 @@ %% | | | %% most significant topic hash least significant %% bits of timestamp bits of timestamp +%% (a.k.a epoch) (a.k.a time offset) %% %% Topic hash is level-aware: each topic level is hashed separately %% and the resulting hashes are bitwise-concatentated. This allows us %% to map topics to fixed-length bitstrings while keeping some degree %% of information about the hierarchy. %% -%% Next important concept is what we call "tau-interval". It is time +%% Next important concept is what we call "epoch". 
It is time %% interval determined by the number of least significant bits of the %% timestamp found at the tail of the rocksdb key. %% @@ -71,11 +72,11 @@ %% | ---->------| ---->------| ----------> %% | %% -+------------+-----------------------------> t -%% tau +%% epoch %% %% This structure allows to quickly seek to a the first message that -%% was recorded in a certain tau-interval in a certain topic or a -%% group of topics matching filter like `foo/bar/+/+' or `foo/bar/#`. +%% was recorded in a certain epoch in a certain topic or a +%% group of topics matching filter like `foo/bar/#`. %% %% Due to its structure, for each pair of rocksdb keys K1 and K2, such %% that K1 > K2 and topic(K1) = topic(K2), timestamp(K1) > @@ -176,7 +177,7 @@ -record(keymapper, { source :: [bitsource(), ...], bitsize :: bits(), - tau :: non_neg_integer() + epoch :: non_neg_integer() }). -type bitsource() :: @@ -229,14 +230,14 @@ open(DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) -> %% Number of bits in a key allocated to each level in a message topic. topic_bits_per_level := bits_per_level(), %% Maximum granularity of iteration over time. - max_tau := time() + epoch := time() }. make_keymapper(#{ timestamp_bits := TimestampBits, topic_bits_per_level := BitsPerLevel, - max_tau := MaxTau + epoch := MaxEpoch }) -> - TimestampLSBs = floor(math:log2(MaxTau)), + TimestampLSBs = floor(math:log2(MaxEpoch)), TimestampMSBs = TimestampBits - TimestampLSBs, NLevels = length(BitsPerLevel), {LevelBits, [TailLevelsBits]} = lists:split(NLevels - 1, BitsPerLevel), @@ -249,7 +250,7 @@ make_keymapper(#{ #keymapper{ source = Source, bitsize = lists:sum([S || {_, _, S} <- Source]), - tau = 1 bsl TimestampLSBs + epoch = 1 bsl TimestampLSBs }. 
-spec store(db(), emqx_guid:guid(), time(), topic(), binary()) -> @@ -540,12 +541,12 @@ make_keymapper_test_() -> {timestamp, 0, 9} ], bitsize = 46, - tau = 512 + epoch = 512 }, make_keymapper(#{ timestamp_bits => 32, topic_bits_per_level => [2, 4, 8], - max_tau => 1000 + epoch => 1000 }) ), ?_assertEqual( @@ -555,12 +556,12 @@ make_keymapper_test_() -> {hash, levels, 16} ], bitsize = 48, - tau = 1 + epoch = 1 }, make_keymapper(#{ timestamp_bits => 32, topic_bits_per_level => [16], - max_tau => 1 + epoch => 1 }) ) ]. diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index 237823014..f604f8d63 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -143,7 +143,7 @@ t_prop_topic_hash_computes(_) -> Keymapper = emqx_replay_message_storage:make_keymapper(#{ timestamp_bits => 32, topic_bits_per_level => [8, 12, 16, 24], - max_tau => 10000 + epoch => 10000 }), ?assert( proper:quickcheck( @@ -158,7 +158,7 @@ t_prop_hash_bitmask_computes(_) -> Keymapper = emqx_replay_message_storage:make_keymapper(#{ timestamp_bits => 16, topic_bits_per_level => [8, 12, 16], - max_tau => 100 + epoch => 100 }), ?assert( proper:quickcheck( From f1e39d977a449959ab89ad9045cddb5dd2c5dbf9 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 2 Jan 2023 15:58:45 +0100 Subject: [PATCH 13/49] fix(ds): Fix dialyzer warnings --- .../src/emqx_replay_local_store.erl | 14 +++---- .../src/emqx_replay_message_storage.erl | 38 ++++++++----------- .../test/emqx_replay_storage_SUITE.erl | 8 ++-- 3 files changed, 27 insertions(+), 33 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 78dd941be..23cedb04c 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -38,7 +38,7 @@ -type 
db_write_options() :: proplists:proplist(). --type cf_refs() :: [{_CFName :: string(), _CFRef :: reference()}]. +-type cf_refs() :: [{string(), rocksdb:cf_handle()}]. -record(generation, { %% Module that handles data for the generation @@ -142,12 +142,12 @@ terminate(_Reason, #s{db = DB, zone = Zone}) -> %% Internal functions %%================================================================================ --spec read_metadata(#s{}) -> #s{}. +-spec read_metadata(#s{}) -> ok. read_metadata(S) -> %% TODO: just a mockup to make the existing tests pass read_metadata(0, S). --spec read_metadata(gen_id(), #s{}) -> #s{}. +-spec read_metadata(gen_id(), #s{}) -> ok. read_metadata(GenId, S = #s{zone = Zone, db = DBHandle, column_families = CFs}) -> Gen = #generation{module = Mod, data = Data} = schema_get_gen(DBHandle, GenId), DB = Mod:open(DBHandle, GenId, CFs, Data), @@ -206,12 +206,12 @@ open_db(Zone) -> -spec schema_get_gen(rocksdb:db_handle(), gen_id()) -> #generation{}. schema_get_gen(DBHandle, GenId) -> - {ok, Bin} = rocksdb:get(DBHandle, gen_rocksdb_key(GenId), ?SCHEMA_READ_OPTS), + {ok, Bin} = rocksdb:get(DBHandle, schema_gen_key(GenId), ?SCHEMA_READ_OPTS), binary_to_term(Bin). -spec schema_put_gen(rocksdb:db_handle(), gen_id(), #generation{}) -> ok | {error, _}. schema_put_gen(DBHandle, GenId, Gen) -> - rocksdb:put(DBHandle, gen_rocksdb_key(GenId), term_to_binary(Gen), ?SCHEMA_WRITE_OPTS). + rocksdb:put(DBHandle, schema_gen_key(GenId), term_to_binary(Gen), ?SCHEMA_WRITE_OPTS). -spec schema_get_current(rocksdb:db_handle()) -> gen_id() | undefined. schema_get_current(DBHandle) -> @@ -226,8 +226,8 @@ schema_get_current(DBHandle) -> schema_put_current(DBHandle, GenId) -> rocksdb:put(DBHandle, ?CURRENT_GEN, integer_to_binary(GenId), ?SCHEMA_WRITE_OPTS). --spec gen_rocksdb_key(integer()) -> string(). -gen_rocksdb_key(N) -> +-spec schema_gen_key(integer()) -> binary(). +schema_gen_key(N) -> <<"gen", N:32>>. -undef(CURRENT_GEN). 
diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 484cd7ae6..a622cdde0 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -105,7 +105,7 @@ hash/2 ]). --export_type([db/0, iterator/0]). +-export_type([db/0, iterator/0, schema/0]). -compile({inline, [ones/1, bitwise_concat/3]}). @@ -131,9 +131,12 @@ -type bits_per_level() :: [bits(), ...]. -type options() :: #{ - %% Keymapper. - keymapper := keymapper(), - %% Name and options to use to open specific column family. + %% Number of bits in a message timestamp. + timestamp_bits := bits(), + %% Number of bits in a key allocated to each level in a message topic. + topic_bits_per_level := bits_per_level(), + %% Maximum granularity of iteration over time. + epoch := time(), cf_options => emqx_replay_local_store:db_cf_options() }. @@ -151,7 +154,7 @@ %% record when the database is reopened -record(schema, {keymapper :: keymapper()}). --type schema() :: #schema{}. +-opaque schema() :: #schema{}. -record(db, { handle :: rocksdb:db_handle(), @@ -196,8 +199,9 @@ %%================================================================================ %% Create a new column family for the generation and a serializable representation of the schema --spec create_new(rocksdb:db_handle(), emqx_replay_local_store:generation_id(), options()) -> +-spec create_new(rocksdb:db_handle(), emqx_replay_local_store:gen_id(), options()) -> {schema(), emqx_replay_local_store:cf_refs()}. +%{schema(), emqx_replay_local_store:cf_refs()}. 
create_new(DBHandle, GenId, Options) -> CFName = data_cf(GenId), CFOptions = maps:get(cf_options, Options, []), @@ -208,30 +212,20 @@ create_new(DBHandle, GenId, Options) -> %% Reopen the database -spec open( rocksdb:db_handle(), - emqx_replay_local_store:generation_id(), - [{_CFName :: string(), _CFHandle :: reference()}], + emqx_replay_local_store:gen_id(), + emqx_replay_local_store:cf_refs(), schema() ) -> db(). open(DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) -> - CFHandle = proplists:get_value(data_cf(GenId), CFs), - % assert - true = is_reference(CFHandle), + {value, {_, CFHandle}} = lists:keysearch(data_cf(GenId), 1, CFs), #db{ handle = DBHandle, cf = CFHandle, keymapper = Keymapper }. --spec make_keymapper(Options) -> keymapper() when - Options :: #{ - %% Number of bits in a message timestamp. - timestamp_bits := bits(), - %% Number of bits in a key allocated to each level in a message topic. - topic_bits_per_level := bits_per_level(), - %% Maximum granularity of iteration over time. - epoch := time() - }. +-spec make_keymapper(options()) -> keymapper(). make_keymapper(#{ timestamp_bits := TimestampBits, topic_bits_per_level := BitsPerLevel, @@ -313,7 +307,7 @@ make_message_value(Topic, MessagePayload) -> unwrap_message_value(Binary) -> binary_to_term(Binary). --spec combine(_Bitstring :: integer(), emqx_guid:guid(), keymapper()) -> +-spec combine(_Bitstring :: integer(), emqx_guid:guid() | <<>>, keymapper()) -> key(). combine(Bitstring, MessageID, #keymapper{bitsize = Size}) -> <>. @@ -521,7 +515,7 @@ substring(I, Offset, Size) -> (I bsr Offset) band ones(Size). %% @doc Generate a column family ID for the MQTT messages --spec data_cf(emqx_replay_local_store:gen_id()) -> string(). +-spec data_cf(emqx_replay_local_store:gen_id()) -> [char()]. data_cf(GenId) -> ?MODULE_STRING ++ integer_to_list(GenId). 
diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index f604f8d63..a0424541a 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -25,12 +25,12 @@ -define(ZONE, zone(?FUNCTION_NAME)). %% Smoke test for opening and reopening the database -t_open(Config) -> +t_open(_Config) -> ok = emqx_replay_local_store_sup:stop_zone(?ZONE), {ok, _} = emqx_replay_local_store_sup:start_zone(?ZONE). %% Smoke test of store function -t_store(Config) -> +t_store(_Config) -> MessageID = emqx_guid:gen(), PublishedAt = 1000, Topic = [<<"foo">>, <<"bar">>], @@ -38,7 +38,7 @@ t_store(Config) -> ?assertMatch(ok, emqx_replay_local_store:store(?ZONE, MessageID, PublishedAt, Topic, Payload)). %% Smoke test for iteration through a concrete topic -t_iterate(Config) -> +t_iterate(_Config) -> %% Prepare data: Topics = [[<<"foo">>, <<"bar">>], [<<"foo">>, <<"bar">>, <<"baz">>], [<<"a">>]], Timestamps = lists:seq(1, 10), @@ -64,7 +64,7 @@ t_iterate(Config) -> ok. 
%% Smoke test for iteration with wildcard topic filter -t_iterate_wildcard(Config) -> +t_iterate_wildcard(_Config) -> %% Prepare data: Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], Timestamps = lists:seq(1, 10), From a0bcdb51048beddb2ca7977eee68dbf16d17dfed Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 3 Jan 2023 18:29:06 +0300 Subject: [PATCH 14/49] chore(ds): Attempt to make `compute_next_seek`'s logic clearer --- .../src/emqx_replay_message_storage.erl | 174 ++++++++++++------ .../test/emqx_replay_storage_SUITE.erl | 4 +- 2 files changed, 121 insertions(+), 57 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index a622cdde0..aeb3ebad7 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -102,6 +102,9 @@ make_message_key/4, compute_bitstring/3, compute_hash_bitmask/2, + compute_next_seek/4, + compute_time_seek/3, + compute_hash_seek/4, hash/2 ]). 
@@ -231,12 +234,12 @@ make_keymapper(#{ topic_bits_per_level := BitsPerLevel, epoch := MaxEpoch }) -> - TimestampLSBs = floor(math:log2(MaxEpoch)), + TimestampLSBs = min(TimestampBits, floor(math:log2(MaxEpoch))), TimestampMSBs = TimestampBits - TimestampLSBs, NLevels = length(BitsPerLevel), {LevelBits, [TailLevelsBits]} = lists:split(NLevels - 1, BitsPerLevel), Source = lists:flatten([ - {timestamp, TimestampLSBs, TimestampMSBs}, + [{timestamp, TimestampLSBs, TimestampMSBs} || TimestampMSBs > 0], [{hash, level, Bits} || Bits <- LevelBits], {hash, levels, TailLevelsBits}, [{timestamp, 0, TimestampLSBs} || TimestampLSBs > 0] @@ -408,8 +411,8 @@ match_next( ) -> HashMatches = (Bitstring band HashBitmask) == HashBitfilter, TimeMatches = (Bitstring band TimeBitmask) >= TimeBitfilter, - case HashMatches of - true when TimeMatches -> + case HashMatches and TimeMatches of + true -> {Topic, MessagePayload} = unwrap_message_value(Value), case emqx_topic:match(Topic, TopicFilter) of true -> @@ -417,13 +420,8 @@ match_next( false -> next(It#it{next_action = next}) end; - true when not TimeMatches -> - NextBitstring = (Bitstring band (bnot TimeBitmask)) bor TimeBitfilter, - NextSeek = combine(NextBitstring, <<>>, Keymapper), - next(It#it{next_action = {seek, NextSeek}}); false -> - % _ -> - case compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) of + case compute_next_seek(HashMatches, TimeMatches, Bitstring, It) of NextBitstring when is_integer(NextBitstring) -> % ct:pal("Bitstring = ~32.16.0B", [Bitstring]), % ct:pal("Bitfilter = ~32.16.0B", [Bitfilter]), @@ -441,62 +439,128 @@ stop_iteration(It) -> ok = rocksdb:iterator_close(It#it.handle), none. -compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) -> +%% `Bitstring` is out of the hash space defined by `HashBitfilter`. 
+compute_next_seek(_HashMatches = false, _, Bitstring, It) -> + NextBitstring = compute_hash_seek( + Bitstring, + It#it.hash_bitfilter, + It#it.hash_bitmask, + It#it.keymapper + ), + case NextBitstring of + none -> + none; + _ -> + TimeMatches = (NextBitstring band It#it.time_bitmask) >= It#it.time_bitfilter, + compute_next_seek(true, TimeMatches, NextBitstring, It) + end; +%% `Bitstring` is out of the time range defined by `TimeBitfilter`. +compute_next_seek(_HashMatches = true, _TimeMatches = false, Bitstring, It) -> + compute_time_seek(Bitstring, It#it.time_bitfilter, It#it.time_bitmask); +compute_next_seek(true, true, Bitstring, _It) -> + Bitstring. + +compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask) -> + % Replace the bits of the timestamp in `Bistring` with bits from `Timebitfilter`. + (Bitstring band (bnot TimeBitmask)) bor TimeBitfilter. + +%% Find the closest bitstring which is: +%% * greater than `Bitstring`, +%% * and falls into the hash space defined by `HashBitfilter`. +%% Note that the result can end up "back" in time and out of the time range. +compute_hash_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) -> Sources = Keymapper#keymapper.source, Size = Keymapper#keymapper.bitsize, - compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size). + compute_hash_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size). -compute_next_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) -> +compute_hash_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) -> % NOTE - % Ok, this convoluted mess implements a sort of _increment operation_ for some - % strange number in variable bit-width base. There are `Levels` "digits", those - % with `0` level bitmask have `BitsPerLevel` bit-width and those with `111...` - % level bitmask have in some sense 0 bits (because they are fixed "digits" - % with exacly one possible value). 
- % TODO make at least remotely readable / optimize later - Result = zipfoldr3( - fun(Source, Substring, Filter, LBitmask, Offset, {Carry, Acc}) -> + % We're iterating through `Substring` here, in lockstep with `HashBitfilter` + % and`HashBitmask`, starting from least signigicant bits. Each bitsource in + % `Sources` has a bitsize `S` and, accordingly, gives us a sub-bitstring `S` + % bits long which we interpret as a "digit". There are 2 flavors of those + % "digits": + % * regular digit with 2^S possible values, + % * degenerate digit with exactly 1 possible value U (represented with 0). + % Our goal here is to find a successor of `Bistring` and perform a kind of + % digit-by-digit addition operation with carry propagation. + NextSeek = zipfoldr3( + fun(Source, Substring, Filter, LBitmask, Offset, Acc) -> case Source of - {hash, _, _} when LBitmask =:= 0, Carry =:= 0 -> - {0, Acc + (Substring bsl Offset)}; {hash, _, S} when LBitmask =:= 0 -> - Substring1 = Substring + Carry, - Carry1 = Substring1 bsr S, - Acc1 = (Substring1 band ones(S)) bsl Offset, - {Carry1, Acc1}; - {hash, _, _} when LBitmask =/= 0, (Substring + Carry) =:= Filter -> - {0, Acc + (Filter bsl Offset)}; - {hash, _, _} when LBitmask =/= 0, (Substring + Carry) > Filter -> - {1, Filter bsl Offset}; - {hash, _, _} when LBitmask =/= 0 -> - {0, Filter bsl Offset}; - {timestamp, _, _} when Carry =:= 0 -> - {0, Acc + (Substring bsl Offset)}; + % Regular case + bitwise_add_digit(Substring, Acc, S, Offset); + {hash, _, _} when LBitmask =/= 0, Substring < Filter -> + % Degenerate case, I_digit < U, no overflow. + % Successor is `U bsl Offset` which is equivalent to 0. + 0; + {hash, _, S} when LBitmask =/= 0, Substring > Filter -> + % Degenerate case, I_digit > U, overflow. + % Successor is `(1 bsl Size + U) bsl Offset`. 
+ overflow_digit(S, Offset); + {hash, _, S} when LBitmask =/= 0 -> + % Degenerate case, I_digit = U + % Perform digit addition with I_digit = 0, assuming "digit" has + % 0 bits of information (but is `S` bits long at the same time). + % This will overflow only if the result of previous iteration + % was an overflow. + bitwise_add_digit(0, Acc, 0, S, Offset); {timestamp, _, S} -> - Substring1 = Substring + Carry, - Carry1 = Substring1 bsr S, - Acc1 = (Substring1 band ones(S)) bsl Offset, - {Carry1, Acc1} + % Regular case + bitwise_add_digit(Substring, Acc, S, Offset) end end, - % TODO - % We can put carry bit into the `Acc`'s MSB instead of wrapping it into a tuple. - % This could save us a heap alloc which might be imporatant in a hot path. - {1, 0}, + 0, Bitstring, HashBitfilter, HashBitmask, Size, Sources ), - case Result of - {_Carry = 0, Next} -> - Next bor (HashBitfilter band HashBitmask); - {_Carry = 1, _} -> - % we got "carried away" past the range, time to stop iteration + case NextSeek bsr Size of + _Carry = 0 -> + % Found the successor. + % We need to recover values of those degenerate digits which we + % represented with 0 during digit-by-digit iteration. + NextSeek bor (HashBitfilter band HashBitmask); + _Carry = 1 -> + % We got "carried away" past the range, time to stop iteration. none end. +bitwise_add_digit(Digit, Number, Width, Offset) -> + bitwise_add_digit(Digit, Number, Width, Width, Offset). + +%% Add "digit" (represented with integer `Digit`) to the `Number` assuming +%% this digit starts at `Offset` bits in `Number` and is `Width` bits long. +%% Perform an overflow if the result of addition would not fit into `Bits` +%% bits. +bitwise_add_digit(Digit, Number, Bits, Width, Offset) -> + Sum = (Digit bsl Offset) + Number, + case (Sum bsr Offset) < (1 bsl Bits) of + true -> Sum; + false -> overflow_digit(Width, Offset) + end. + +%% Constuct a number which denotes an overflow of digit that starts at +%% `Offset` bits and is `Width` bits long. 
+overflow_digit(Width, Offset) -> + (1 bsl Width) bsl Offset. + +%% Iterate through sub-bitstrings of 3 integers in lockstep, starting from least +%% significant bits first. +%% +%% Each integer is assumed to be `Size` bits long. Lengths of sub-bitstring are +%% specified in `Sources` list, in order from most significant bits to least +%% significant. Each iteration calls `FoldFun` with: +%% * bitsource that was used to extract sub-bitstrings, +%% * 3 sub-bitstrings in integer representation, +%% * bit offset into integers, +%% * current accumulator. +-spec zipfoldr3(FoldFun, Acc, integer(), integer(), integer(), _Size :: bits(), [bitsource()]) -> + Acc +when + FoldFun :: fun((bitsource(), integer(), integer(), integer(), _Offset :: bits(), Acc) -> Acc). zipfoldr3(_FoldFun, Acc, _, _, _, 0, []) -> Acc; zipfoldr3(FoldFun, Acc, I1, I2, I3, Offset, [Source = {_, _, S} | Rest]) -> @@ -619,8 +683,8 @@ wildcard_bitmask_test_() -> %% Key3 = |123|999|679|001| → Seek = 1 |123|000|678|000| → eos %% Key4 = |125|011|179|017| → Seek = 1 |123|000|678|000| → eos -compute_test_next_seek(Bitstring, Bitfilter, HBitmask) -> - compute_next_seek( +compute_test_hash_seek(Bitstring, Bitfilter, HBitmask) -> + compute_hash_seek( Bitstring, Bitfilter, HBitmask, @@ -637,7 +701,7 @@ next_seek_test_() -> [ ?_assertMatch( none, - compute_test_next_seek( + compute_test_hash_seek( 16#FD_42_4242_043, 16#FD_42_4242_042, 16#FF_FF_FFFF_FFF @@ -645,7 +709,7 @@ next_seek_test_() -> ), ?_assertMatch( 16#FD_11_0678_000, - compute_test_next_seek( + compute_test_hash_seek( 16#FD_11_0108_121, 16#FD_00_0678_000, 16#FF_00_FFFF_000 @@ -653,7 +717,7 @@ next_seek_test_() -> ), ?_assertMatch( 16#FD_12_0678_000, - compute_test_next_seek( + compute_test_hash_seek( 16#FD_11_0679_919, 16#FD_00_0678_000, 16#FF_00_FFFF_000 @@ -661,7 +725,7 @@ next_seek_test_() -> ), ?_assertMatch( none, - compute_test_next_seek( + compute_test_hash_seek( 16#FD_FF_0679_001, 16#FD_00_0678_000, 16#FF_00_FFFF_000 @@ -669,7 +733,7 @@ 
next_seek_test_() -> ), ?_assertMatch( none, - compute_test_next_seek( + compute_test_hash_seek( 16#FE_11_0179_017, 16#FD_00_0678_000, 16#FF_00_FFFF_000 diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index a0424541a..550094e2d 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -169,7 +169,7 @@ t_prop_hash_bitmask_computes(_) -> ) ). -t_prop_iterate_stored_messages(Config) -> +t_prop_iterate_stored_messages(_) -> ?assertEqual( true, proper:quickcheck( @@ -256,7 +256,7 @@ init_per_testcase(TC, Config) -> {ok, _} = emqx_replay_local_store_sup:start_zone(zone(TC)), Config. -end_per_testcase(_TC, Config) -> +end_per_testcase(_TC, _Config) -> ok = application:stop(emqx_replay). zone(TC) -> From 85d3783ff13bf4fdaa8972f5020f28b490a97578 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 3 Jan 2023 18:30:43 +0300 Subject: [PATCH 15/49] chore(ds): Drop few unused macrodefinitions --- apps/emqx_replay/src/emqx_replay_message_storage.erl | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index aeb3ebad7..3236a6593 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -143,16 +143,6 @@ cf_options => emqx_replay_local_store:db_cf_options() }. --define(DEFAULT_COLUMN_FAMILY, {"default", []}). - --define(DEFAULT_OPEN_OPTIONS, [ - {create_if_missing, true}, - {create_missing_column_families, true} -]). - --define(DEFAULT_WRITE_OPTIONS, [{sync, true}]). --define(DEFAULT_READ_OPTIONS, []). - %% Persistent configuration of the generation, it is used to create db %% record when the database is reopened -record(schema, {keymapper :: keymapper()}). 
From 862e17cd788786293ddee27bb7a77266aebcecf0 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 3 Jan 2023 18:34:42 +0300 Subject: [PATCH 16/49] chore(ds): assign CODEOWNER --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4ad6049f3..3d6ab6c37 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -18,6 +18,7 @@ /apps/emqx_rule_engine/ @emqx/emqx-review-board @kjellwinblad /apps/emqx_slow_subs/ @emqx/emqx-review-board @lafirest /apps/emqx_statsd/ @emqx/emqx-review-board @JimMoen +/apps/emqx_durable_storage/ @ieQu1 ## CI /deploy/ @emqx/emqx-review-board @Rory-Z From c5826cee566bb6ae6e15118a7e2cacdaa6264c77 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 3 Jan 2023 18:53:19 +0300 Subject: [PATCH 17/49] =?UTF-8?q?refactor(ds):=20rename=20`compute=5Fhash?= =?UTF-8?q?=5F*`=20=E2=86=92=20`compute=5Ftopic=5F*`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/emqx_replay_message_storage.erl | 62 +++++++++---------- .../test/emqx_replay_storage_SUITE.erl | 4 +- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 3236a6593..dafaf8c68 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -101,10 +101,10 @@ -export([ make_message_key/4, compute_bitstring/3, - compute_hash_bitmask/2, + compute_topic_bitmask/2, compute_next_seek/4, compute_time_seek/3, - compute_hash_seek/4, + compute_topic_seek/4, hash/2 ]). 
@@ -255,7 +255,7 @@ make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of {ok, ITHandle} -> Bitstring = compute_bitstring(TopicFilter, StartTime, DB#db.keymapper), - HashBitmask = compute_hash_bitmask(TopicFilter, DB#db.keymapper), + HashBitmask = compute_topic_bitmask(TopicFilter, DB#db.keymapper), TimeBitmask = compute_time_bitmask(DB#db.keymapper), HashBitfilter = Bitstring band HashBitmask, TimeBitfilter = Bitstring band TimeBitmask, @@ -315,9 +315,9 @@ extract(Key, #keymapper{bitsize = Size}) -> compute_bitstring(Topic, Timestamp, #keymapper{source = Source}) -> compute_bitstring(Topic, Timestamp, Source, 0). --spec compute_hash_bitmask(emqx_topic:words(), keymapper()) -> integer(). -compute_hash_bitmask(TopicFilter, #keymapper{source = Source}) -> - compute_hash_bitmask(TopicFilter, Source, 0). +-spec compute_topic_bitmask(emqx_topic:words(), keymapper()) -> integer(). +compute_topic_bitmask(TopicFilter, #keymapper{source = Source}) -> + compute_topic_bitmask(TopicFilter, Source, 0). -spec compute_time_bitmask(keymapper()) -> integer(). compute_time_bitmask(#keymapper{source = Source}) -> @@ -346,19 +346,19 @@ compute_bitstring(Tail, Timestamp, [{hash, levels, Size} | Rest], Acc) -> compute_bitstring(_, _, [], Acc) -> Acc. 
-compute_hash_bitmask(Filter, [{timestamp, _, Size} | Rest], Acc) -> - compute_hash_bitmask(Filter, Rest, bitwise_concat(Acc, 0, Size)); -compute_hash_bitmask(['#'], [{hash, _, Size} | Rest], Acc) -> - compute_hash_bitmask(['#'], Rest, bitwise_concat(Acc, 0, Size)); -compute_hash_bitmask(['+' | Tail], [{hash, _, Size} | Rest], Acc) -> - compute_hash_bitmask(Tail, Rest, bitwise_concat(Acc, 0, Size)); -compute_hash_bitmask([], [{hash, level, Size} | Rest], Acc) -> - compute_hash_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); -compute_hash_bitmask([_ | Tail], [{hash, level, Size} | Rest], Acc) -> - compute_hash_bitmask(Tail, Rest, bitwise_concat(Acc, ones(Size), Size)); -compute_hash_bitmask(_, [{hash, levels, Size} | Rest], Acc) -> - compute_hash_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); -compute_hash_bitmask(_, [], Acc) -> +compute_topic_bitmask(Filter, [{timestamp, _, Size} | Rest], Acc) -> + compute_topic_bitmask(Filter, Rest, bitwise_concat(Acc, 0, Size)); +compute_topic_bitmask(['#'], [{hash, _, Size} | Rest], Acc) -> + compute_topic_bitmask(['#'], Rest, bitwise_concat(Acc, 0, Size)); +compute_topic_bitmask(['+' | Tail], [{hash, _, Size} | Rest], Acc) -> + compute_topic_bitmask(Tail, Rest, bitwise_concat(Acc, 0, Size)); +compute_topic_bitmask([], [{hash, level, Size} | Rest], Acc) -> + compute_topic_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); +compute_topic_bitmask([_ | Tail], [{hash, level, Size} | Rest], Acc) -> + compute_topic_bitmask(Tail, Rest, bitwise_concat(Acc, ones(Size), Size)); +compute_topic_bitmask(_, [{hash, levels, Size} | Rest], Acc) -> + compute_topic_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); +compute_topic_bitmask(_, [], Acc) -> Acc. compute_time_bitmask([{timestamp, _, Size} | Rest], Acc) -> @@ -431,7 +431,7 @@ stop_iteration(It) -> %% `Bitstring` is out of the hash space defined by `HashBitfilter`. 
compute_next_seek(_HashMatches = false, _, Bitstring, It) -> - NextBitstring = compute_hash_seek( + NextBitstring = compute_topic_seek( Bitstring, It#it.hash_bitfilter, It#it.hash_bitmask, @@ -458,12 +458,12 @@ compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask) -> %% * greater than `Bitstring`, %% * and falls into the hash space defined by `HashBitfilter`. %% Note that the result can end up "back" in time and out of the time range. -compute_hash_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) -> +compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) -> Sources = Keymapper#keymapper.source, Size = Keymapper#keymapper.bitsize, - compute_hash_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size). + compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size). -compute_hash_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) -> +compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) -> % NOTE % We're iterating through `Substring` here, in lockstep with `HashBitfilter` % and`HashBitmask`, starting from least signigicant bits. Each bitsource in @@ -615,7 +615,7 @@ make_keymapper_test_() -> ]. 
compute_test_bitmask(TopicFilter) -> - compute_hash_bitmask( + compute_topic_bitmask( TopicFilter, [ {hash, level, 3}, @@ -673,8 +673,8 @@ wildcard_bitmask_test_() -> %% Key3 = |123|999|679|001| → Seek = 1 |123|000|678|000| → eos %% Key4 = |125|011|179|017| → Seek = 1 |123|000|678|000| → eos -compute_test_hash_seek(Bitstring, Bitfilter, HBitmask) -> - compute_hash_seek( +compute_test_topic_seek(Bitstring, Bitfilter, HBitmask) -> + compute_topic_seek( Bitstring, Bitfilter, HBitmask, @@ -691,7 +691,7 @@ next_seek_test_() -> [ ?_assertMatch( none, - compute_test_hash_seek( + compute_test_topic_seek( 16#FD_42_4242_043, 16#FD_42_4242_042, 16#FF_FF_FFFF_FFF @@ -699,7 +699,7 @@ next_seek_test_() -> ), ?_assertMatch( 16#FD_11_0678_000, - compute_test_hash_seek( + compute_test_topic_seek( 16#FD_11_0108_121, 16#FD_00_0678_000, 16#FF_00_FFFF_000 @@ -707,7 +707,7 @@ next_seek_test_() -> ), ?_assertMatch( 16#FD_12_0678_000, - compute_test_hash_seek( + compute_test_topic_seek( 16#FD_11_0679_919, 16#FD_00_0678_000, 16#FF_00_FFFF_000 @@ -715,7 +715,7 @@ next_seek_test_() -> ), ?_assertMatch( none, - compute_test_hash_seek( + compute_test_topic_seek( 16#FD_FF_0679_001, 16#FD_00_0678_000, 16#FF_00_FFFF_000 @@ -723,7 +723,7 @@ next_seek_test_() -> ), ?_assertMatch( none, - compute_test_hash_seek( + compute_test_topic_seek( 16#FE_11_0179_017, 16#FD_00_0678_000, 16#FF_00_FFFF_000 diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index 550094e2d..30850927b 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -154,7 +154,7 @@ t_prop_topic_hash_computes(_) -> ) ). 
-t_prop_hash_bitmask_computes(_) -> +t_prop_topic_bitmask_computes(_) -> Keymapper = emqx_replay_message_storage:make_keymapper(#{ timestamp_bits => 16, topic_bits_per_level => [8, 12, 16], @@ -163,7 +163,7 @@ t_prop_hash_bitmask_computes(_) -> ?assert( proper:quickcheck( ?FORALL(TopicFilter, topic_filter(), begin - Mask = emqx_replay_message_storage:compute_hash_bitmask(TopicFilter, Keymapper), + Mask = emqx_replay_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) end) ) From 4c643aa7d55f6ef28b936a8e092a038f9e8b9bd6 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 4 Jan 2023 22:02:53 +0300 Subject: [PATCH 18/49] refactor(ds): Introduce keyspace filter concept So we could conveniently test it separately. --- .../src/emqx_replay_message_storage.erl | 185 ++++++++++++------ 1 file changed, 125 insertions(+), 60 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index dafaf8c68..3988e97dc 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -98,16 +98,24 @@ -export([next/1]). %% Debug/troubleshooting: +%% Keymappers -export([ - make_message_key/4, + bitsize/1, compute_bitstring/3, compute_topic_bitmask/2, - compute_next_seek/4, - compute_time_seek/3, - compute_topic_seek/4, + compute_time_bitmask/1, hash/2 ]). +%% Keyspace filters +-export([ + make_keyspace_filter/3, + compute_initial_seek/1, + compute_next_seek/2, + compute_time_seek/3, + compute_topic_seek/4 +]). + -export_type([db/0, iterator/0, schema/0]). -compile({inline, [ones/1, bitwise_concat/3]}). @@ -159,8 +167,12 @@ -record(it, { handle :: rocksdb:itr_handle(), + filter :: keyspace_filter(), + next_action :: {seek, binary()} | next +}). 
+ +-record(filter, { keymapper :: keymapper(), - next_action :: {seek, binary()} | next, topic_filter :: emqx_topic:words(), hash_bitfilter :: integer(), hash_bitmask :: integer(), @@ -186,6 +198,7 @@ -opaque db() :: #db{}. -opaque iterator() :: #it{}. -type keymapper() :: #keymapper{}. +-type keyspace_filter() :: #filter{}. %%================================================================================ %% API funcions @@ -254,33 +267,35 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime) -> case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of {ok, ITHandle} -> - Bitstring = compute_bitstring(TopicFilter, StartTime, DB#db.keymapper), - HashBitmask = compute_topic_bitmask(TopicFilter, DB#db.keymapper), - TimeBitmask = compute_time_bitmask(DB#db.keymapper), - HashBitfilter = Bitstring band HashBitmask, - TimeBitfilter = Bitstring band TimeBitmask, - InitialSeek = combine(HashBitfilter bor TimeBitfilter, <<>>, DB#db.keymapper), + % TODO earliest + Filter = make_keyspace_filter(TopicFilter, StartTime, DB#db.keymapper), + InitialSeek = combine(compute_initial_seek(Filter), <<>>, DB#db.keymapper), {ok, #it{ handle = ITHandle, - keymapper = DB#db.keymapper, - next_action = {seek, InitialSeek}, - topic_filter = TopicFilter, - hash_bitfilter = HashBitfilter, - hash_bitmask = HashBitmask, - time_bitfilter = TimeBitfilter, - time_bitmask = TimeBitmask + filter = Filter, + next_action = {seek, InitialSeek} }}; Err -> Err end. -spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}. 
-next(It = #it{next_action = Action}) -> - case rocksdb:iterator_move(It#it.handle, Action) of +next(It = #it{filter = #filter{keymapper = Keymapper}}) -> + case rocksdb:iterator_move(It#it.handle, It#it.next_action) of % spec says `{ok, Key}` is also possible but the implementation says it's not {ok, Key, Value} -> - Bitstring = extract(Key, It#it.keymapper), - match_next(It, Bitstring, Value); + Bitstring = extract(Key, Keymapper), + case match_next(Bitstring, Value, It#it.filter) of + {_Topic, Payload} -> + {value, Payload, It#it{next_action = next}}; + next -> + next(It#it{next_action = next}); + NextBitstring when is_integer(NextBitstring) -> + NextSeek = combine(NextBitstring, <<>>, Keymapper), + next(It#it{next_action = {seek, NextSeek}}); + none -> + stop_iteration(It) + end; {error, invalid_iterator} -> stop_iteration(It); {error, iterator_closed} -> @@ -291,6 +306,18 @@ next(It = #it{next_action = Action}) -> %% Internal exports %%================================================================================ +-define(topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), + (Bitstring band HashBitmask) == HashBitfilter +). + +-define(time_matches(Bitstring, TimeBitfilter, TimeBitmask), + (Bitstring band TimeBitmask) >= TimeBitfilter +). + +-spec bitsize(keymapper()) -> bits(). +bitsize(#keymapper{bitsize = Bitsize}) -> + Bitsize. + make_message_key(Topic, PublishedAt, MessageID, Keymapper) -> combine(compute_bitstring(Topic, PublishedAt, Keymapper), MessageID, Keymapper). @@ -323,10 +350,46 @@ compute_topic_bitmask(TopicFilter, #keymapper{source = Source}) -> compute_time_bitmask(#keymapper{source = Source}) -> compute_time_bitmask(Source, 0). +-spec hash(term(), bits()) -> integer(). hash(Input, Bits) -> % at most 32 bits erlang:phash2(Input, 1 bsl Bits). +-spec make_keyspace_filter(emqx_topic:words(), time(), keymapper()) -> keyspace_filter(). 
+make_keyspace_filter(TopicFilter, StartTime, Keymapper) -> + Bitstring = compute_bitstring(TopicFilter, StartTime, Keymapper), + HashBitmask = compute_topic_bitmask(TopicFilter, Keymapper), + TimeBitmask = compute_time_bitmask(Keymapper), + HashBitfilter = Bitstring band HashBitmask, + TimeBitfilter = Bitstring band TimeBitmask, + #filter{ + keymapper = Keymapper, + topic_filter = TopicFilter, + hash_bitfilter = HashBitfilter, + hash_bitmask = HashBitmask, + time_bitfilter = TimeBitfilter, + time_bitmask = TimeBitmask + }. + +-spec compute_initial_seek(keyspace_filter()) -> integer(). +compute_initial_seek(#filter{hash_bitfilter = HashBitfilter, time_bitfilter = TimeBitfilter}) -> + % Should be the same as `compute_initial_seek(0, Filter)`. + HashBitfilter bor TimeBitfilter. + +-spec compute_next_seek(integer(), keyspace_filter()) -> integer(). +compute_next_seek( + Bitstring, + Filter = #filter{ + hash_bitfilter = HashBitfilter, + hash_bitmask = HashBitmask, + time_bitfilter = TimeBitfilter, + time_bitmask = TimeBitmask + } +) -> + HashMatches = ?topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), + TimeMatches = ?time_matches(Bitstring, TimeBitfilter, TimeBitmask), + compute_next_seek(HashMatches, TimeMatches, Bitstring, Filter). + %%================================================================================ %% Internal functions %%================================================================================ @@ -388,65 +451,63 @@ ones(Bits) -> %% |123|056|678| & |fff|000|fff| = |123|000|678|. 
match_next( - It = #it{ - keymapper = Keymapper, + Bitstring, + Value, + Filter = #filter{ topic_filter = TopicFilter, hash_bitfilter = HashBitfilter, hash_bitmask = HashBitmask, time_bitfilter = TimeBitfilter, time_bitmask = TimeBitmask - }, - Bitstring, - Value + } ) -> - HashMatches = (Bitstring band HashBitmask) == HashBitfilter, - TimeMatches = (Bitstring band TimeBitmask) >= TimeBitfilter, + HashMatches = ?topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), + TimeMatches = ?time_matches(Bitstring, TimeBitfilter, TimeBitmask), case HashMatches and TimeMatches of true -> - {Topic, MessagePayload} = unwrap_message_value(Value), + Message = {Topic, _Payload} = unwrap_message_value(Value), case emqx_topic:match(Topic, TopicFilter) of true -> - {value, MessagePayload, It#it{next_action = next}}; + Message; false -> - next(It#it{next_action = next}) + next end; false -> - case compute_next_seek(HashMatches, TimeMatches, Bitstring, It) of - NextBitstring when is_integer(NextBitstring) -> - % ct:pal("Bitstring = ~32.16.0B", [Bitstring]), - % ct:pal("Bitfilter = ~32.16.0B", [Bitfilter]), - % ct:pal("HBitmask = ~32.16.0B", [HashBitmask]), - % ct:pal("TBitmask = ~32.16.0B", [TimeBitmask]), - % ct:pal("NextBitstring = ~32.16.0B", [NextBitstring]), - NextSeek = combine(NextBitstring, <<>>, Keymapper), - next(It#it{next_action = {seek, NextSeek}}); - none -> - stop_iteration(It) - end + compute_next_seek(HashMatches, TimeMatches, Bitstring, Filter) end. -stop_iteration(It) -> - ok = rocksdb:iterator_close(It#it.handle), - none. - %% `Bitstring` is out of the hash space defined by `HashBitfilter`. 
-compute_next_seek(_HashMatches = false, _, Bitstring, It) -> - NextBitstring = compute_topic_seek( - Bitstring, - It#it.hash_bitfilter, - It#it.hash_bitmask, - It#it.keymapper - ), +compute_next_seek( + _HashMatches = false, + _TimeMatches, + Bitstring, + Filter = #filter{ + keymapper = Keymapper, + hash_bitfilter = HashBitfilter, + hash_bitmask = HashBitmask, + time_bitfilter = TimeBitfilter, + time_bitmask = TimeBitmask + } +) -> + NextBitstring = compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper), case NextBitstring of none -> none; _ -> - TimeMatches = (NextBitstring band It#it.time_bitmask) >= It#it.time_bitfilter, - compute_next_seek(true, TimeMatches, NextBitstring, It) + TimeMatches = ?time_matches(NextBitstring, TimeBitfilter, TimeBitmask), + compute_next_seek(true, TimeMatches, NextBitstring, Filter) end; %% `Bitstring` is out of the time range defined by `TimeBitfilter`. -compute_next_seek(_HashMatches = true, _TimeMatches = false, Bitstring, It) -> - compute_time_seek(Bitstring, It#it.time_bitfilter, It#it.time_bitmask); +compute_next_seek( + _HashMatches = true, + _TimeMatches = false, + Bitstring, + #filter{ + time_bitfilter = TimeBitfilter, + time_bitmask = TimeBitmask + } +) -> + compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask); compute_next_seek(true, true, Bitstring, _It) -> Bitstring. @@ -466,7 +527,7 @@ compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) -> compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) -> % NOTE % We're iterating through `Substring` here, in lockstep with `HashBitfilter` - % and`HashBitmask`, starting from least signigicant bits. Each bitsource in + % and `HashBitmask`, starting from least signigicant bits. Each bitsource in % `Sources` has a bitsize `S` and, accordingly, gives us a sub-bitstring `S` % bits long which we interpret as a "digit". 
There are 2 flavors of those % "digits": @@ -573,6 +634,10 @@ substring(I, Offset, Size) -> data_cf(GenId) -> ?MODULE_STRING ++ integer_to_list(GenId). +stop_iteration(It) -> + ok = rocksdb:iterator_close(It#it.handle), + none. + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). From 94e8dbfe7fea047a42741ccb8e83e3f03c7aee59 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 4 Jan 2023 22:05:09 +0300 Subject: [PATCH 19/49] test(ds): Move proptests into a separate module Following conventions. Also add few proptests on keyspace filters. --- .../test/emqx_replay_storage_SUITE.erl | 111 ---------- .../test/{ => props}/payload_gen.erl | 0 .../test/props/prop_replay_storage.erl | 189 ++++++++++++++++++ 3 files changed, 189 insertions(+), 111 deletions(-) rename apps/emqx_replay/test/{ => props}/payload_gen.erl (100%) create mode 100644 apps/emqx_replay/test/props/prop_replay_storage.erl diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index 30850927b..c99063350 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -20,7 +20,6 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("stdlib/include/assert.hrl"). --include_lib("proper/include/proper.hrl"). -define(ZONE, zone(?FUNCTION_NAME)). @@ -137,116 +136,6 @@ parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) -> parse_topic(Topic) -> emqx_topic:words(iolist_to_binary(Topic)). -%% - -t_prop_topic_hash_computes(_) -> - Keymapper = emqx_replay_message_storage:make_keymapper(#{ - timestamp_bits => 32, - topic_bits_per_level => [8, 12, 16, 24], - epoch => 10000 - }), - ?assert( - proper:quickcheck( - ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin - BS = emqx_replay_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), - is_integer(BS) andalso (BS < (1 bsl 92)) - end) - ) - ). 
- -t_prop_topic_bitmask_computes(_) -> - Keymapper = emqx_replay_message_storage:make_keymapper(#{ - timestamp_bits => 16, - topic_bits_per_level => [8, 12, 16], - epoch => 100 - }), - ?assert( - proper:quickcheck( - ?FORALL(TopicFilter, topic_filter(), begin - Mask = emqx_replay_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), - is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) - end) - ) - ). - -t_prop_iterate_stored_messages(_) -> - ?assertEqual( - true, - proper:quickcheck( - ?FORALL( - Streams, - messages(), - begin - Stream = payload_gen:interleave_streams(Streams), - ok = store_message_stream(?ZONE, Stream), - % TODO actually verify some property - true - end - ) - ) - ). - -store_message_stream(Zone, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> - MessageID = <>, - PublishedAt = rand:uniform(ChunkNum), - ok = emqx_replay_local_store:store(Zone, MessageID, PublishedAt, Topic, Payload), - store_message_stream(Zone, payload_gen:next(Rest)); -store_message_stream(_Zone, []) -> - ok. - -messages() -> - ?LET(Topics, list(topic()), begin - [{Topic, payload_gen:binary_stream_gen(64)} || Topic <- Topics] - end). - -topic() -> - % TODO - % Somehow generate topic levels with variance according to the entropy distribution? - non_empty(list(topic_level())). - -topic(EntropyWeights) -> - ?LET( - L, - list(1), - ?SIZED(S, [topic_level(S * EW) || EW <- lists:sublist(EntropyWeights ++ L, length(L))]) - ). - -topic_filter() -> - ?SUCHTHAT( - L, - non_empty( - list( - frequency([ - {5, topic_level()}, - {2, '+'}, - {1, '#'} - ]) - ) - ), - not lists:member('#', L) orelse lists:last(L) == '#' - ). - -% topic() -> -% ?LAZY(?SIZED(S, frequency([ -% {S, [topic_level() | topic()]}, -% {1, []} -% ]))). - -% topic_filter() -> -% ?LAZY(?SIZED(S, frequency([ -% {round(S / 3 * 2), [topic_level() | topic_filter()]}, -% {round(S / 3 * 1), ['+' | topic_filter()]}, -% {1, []}, -% {1, ['#']} -% ]))). 
- -topic_level() -> - ?LET(L, list(oneof([range($a, $z), range($0, $9)])), iolist_to_binary(L)). - -topic_level(Entropy) -> - S = floor(1 + math:log2(Entropy) / 4), - ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))). - %% CT callbacks all() -> emqx_common_test_helpers:all(?MODULE). diff --git a/apps/emqx_replay/test/payload_gen.erl b/apps/emqx_replay/test/props/payload_gen.erl similarity index 100% rename from apps/emqx_replay/test/payload_gen.erl rename to apps/emqx_replay/test/props/payload_gen.erl diff --git a/apps/emqx_replay/test/props/prop_replay_storage.erl b/apps/emqx_replay/test/props/prop_replay_storage.erl new file mode 100644 index 000000000..c2d63a3d7 --- /dev/null +++ b/apps/emqx_replay/test/props/prop_replay_storage.erl @@ -0,0 +1,189 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(prop_replay_storage). + +-include_lib("proper/include/proper.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-define(ZONE, mk_zone_name(?FUNCTION_NAME)). +-define(SETUP(Test), ?SETUP(fun() -> setup(?ZONE) end, Test)). 
+ +%%-------------------------------------------------------------------- +%% Properties +%%-------------------------------------------------------------------- + +prop_bitstring_computes() -> + ?FORALL(Keymapper, keymapper(), begin + Bitsize = emqx_replay_message_storage:bitsize(Keymapper), + ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin + BS = emqx_replay_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), + is_integer(BS) andalso (BS < (1 bsl Bitsize)) + end) + end). + +prop_topic_bitmask_computes() -> + Keymapper = make_keymapper(16, [8, 12, 16], 100), + ?FORALL(TopicFilter, topic_filter(), begin + Mask = emqx_replay_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), + % topic bits + timestamp LSBs + is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) + end). + +prop_next_seek_monotonic() -> + ?FORALL( + {TopicFilter, StartTime, Keymapper}, + {topic_filter(), pos_integer(), keymapper()}, + begin + Filter = emqx_replay_message_storage:make_keyspace_filter( + TopicFilter, StartTime, Keymapper + ), + ?FORALL( + Bitstring, + bitstr(emqx_replay_message_storage:bitsize(Keymapper)), + emqx_replay_message_storage:compute_next_seek(Bitstring, Filter) >= Bitstring + ) + end + ). + +prop_next_seek_eq_initial_seek() -> + ?FORALL( + Filter, + keyspace_filter(), + emqx_replay_message_storage:compute_initial_seek(Filter) =:= + emqx_replay_message_storage:compute_next_seek(0, Filter) + ). + +prop_iterate_stored_messages() -> + ?SETUP( + ?FORALL(Streams, message_streams(), begin + Stream = payload_gen:interleave_streams(Streams), + ok = store_message_stream(?ZONE, Stream), + % TODO actually verify some property + true + end) + ). 
+ +store_message_stream(Zone, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> + MessageID = <>, + PublishedAt = rand:uniform(ChunkNum), + ok = emqx_replay_local_store:store(Zone, MessageID, PublishedAt, Topic, Payload), + store_message_stream(Zone, payload_gen:next(Rest)); +store_message_stream(_Zone, []) -> + ok. + +%%-------------------------------------------------------------------- +%% Setup / teardown +%%-------------------------------------------------------------------- + +setup(Zone) -> + {ok, _} = application:ensure_all_started(emqx_replay), + {ok, _} = emqx_replay_local_store_sup:start_zone(Zone), + fun() -> + application:stop(emqx_replay) + end. + +%%-------------------------------------------------------------------- +%% Type generators +%%-------------------------------------------------------------------- + +topic() -> + % TODO + % Somehow generate topic levels with variance according to the entropy distribution? + non_empty(list(topic_level())). + +topic(EntropyWeights) -> + ?LET( + L, + list(1), + ?SIZED(S, [topic_level(S * EW) || EW <- lists:sublist(EntropyWeights ++ L, length(L))]) + ). + +% entropy_weights() -> + +topic_filter() -> + ?SUCHTHAT( + L, + non_empty( + list( + frequency([ + {5, topic_level()}, + {2, '+'}, + {1, '#'} + ]) + ) + ), + not lists:member('#', L) orelse lists:last(L) == '#' + ). + +% topic() -> +% ?LAZY(?SIZED(S, frequency([ +% {S, [topic_level() | topic()]}, +% {1, []} +% ]))). + +% topic_filter() -> +% ?LAZY(?SIZED(S, frequency([ +% {round(S / 3 * 2), [topic_level() | topic_filter()]}, +% {round(S / 3 * 1), ['+' | topic_filter()]}, +% {1, []}, +% {1, ['#']} +% ]))). + +topic_level() -> + ?LET(L, list(oneof([range($a, $z), range($0, $9)])), iolist_to_binary(L)). + +topic_level(Entropy) -> + S = floor(1 + math:log2(Entropy) / 4), + ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))). 
+ +keymapper() -> + ?LET( + {TimestampBits, TopicBits, Epoch}, + { + range(0, 128), + non_empty(list(range(1, 32))), + pos_integer() + }, + make_keymapper(TimestampBits, TopicBits, Epoch * 100) + ). + +keyspace_filter() -> + ?LET( + {TopicFilter, StartTime, Keymapper}, + {topic_filter(), pos_integer(), keymapper()}, + emqx_replay_message_storage:make_keyspace_filter(TopicFilter, StartTime, Keymapper) + ). + +bitstr(Size) -> + ?LET(B, binary(1 + (Size div 8)), binary:decode_unsigned(B) band (1 bsl Size - 1)). + +message_streams() -> + ?LET(Topics, list(topic()), begin + [{Topic, payload_gen:binary_stream_gen(64)} || Topic <- Topics] + end). + +%% + +make_keymapper(TimestampBits, TopicBits, MaxEpoch) -> + emqx_replay_message_storage:make_keymapper(#{ + timestamp_bits => TimestampBits, + topic_bits_per_level => TopicBits, + epoch => MaxEpoch + }). + +mk_zone_name(TC) -> + list_to_atom(?MODULE_STRING ++ "_" ++ atom_to_list(TC)). From 1f5425975fd00e3c86f6181d767eac64c0f77228 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 5 Jan 2023 22:48:10 +0300 Subject: [PATCH 20/49] feat(ds): add an ability to preserve and restore iterators This will allow to persist iteration state and to periodically recreate iterators during long replays. --- .../src/emqx_replay_message_storage.erl | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 3988e97dc..1c91066cf 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -97,6 +97,9 @@ -export([make_iterator/3]). -export([next/1]). +-export([preserve_iterator/1]). +-export([restore_iterator/2]). + %% Debug/troubleshooting: %% Keymappers -export([ @@ -168,12 +171,14 @@ -record(it, { handle :: rocksdb:itr_handle(), filter :: keyspace_filter(), + cursor :: binary() | undefined, next_action :: {seek, binary()} | next }). 
-record(filter, { keymapper :: keymapper(), topic_filter :: emqx_topic:words(), + start_time :: integer(), hash_bitfilter :: integer(), hash_bitmask :: integer(), time_bitfilter :: integer(), @@ -287,7 +292,8 @@ next(It = #it{filter = #filter{keymapper = Keymapper}}) -> Bitstring = extract(Key, Keymapper), case match_next(Bitstring, Value, It#it.filter) of {_Topic, Payload} -> - {value, Payload, It#it{next_action = next}}; + % Preserve last seen key in the iterator so it could be restored later. + {value, Payload, It#it{cursor = Key, next_action = next}}; next -> next(It#it{next_action = next}); NextBitstring when is_integer(NextBitstring) -> @@ -302,6 +308,37 @@ next(It = #it{filter = #filter{keymapper = Keymapper}}) -> {error, closed} end. +-spec preserve_iterator(iterator()) -> binary(). +preserve_iterator(#it{cursor = Cursor, filter = Filter}) -> + State = #{ + v => 1, + cursor => Cursor, + filter => Filter#filter.topic_filter, + stime => Filter#filter.start_time + }, + term_to_binary(State). + +-spec restore_iterator(db(), binary()) -> {ok, iterator()} | {error, _TODO}. +restore_iterator(DB, Serial) when is_binary(Serial) -> + State = binary_to_term(Serial), + restore_iterator(DB, State); +restore_iterator(DB, #{ + v := 1, + cursor := Cursor, + filter := TopicFilter, + stime := StartTime +}) -> + case make_iterator(DB, TopicFilter, StartTime) of + {ok, It} when Cursor == undefined -> + % Iterator was preserved right after it has been made. + {ok, It}; + {ok, It} -> + % Iterator was preserved mid-replay, seek right past the last seen key. + {ok, It#it{cursor = Cursor, next_action = {seek, successor(Cursor)}}}; + Err -> + Err + end. 
+ %%================================================================================ %% Internal exports %%================================================================================ @@ -365,6 +402,7 @@ make_keyspace_filter(TopicFilter, StartTime, Keymapper) -> #filter{ keymapper = Keymapper, topic_filter = TopicFilter, + start_time = StartTime, hash_bitfilter = HashBitfilter, hash_bitmask = HashBitmask, time_bitfilter = TimeBitfilter, @@ -437,6 +475,10 @@ bitwise_concat(Acc, Item, ItemSize) -> ones(Bits) -> 1 bsl Bits - 1. +-spec successor(key()) -> key(). +successor(Key) -> + <<Key/binary, 0:8>>. + %% |123|345|678| %% foo bar baz From 440fecd2680a1259bf5b41111b0b15dd51cc96b5 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 5 Jan 2023 22:52:08 +0300 Subject: [PATCH 21/49] test(ds): Add a proptest on iterator preserve / restore Which verifies that preservation and restoration of iterators does not affect the outcome of an iteration (under the precondition that the state of database is constant during an iteration). --- .../props/prop_replay_message_storage.erl | 340 ++++++++++++++++++ .../test/props/prop_replay_storage.erl | 189 ---------- 2 files changed, 340 insertions(+), 189 deletions(-) create mode 100644 apps/emqx_replay/test/props/prop_replay_message_storage.erl delete mode 100644 apps/emqx_replay/test/props/prop_replay_storage.erl diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl new file mode 100644 index 000000000..222914680 --- /dev/null +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -0,0 +1,340 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(prop_replay_message_storage). + +-include_lib("proper/include/proper.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-define(GEN_ID, 42). + +-define(PROP_FULLNAME, ?MODULE_STRING ++ "." ++ atom_to_list(?FUNCTION_NAME)). + +%%-------------------------------------------------------------------- +%% Properties +%%-------------------------------------------------------------------- + +prop_bitstring_computes() -> + ?FORALL(Keymapper, keymapper(), begin + Bitsize = emqx_replay_message_storage:bitsize(Keymapper), + ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin + BS = emqx_replay_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), + is_integer(BS) andalso (BS < (1 bsl Bitsize)) + end) + end). + +prop_topic_bitmask_computes() -> + Keymapper = make_keymapper(16, [8, 12, 16], 100), + ?FORALL(TopicFilter, topic_filter(), begin + Mask = emqx_replay_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), + % topic bits + timestamp LSBs + is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) + end). + +prop_next_seek_monotonic() -> + ?FORALL( + {TopicFilter, StartTime, Keymapper}, + {topic_filter(), pos_integer(), keymapper()}, + begin + Filter = emqx_replay_message_storage:make_keyspace_filter( + TopicFilter, + StartTime, + Keymapper + ), + ?FORALL( + Bitstring, + bitstr(emqx_replay_message_storage:bitsize(Keymapper)), + emqx_replay_message_storage:compute_next_seek(Bitstring, Filter) >= Bitstring + ) + end + ). 
+ +prop_next_seek_eq_initial_seek() -> + ?FORALL( + Filter, + keyspace_filter(), + emqx_replay_message_storage:compute_initial_seek(Filter) =:= + emqx_replay_message_storage:compute_next_seek(0, Filter) + ). + +prop_iterate_eq_iterate_with_preserve_restore() -> + TBPL = [4, 8, 16, 12], + DB = open(?PROP_FULLNAME, #{ + timestamp_bits => 32, + topic_bits_per_level => TBPL, + epoch => 500 + }), + ?FORALL(Stream, non_empty(messages(topic(TBPL))), begin + % TODO + % This proptest is impure because messages from testruns assumed to be + % independent of each other are accumulated in the same storage. This + % would probably confuse shrinker in the event a testrun fails. + ok = store(DB, Stream), + ?FORALL( + { + {Topic, _}, + Pat, + StartTime, + Commands + }, + { + nth(Stream), + topic_filter_pattern(), + start_time(), + shuffled(flat([non_empty(list({preserve, restore})), list(iterate)])) + }, + begin + TopicFilter = make_topic_filter(Pat, Topic), + Iterator = make_iterator(DB, TopicFilter, StartTime), + Messages = run_iterator_commands(Commands, Iterator, DB), + Messages =:= iterate(DB, TopicFilter, StartTime) + end + ) + end). + +% store_message_stream(DB, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> +% MessageID = emqx_guid:gen(), +% PublishedAt = ChunkNum, +% MessageID, PublishedAt, Topic +% ]), +% ok = emqx_replay_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload), +% store_message_stream(DB, payload_gen:next(Rest)); +% store_message_stream(_Zone, []) -> +% ok. + +store(DB, Messages) -> + lists:foreach( + fun({Topic, Payload = {MessageID, Timestamp, _}}) -> + Bin = term_to_binary(Payload), + emqx_replay_message_storage:store(DB, MessageID, Timestamp, Topic, Bin) + end, + Messages + ). + +iterate(DB, TopicFilter, StartTime) -> + iterate(make_iterator(DB, TopicFilter, StartTime)). + +iterate(It) -> + case emqx_replay_message_storage:next(It) of + {value, Payload, ItNext} -> + [binary_to_term(Payload) | iterate(ItNext)]; + none -> + [] + end. 
+ +make_iterator(DB, TopicFilter, StartTime) -> + {ok, It} = emqx_replay_message_storage:make_iterator(DB, TopicFilter, StartTime), + It. + +run_iterator_commands([iterate | Rest], It, DB) -> + case emqx_replay_message_storage:next(It) of + {value, Payload, ItNext} -> + [binary_to_term(Payload) | run_iterator_commands(Rest, ItNext, DB)]; + none -> + [] + end; +run_iterator_commands([{preserve, restore} | Rest], It, DB) -> + Serial = emqx_replay_message_storage:preserve_iterator(It), + {ok, ItNext} = emqx_replay_message_storage:restore_iterator(DB, Serial), + run_iterator_commands(Rest, ItNext, DB); +run_iterator_commands([], It, _DB) -> + iterate(It). + +%%-------------------------------------------------------------------- +%% Setup / teardown +%%-------------------------------------------------------------------- + +open(Filename, Options) -> + {ok, DBHandle} = rocksdb:open(Filename, [{create_if_missing, true}]), + {Schema, CFRefs} = emqx_replay_message_storage:create_new(DBHandle, ?GEN_ID, Options), + emqx_replay_message_storage:open(DBHandle, ?GEN_ID, CFRefs, Schema). + +%%-------------------------------------------------------------------- +%% Type generators +%%-------------------------------------------------------------------- + +topic() -> + non_empty(list(topic_level())). + +topic(EntropyWeights) -> + ?LET(L, scaled(1 / 4, list(1)), begin + EWs = lists:sublist(EntropyWeights ++ L, length(L)), + ?SIZED(S, [oneof([topic_level(S * EW), topic_level_fixed()]) || EW <- EWs]) + end). + +topic_filter() -> + ?SUCHTHAT( + L, + non_empty( + list( + frequency([ + {5, topic_level()}, + {2, '+'}, + {1, '#'} + ]) + ) + ), + not lists:member('#', L) orelse lists:last(L) == '#' + ). + +topic_level_pattern() -> + frequency([ + {5, level}, + {2, '+'}, + {1, '#'} + ]). + +topic_filter_pattern() -> + list(topic_level_pattern()). + +topic_filter(Topic) -> + ?LET({T, Pat}, {Topic, topic_filter_pattern()}, make_topic_filter(Pat, T)). 
+ +make_topic_filter([], _) -> + []; +make_topic_filter(_, []) -> + []; +make_topic_filter(['#' | _], _) -> + ['#']; +make_topic_filter(['+' | Rest], [_ | Levels]) -> + ['+' | make_topic_filter(Rest, Levels)]; +make_topic_filter([level | Rest], [L | Levels]) -> + [L | make_topic_filter(Rest, Levels)]. + +% topic() -> +% ?LAZY(?SIZED(S, frequency([ +% {S, [topic_level() | topic()]}, +% {1, []} +% ]))). + +% topic_filter() -> +% ?LAZY(?SIZED(S, frequency([ +% {round(S / 3 * 2), [topic_level() | topic_filter()]}, +% {round(S / 3 * 1), ['+' | topic_filter()]}, +% {1, []}, +% {1, ['#']} +% ]))). + +topic_level() -> + ?LET(L, list(oneof([range($a, $z), range($0, $9)])), iolist_to_binary(L)). + +topic_level(Entropy) -> + S = floor(1 + math:log2(Entropy) / 4), + ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))). + +topic_level_fixed() -> + oneof([ + <<"foo">>, + <<"bar">>, + <<"baz">>, + <<"xyzzy">> + ]). + +keymapper() -> + ?LET( + {TimestampBits, TopicBits, Epoch}, + { + range(0, 128), + non_empty(list(range(1, 32))), + pos_integer() + }, + make_keymapper(TimestampBits, TopicBits, Epoch * 100) + ). + +keyspace_filter() -> + ?LET( + {TopicFilter, StartTime, Keymapper}, + {topic_filter(), pos_integer(), keymapper()}, + emqx_replay_message_storage:make_keyspace_filter(TopicFilter, StartTime, Keymapper) + ). + +messages(Topic) -> + ?LET( + Ts, + list(Topic), + interleaved( + ?LET(Messages, vector(length(Ts), list(message())), lists:zip(Ts, Messages)) + ) + ). + +message() -> + ?LET({Timestamp, Payload}, {timestamp(), binary()}, {emqx_guid:gen(), Timestamp, Payload}). + +message_streams(Topic) -> + ?LET(Topics, list(Topic), [{T, payload_gen:binary_stream_gen(64)} || T <- Topics]). + +timestamp() -> + scaled(20, pos_integer()). + +start_time() -> + scaled(10, pos_integer()). + +bitstr(Size) -> + ?LET(B, binary(1 + (Size div 8)), binary:decode_unsigned(B) band (1 bsl Size - 1)). + +nth(L) -> + ?LET(I, range(1, length(L)), lists:nth(I, L)). 
+ +scaled(Factor, T) -> + ?SIZED(S, resize(ceil(S * Factor), T)). + +interleaved(T) -> + ?LET({L, Seed}, {T, integer()}, interleave(L, rand:seed_s(exsss, Seed))). + +shuffled(T) -> + ?LET({L, Seed}, {T, integer()}, shuffle(L, rand:seed_s(exsss, Seed))). + +flat(T) -> + ?LET(L, T, lists:flatten(L)). + +%%-------------------------------------------------------------------- +%% Internal functions +%%-------------------------------------------------------------------- + +make_keymapper(TimestampBits, TopicBits, MaxEpoch) -> + emqx_replay_message_storage:make_keymapper(#{ + timestamp_bits => TimestampBits, + topic_bits_per_level => TopicBits, + epoch => MaxEpoch + }). + +-spec interleave(list({Tag, list(E)}), rand:state()) -> list({Tag, E}). +interleave(Seqs, Rng) -> + interleave(Seqs, length(Seqs), Rng). + +interleave(Seqs, L, Rng) when L > 0 -> + {N, RngNext} = rand:uniform_s(L, Rng), + {SeqHead, SeqTail} = lists:split(N - 1, Seqs), + case SeqTail of + [{Tag, [M | Rest]} | SeqRest] -> + [{Tag, M} | interleave(SeqHead ++ [{Tag, Rest} | SeqRest], L, RngNext)]; + [{_, []} | SeqRest] -> + interleave(SeqHead ++ SeqRest, L - 1, RngNext) + end; +interleave([], 0, _) -> + []. + +-spec shuffle(list(E), rand:state()) -> list(E). +shuffle(L, Rng) -> + {Rands, _} = randoms(length(L), Rng), + [E || {_, E} <- lists:sort(lists:zip(Rands, L))]. + +randoms(N, Rng) when N > 0 -> + {Rand, RngNext} = rand:uniform_s(Rng), + {Tail, RngFinal} = randoms(N - 1, RngNext), + {[Rand | Tail], RngFinal}; +randoms(_, Rng) -> + {[], Rng}. diff --git a/apps/emqx_replay/test/props/prop_replay_storage.erl b/apps/emqx_replay/test/props/prop_replay_storage.erl deleted file mode 100644 index c2d63a3d7..000000000 --- a/apps/emqx_replay/test/props/prop_replay_storage.erl +++ /dev/null @@ -1,189 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. 
-%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - --module(prop_replay_storage). - --include_lib("proper/include/proper.hrl"). --include_lib("eunit/include/eunit.hrl"). - --define(ZONE, mk_zone_name(?FUNCTION_NAME)). --define(SETUP(Test), ?SETUP(fun() -> setup(?ZONE) end, Test)). - -%%-------------------------------------------------------------------- -%% Properties -%%-------------------------------------------------------------------- - -prop_bitstring_computes() -> - ?FORALL(Keymapper, keymapper(), begin - Bitsize = emqx_replay_message_storage:bitsize(Keymapper), - ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin - BS = emqx_replay_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), - is_integer(BS) andalso (BS < (1 bsl Bitsize)) - end) - end). - -prop_topic_bitmask_computes() -> - Keymapper = make_keymapper(16, [8, 12, 16], 100), - ?FORALL(TopicFilter, topic_filter(), begin - Mask = emqx_replay_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), - % topic bits + timestamp LSBs - is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) - end). 
- -prop_next_seek_monotonic() -> - ?FORALL( - {TopicFilter, StartTime, Keymapper}, - {topic_filter(), pos_integer(), keymapper()}, - begin - Filter = emqx_replay_message_storage:make_keyspace_filter( - TopicFilter, StartTime, Keymapper - ), - ?FORALL( - Bitstring, - bitstr(emqx_replay_message_storage:bitsize(Keymapper)), - emqx_replay_message_storage:compute_next_seek(Bitstring, Filter) >= Bitstring - ) - end - ). - -prop_next_seek_eq_initial_seek() -> - ?FORALL( - Filter, - keyspace_filter(), - emqx_replay_message_storage:compute_initial_seek(Filter) =:= - emqx_replay_message_storage:compute_next_seek(0, Filter) - ). - -prop_iterate_stored_messages() -> - ?SETUP( - ?FORALL(Streams, message_streams(), begin - Stream = payload_gen:interleave_streams(Streams), - ok = store_message_stream(?ZONE, Stream), - % TODO actually verify some property - true - end) - ). - -store_message_stream(Zone, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> - MessageID = <>, - PublishedAt = rand:uniform(ChunkNum), - ok = emqx_replay_local_store:store(Zone, MessageID, PublishedAt, Topic, Payload), - store_message_stream(Zone, payload_gen:next(Rest)); -store_message_stream(_Zone, []) -> - ok. - -%%-------------------------------------------------------------------- -%% Setup / teardown -%%-------------------------------------------------------------------- - -setup(Zone) -> - {ok, _} = application:ensure_all_started(emqx_replay), - {ok, _} = emqx_replay_local_store_sup:start_zone(Zone), - fun() -> - application:stop(emqx_replay) - end. - -%%-------------------------------------------------------------------- -%% Type generators -%%-------------------------------------------------------------------- - -topic() -> - % TODO - % Somehow generate topic levels with variance according to the entropy distribution? - non_empty(list(topic_level())). 
- -topic(EntropyWeights) -> - ?LET( - L, - list(1), - ?SIZED(S, [topic_level(S * EW) || EW <- lists:sublist(EntropyWeights ++ L, length(L))]) - ). - -% entropy_weights() -> - -topic_filter() -> - ?SUCHTHAT( - L, - non_empty( - list( - frequency([ - {5, topic_level()}, - {2, '+'}, - {1, '#'} - ]) - ) - ), - not lists:member('#', L) orelse lists:last(L) == '#' - ). - -% topic() -> -% ?LAZY(?SIZED(S, frequency([ -% {S, [topic_level() | topic()]}, -% {1, []} -% ]))). - -% topic_filter() -> -% ?LAZY(?SIZED(S, frequency([ -% {round(S / 3 * 2), [topic_level() | topic_filter()]}, -% {round(S / 3 * 1), ['+' | topic_filter()]}, -% {1, []}, -% {1, ['#']} -% ]))). - -topic_level() -> - ?LET(L, list(oneof([range($a, $z), range($0, $9)])), iolist_to_binary(L)). - -topic_level(Entropy) -> - S = floor(1 + math:log2(Entropy) / 4), - ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))). - -keymapper() -> - ?LET( - {TimestampBits, TopicBits, Epoch}, - { - range(0, 128), - non_empty(list(range(1, 32))), - pos_integer() - }, - make_keymapper(TimestampBits, TopicBits, Epoch * 100) - ). - -keyspace_filter() -> - ?LET( - {TopicFilter, StartTime, Keymapper}, - {topic_filter(), pos_integer(), keymapper()}, - emqx_replay_message_storage:make_keyspace_filter(TopicFilter, StartTime, Keymapper) - ). - -bitstr(Size) -> - ?LET(B, binary(1 + (Size div 8)), binary:decode_unsigned(B) band (1 bsl Size - 1)). - -message_streams() -> - ?LET(Topics, list(topic()), begin - [{Topic, payload_gen:binary_stream_gen(64)} || Topic <- Topics] - end). - -%% - -make_keymapper(TimestampBits, TopicBits, MaxEpoch) -> - emqx_replay_message_storage:make_keymapper(#{ - timestamp_bits => TimestampBits, - topic_bits_per_level => TopicBits, - epoch => MaxEpoch - }). - -mk_zone_name(TC) -> - list_to_atom(?MODULE_STRING ++ "_" ++ atom_to_list(TC)). 
From 82a7292851d70af0b52d0e3ddaaa529f9926bb6c Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 6 Jan 2023 13:51:50 +0300 Subject: [PATCH 22/49] test(ds): Use `_build/test/proper` as a scratch dir for testruns --- .../props/prop_replay_message_storage.erl | 59 ++++++++++++++----- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl index 222914680..baab164f0 100644 --- a/apps/emqx_replay/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -19,10 +19,10 @@ -include_lib("proper/include/proper.hrl"). -include_lib("eunit/include/eunit.hrl"). +-define(WORK_DIR, ["_build", "test"]). +-define(RUN_ID, {?MODULE, testrun_id}). -define(GEN_ID, 42). --define(PROP_FULLNAME, ?MODULE_STRING ++ "." ++ atom_to_list(?FUNCTION_NAME)). - %%-------------------------------------------------------------------- %% Properties %%-------------------------------------------------------------------- @@ -72,17 +72,18 @@ prop_next_seek_eq_initial_seek() -> prop_iterate_eq_iterate_with_preserve_restore() -> TBPL = [4, 8, 16, 12], - DB = open(?PROP_FULLNAME, #{ + Options = #{ timestamp_bits => 32, topic_bits_per_level => TBPL, epoch => 500 - }), + }, + {DB, _Handle} = open_db(make_filepath(?FUNCTION_NAME), Options), ?FORALL(Stream, non_empty(messages(topic(TBPL))), begin % TODO % This proptest is impure because messages from testruns assumed to be % independent of each other are accumulated in the same storage. This % would probably confuse shrinker in the event a testrun fails. 
- ok = store(DB, Stream), + ok = store_db(DB, Stream), ?FORALL( { {Topic, _}, @@ -100,7 +101,7 @@ prop_iterate_eq_iterate_with_preserve_restore() -> TopicFilter = make_topic_filter(Pat, Topic), Iterator = make_iterator(DB, TopicFilter, StartTime), Messages = run_iterator_commands(Commands, Iterator, DB), - Messages =:= iterate(DB, TopicFilter, StartTime) + equals(Messages, iterate_db(DB, TopicFilter, StartTime)) end ) end). @@ -115,7 +116,7 @@ prop_iterate_eq_iterate_with_preserve_restore() -> % store_message_stream(_Zone, []) -> % ok. -store(DB, Messages) -> +store_db(DB, Messages) -> lists:foreach( fun({Topic, Payload = {MessageID, Timestamp, _}}) -> Bin = term_to_binary(Payload), @@ -124,13 +125,13 @@ store(DB, Messages) -> Messages ). -iterate(DB, TopicFilter, StartTime) -> - iterate(make_iterator(DB, TopicFilter, StartTime)). +iterate_db(DB, TopicFilter, StartTime) -> + iterate_db(make_iterator(DB, TopicFilter, StartTime)). -iterate(It) -> +iterate_db(It) -> case emqx_replay_message_storage:next(It) of {value, Payload, ItNext} -> - [binary_to_term(Payload) | iterate(ItNext)]; + [binary_to_term(Payload) | iterate_db(ItNext)]; none -> [] end. @@ -151,16 +152,42 @@ run_iterator_commands([{preserve, restore} | Rest], It, DB) -> {ok, ItNext} = emqx_replay_message_storage:restore_iterator(DB, Serial), run_iterator_commands(Rest, ItNext, DB); run_iterator_commands([], It, _DB) -> - iterate(It). + iterate_db(It). %%-------------------------------------------------------------------- %% Setup / teardown %%-------------------------------------------------------------------- -open(Filename, Options) -> - {ok, DBHandle} = rocksdb:open(Filename, [{create_if_missing, true}]), - {Schema, CFRefs} = emqx_replay_message_storage:create_new(DBHandle, ?GEN_ID, Options), - emqx_replay_message_storage:open(DBHandle, ?GEN_ID, CFRefs, Schema). 
+open_db(Filepath, Options) -> + {ok, Handle} = rocksdb:open(Filepath, [{create_if_missing, true}]), + {Schema, CFRefs} = emqx_replay_message_storage:create_new(Handle, ?GEN_ID, Options), + DB = emqx_replay_message_storage:open(Handle, ?GEN_ID, CFRefs, Schema), + {DB, Handle}. + +close_db(Handle) -> + rocksdb:close(Handle). + +make_filepath(TC) -> + make_filepath(TC, 0). + +make_filepath(TC, InstID) -> + Name = io_lib:format("~0p.~0p", [TC, InstID]), + Path = filename:join(?WORK_DIR ++ ["proper", "runs", get_run_id(), ?MODULE_STRING, Name]), + ok = filelib:ensure_dir(Path), + Path. + +get_run_id() -> + case persistent_term:get(?RUN_ID, undefined) of + RunID when RunID /= undefined -> + RunID; + undefined -> + RunID = make_run_id(), + ok = persistent_term:put(?RUN_ID, RunID), + RunID + end. + +make_run_id() -> + calendar:system_time_to_rfc3339(erlang:system_time(second), [{offset, "Z"}]). %%-------------------------------------------------------------------- %% Type generators From 60e307032879a1b7842e0dbaef68b84b3078990b Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 6 Jan 2023 13:52:53 +0300 Subject: [PATCH 23/49] test(ds): Scale up number of messages per topic in proptests --- apps/emqx_replay/test/props/prop_replay_message_storage.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl index baab164f0..8be5a5edb 100644 --- a/apps/emqx_replay/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -293,7 +293,7 @@ messages(Topic) -> Ts, list(Topic), interleaved( - ?LET(Messages, vector(length(Ts), list(message())), lists:zip(Ts, Messages)) + ?LET(Messages, vector(length(Ts), scaled(4, list(message()))), lists:zip(Ts, Messages)) ) ). 
From ac0935ef912f54407c50fb403573e7290e80afd1 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 6 Jan 2023 13:54:59 +0300 Subject: [PATCH 24/49] test(ds): Proptest that iteration is exhaustive Compare iteration results against what an extremely simplified model produces. --- .../emqx_replay_message_storage_shim.erl | 58 ++++++++++++++++++ .../props/prop_replay_message_storage.erl | 59 +++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl diff --git a/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl b/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl new file mode 100644 index 000000000..125c9a9fc --- /dev/null +++ b/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl @@ -0,0 +1,58 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_replay_message_storage_shim). + +-export([open/0]). +-export([close/1]). +-export([store/5]). +-export([iterate/3]). + +-type topic() :: list(binary()). +-type time() :: integer(). + +-opaque t() :: ets:tid(). + +-spec open() -> t(). +open() -> + ets:new(?MODULE, [ordered_set, {keypos, 1}]). + +-spec close(t()) -> ok. +close(Tab) -> + true = ets:delete(Tab), + ok. 
+ +-spec store(t(), emqx_guid:guid(), time(), topic(), binary()) -> + ok | {error, _TODO}. +store(Tab, MessageID, PublishedAt, Topic, Payload) -> + true = ets:insert(Tab, {{PublishedAt, MessageID}, Topic, Payload}), + ok. + +-spec iterate(t(), emqx_topic:words(), time()) -> + [binary()]. +iterate(Tab, TopicFilter, StartTime) -> + ets:foldr( + fun({{PublishedAt, _}, Topic, Payload}, Acc) -> + case emqx_topic:match(Topic, TopicFilter) of + true when PublishedAt >= StartTime -> + [Payload | Acc]; + _ -> + Acc + end + end, + [], + Tab + ). diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl index 8be5a5edb..9619c4f05 100644 --- a/apps/emqx_replay/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -70,6 +70,50 @@ prop_next_seek_eq_initial_seek() -> emqx_replay_message_storage:compute_next_seek(0, Filter) ). +prop_iterate_messages() -> + TBPL = [4, 8, 12], + Options = #{ + timestamp_bits => 32, + topic_bits_per_level => TBPL, + epoch => 200 + }, + % TODO + % Shrinking is too unpredictable and leaves a LOT of garbage in the scratch dir. 
+ ?FORALL(Stream, noshrink(non_empty(messages(topic(TBPL)))), begin + Filepath = make_filepath(?FUNCTION_NAME, erlang:system_time(microsecond)), + {DB, Handle} = open_db(Filepath, Options), + Shim = emqx_replay_message_storage_shim:open(), + ok = store_db(DB, Stream), + ok = store_shim(Shim, Stream), + ?FORALL( + { + {Topic, _}, + Pattern, + StartTime + }, + { + nth(Stream), + topic_filter_pattern(), + start_time() + }, + begin + TopicFilter = make_topic_filter(Pattern, Topic), + Messages = iterate_db(DB, TopicFilter, StartTime), + Reference = iterate_shim(Shim, TopicFilter, StartTime), + ok = close_db(Handle), + ok = emqx_replay_message_storage_shim:close(Shim), + ?WHENFAIL( + begin + io:format(user, " *** Filepath = ~s~n", [Filepath]), + io:format(user, " *** TopicFilter = ~p~n", [TopicFilter]), + io:format(user, " *** StartTime = ~p~n", [StartTime]) + end, + is_list(Messages) andalso equals(Messages -- Reference, Reference -- Messages) + ) + end + ) + end). + prop_iterate_eq_iterate_with_preserve_restore() -> TBPL = [4, 8, 16, 12], Options = #{ @@ -154,6 +198,21 @@ run_iterator_commands([{preserve, restore} | Rest], It, DB) -> run_iterator_commands([], It, _DB) -> iterate_db(It). +store_shim(Shim, Messages) -> + lists:foreach( + fun({Topic, Payload = {MessageID, Timestamp, _}}) -> + Bin = term_to_binary(Payload), + emqx_replay_message_storage_shim:store(Shim, MessageID, Timestamp, Topic, Bin) + end, + Messages + ). + +iterate_shim(Shim, TopicFilter, StartTime) -> + lists:map( + fun binary_to_term/1, + emqx_replay_message_storage_shim:iterate(Shim, TopicFilter, StartTime) + ). 
+ %%-------------------------------------------------------------------- %% Setup / teardown %%-------------------------------------------------------------------- From a11e75d1894add20802fc7983e7342c6473ccca1 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 6 Jan 2023 13:42:28 +0300 Subject: [PATCH 25/49] fix(ds): clear bitmask of topic filter tail containing wildcards --- .../src/emqx_replay_message_storage.erl | 9 +++-- .../test/emqx_replay_storage_SUITE.erl | 35 +++++++++++++++++-- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 1c91066cf..759ddf559 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -457,8 +457,13 @@ compute_topic_bitmask([], [{hash, level, Size} | Rest], Acc) -> compute_topic_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); compute_topic_bitmask([_ | Tail], [{hash, level, Size} | Rest], Acc) -> compute_topic_bitmask(Tail, Rest, bitwise_concat(Acc, ones(Size), Size)); -compute_topic_bitmask(_, [{hash, levels, Size} | Rest], Acc) -> - compute_topic_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size)); +compute_topic_bitmask(Tail, [{hash, levels, Size} | Rest], Acc) -> + Mask = + case lists:member('+', Tail) orelse lists:member('#', Tail) of + true -> 0; + false -> ones(Size) + end, + compute_topic_bitmask([], Rest, bitwise_concat(Acc, Mask, Size)); compute_topic_bitmask(_, [], Acc) -> Acc. diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl index c99063350..3d7e7cb41 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl @@ -115,6 +115,19 @@ t_iterate_wildcard(_Config) -> ), ok. 
+t_iterate_long_tail_wildcard(_Config) -> + Topic = "b/c/d/e/f/g", + TopicFilter = "b/c/d/e/+/+", + Timestamps = lists:seq(1, 100), + _ = [ + store(?ZONE, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) + || PublishedAt <- Timestamps + ], + ?assertEqual( + lists:sort([{"b/c/d/e/f/g", PublishedAt} || PublishedAt <- lists:seq(50, 100)]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, TopicFilter, 50)]) + ). + store(Zone, PublishedAt, Topic, Payload) -> ID = emqx_guid:gen(), emqx_replay_local_store:store(Zone, ID, PublishedAt, parse_topic(Topic), Payload). @@ -140,13 +153,29 @@ parse_topic(Topic) -> all() -> emqx_common_test_helpers:all(?MODULE). -init_per_testcase(TC, Config) -> +init_per_suite(Config) -> {ok, _} = application:ensure_all_started(emqx_replay), + Config. + +end_per_suite(_Config) -> + ok = application:stop(emqx_replay). + +init_per_testcase(TC, Config) -> + ok = set_zone_config(zone(TC), #{ + timestamp_bits => 64, + topic_bits_per_level => [8, 8, 32, 16], + epoch => 5 + }), {ok, _} = emqx_replay_local_store_sup:start_zone(zone(TC)), Config. -end_per_testcase(_TC, _Config) -> - ok = application:stop(emqx_replay). +end_per_testcase(TC, _Config) -> + ok = emqx_replay_local_store_sup:stop_zone(zone(TC)). zone(TC) -> list_to_atom(?MODULE_STRING ++ atom_to_list(TC)). + +set_zone_config(Zone, Options) -> + ok = application:set_env(emqx_replay, zone_config, #{ + Zone => {emqx_replay_message_storage, Options} + }). 
From 7f408da251fcfe311cc07b4ece07e3bbb36515b6 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 11 Jan 2023 18:43:31 +0300 Subject: [PATCH 26/49] refactor(ds): Use inline functions instead of macros --- .../src/emqx_replay_message_storage.erl | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index 759ddf559..fe0a0e08a 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -121,7 +121,15 @@ -export_type([db/0, iterator/0, schema/0]). --compile({inline, [ones/1, bitwise_concat/3]}). +-compile( + {inline, [ + bitwise_concat/3, + ones/1, + successor/1, + topic_hash_matches/3, + time_matches/3 + ]} +). %%================================================================================ %% Type declarations @@ -343,14 +351,6 @@ restore_iterator(DB, #{ %% Internal exports %%================================================================================ --define(topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), - (Bitstring band HashBitmask) == HashBitfilter -). - --define(time_matches(Bitstring, TimeBitfilter, TimeBitmask), - (Bitstring band TimeBitmask) >= TimeBitfilter -). - -spec bitsize(keymapper()) -> bits(). bitsize(#keymapper{bitsize = Bitsize}) -> Bitsize. @@ -424,8 +424,8 @@ compute_next_seek( time_bitmask = TimeBitmask } ) -> - HashMatches = ?topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), - TimeMatches = ?time_matches(Bitstring, TimeBitfilter, TimeBitmask), + HashMatches = topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), + TimeMatches = time_matches(Bitstring, TimeBitfilter, TimeBitmask), compute_next_seek(HashMatches, TimeMatches, Bitstring, Filter). 
%%================================================================================ @@ -508,8 +508,8 @@ match_next( time_bitmask = TimeBitmask } ) -> - HashMatches = ?topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), - TimeMatches = ?time_matches(Bitstring, TimeBitfilter, TimeBitmask), + HashMatches = topic_hash_matches(Bitstring, HashBitfilter, HashBitmask), + TimeMatches = time_matches(Bitstring, TimeBitfilter, TimeBitmask), case HashMatches and TimeMatches of true -> Message = {Topic, _Payload} = unwrap_message_value(Value), @@ -541,7 +541,7 @@ compute_next_seek( none -> none; _ -> - TimeMatches = ?time_matches(NextBitstring, TimeBitfilter, TimeBitmask), + TimeMatches = time_matches(NextBitstring, TimeBitfilter, TimeBitmask), compute_next_seek(true, TimeMatches, NextBitstring, Filter) end; %% `Bitstring` is out of the time range defined by `TimeBitfilter`. @@ -558,6 +558,12 @@ compute_next_seek( compute_next_seek(true, true, Bitstring, _It) -> Bitstring. +topic_hash_matches(Bitstring, HashBitfilter, HashBitmask) -> + (Bitstring band HashBitmask) == HashBitfilter. + +time_matches(Bitstring, TimeBitfilter, TimeBitmask) -> + (Bitstring band TimeBitmask) >= TimeBitfilter. + compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask) -> % Replace the bits of the timestamp in `Bistring` with bits from `Timebitfilter`. (Bitstring band (bnot TimeBitmask)) bor TimeBitfilter. From d875fa49d315c34ee2fa9e0136cafc8a1ba8d163 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 10 Jan 2023 11:57:04 +0300 Subject: [PATCH 27/49] feat(ds): Enable periodic iterator refresh This might be helpful during replays taking multiple tens of seconds so that underlying iterators won't hold onto in-memory / on-disk data structures for too long, preventing rocksdb from recycling them. 
--- .../src/emqx_replay_message_storage.erl | 71 ++++++++++++++++--- .../props/prop_replay_message_storage.erl | 39 ++++++++++ 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index fe0a0e08a..f58c006cd 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -95,10 +95,12 @@ -export([store/5]). -export([make_iterator/3]). +-export([make_iterator/4]). -export([next/1]). -export([preserve_iterator/1]). -export([restore_iterator/2]). +-export([refresh_iterator/1]). %% Debug/troubleshooting: %% Keymappers @@ -159,9 +161,18 @@ topic_bits_per_level := bits_per_level(), %% Maximum granularity of iteration over time. epoch := time(), + cf_options => emqx_replay_local_store:db_cf_options() }. +-type iteration_options() :: #{ + %% Request periodic iterator refresh. + %% This might be helpful during replays taking a lot of time (e.g. tens of seconds). + %% Note that `{every, 1000}` means 1000 _operations_ with the iterator which is not + %% the same as 1000 replayed messages. + iterator_refresh => {every, _NumOperations :: pos_integer()} +}. + %% Persistent configuration of the generation, it is used to create db %% record when the database is reopened -record(schema, {keymapper :: keymapper()}). @@ -173,14 +184,16 @@ cf :: rocksdb:cf_handle(), keymapper :: keymapper(), write_options = [{sync, true}] :: emqx_replay_local_store:db_write_options(), - read_options = [] :: emqx_replay_local_store:db_write_options() + read_options = [] :: emqx_replay_local_store:db_write_options(), + iteration_options = #{} :: iteration_options() }). 
-record(it, { handle :: rocksdb:itr_handle(), filter :: keyspace_filter(), cursor :: binary() | undefined, - next_action :: {seek, binary()} | next + next_action :: {seek, binary()} | next, + refresh_counter :: {non_neg_integer(), pos_integer()} | undefined }). -record(filter, { @@ -274,41 +287,51 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, rocksdb:put(DBHandle, CFHandle, Key, Value, DB#db.write_options). -spec make_iterator(db(), emqx_topic:words(), time() | earliest) -> + {ok, iterator()} | {error, _TODO}. +make_iterator(DB, TopicFilter, StartTime) -> + % TODO wire it up somehow to the upper level + make_iterator(DB, TopicFilter, StartTime, DB#db.iteration_options). + +-spec make_iterator(db(), emqx_topic:words(), time() | earliest, iteration_options()) -> % {error, invalid_start_time}? might just start from the beginning of time % and call it a day: client violated the contract anyway. {ok, iterator()} | {error, _TODO}. -make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime) -> +make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime, Options) -> case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of {ok, ITHandle} -> % TODO earliest Filter = make_keyspace_filter(TopicFilter, StartTime, DB#db.keymapper), InitialSeek = combine(compute_initial_seek(Filter), <<>>, DB#db.keymapper), + RefreshCounter = make_refresh_counter(maps:get(iterator_refresh, Options, undefined)), {ok, #it{ handle = ITHandle, filter = Filter, - next_action = {seek, InitialSeek} + next_action = {seek, InitialSeek}, + refresh_counter = RefreshCounter }}; Err -> Err end. -spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}. 
-next(It = #it{filter = #filter{keymapper = Keymapper}}) -> +next(It0 = #it{filter = #filter{keymapper = Keymapper}}) -> + It = maybe_refresh_iterator(It0), case rocksdb:iterator_move(It#it.handle, It#it.next_action) of % spec says `{ok, Key}` is also possible but the implementation says it's not {ok, Key, Value} -> + % Preserve last seen key in the iterator so it could be restored / refreshed later. + ItNext = It#it{cursor = Key}, Bitstring = extract(Key, Keymapper), case match_next(Bitstring, Value, It#it.filter) of {_Topic, Payload} -> - % Preserve last seen key in the iterator so it could be restored later. - {value, Payload, It#it{cursor = Key, next_action = next}}; + {value, Payload, ItNext#it{next_action = next}}; next -> - next(It#it{next_action = next}); + next(ItNext#it{next_action = next}); NextBitstring when is_integer(NextBitstring) -> NextSeek = combine(NextBitstring, <<>>, Keymapper), - next(It#it{next_action = {seek, NextSeek}}); + next(ItNext#it{next_action = {seek, NextSeek}}); none -> - stop_iteration(It) + stop_iteration(ItNext) end; {error, invalid_iterator} -> stop_iteration(It); @@ -347,6 +370,22 @@ restore_iterator(DB, #{ Err end. +-spec refresh_iterator(iterator()) -> iterator(). +refresh_iterator(It = #it{handle = Handle, cursor = Cursor, next_action = Action}) -> + case rocksdb:iterator_refresh(Handle) of + ok when Action =:= next -> + % Now the underlying iterator is invalid, need to seek instead. + It#it{next_action = {seek, successor(Cursor)}}; + ok -> + % Now the underlying iterator is invalid, but will seek soon anyway. + It; + {error, _} -> + % Implementation could in theory return an {error, ...} tuple. + % Supposedly our best bet is to ignore it. + % TODO logging? + It + end. 
+ %%================================================================================ %% Internal exports %%================================================================================ @@ -687,6 +726,18 @@ substring(I, Offset, Size) -> data_cf(GenId) -> ?MODULE_STRING ++ integer_to_list(GenId). +make_refresh_counter({every, N}) when is_integer(N), N > 0 -> + {0, N}; +make_refresh_counter(undefined) -> + undefined. + +maybe_refresh_iterator(It = #it{refresh_counter = {N, N}}) -> + refresh_iterator(It#it{refresh_counter = {0, N}}); +maybe_refresh_iterator(It = #it{refresh_counter = {M, N}}) -> + It#it{refresh_counter = {M + 1, N}}; +maybe_refresh_iterator(It = #it{refresh_counter = undefined}) -> + It. + stop_iteration(It) -> ok = rocksdb:iterator_close(It#it.handle), none. diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl index 9619c4f05..20c897c2a 100644 --- a/apps/emqx_replay/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -150,6 +150,41 @@ prop_iterate_eq_iterate_with_preserve_restore() -> ) end). +prop_iterate_eq_iterate_with_refresh() -> + TBPL = [4, 8, 16, 12], + Options = #{ + timestamp_bits => 32, + topic_bits_per_level => TBPL, + epoch => 500 + }, + {DB, _Handle} = open_db(make_filepath(?FUNCTION_NAME), Options), + ?FORALL(Stream, non_empty(messages(topic(TBPL))), begin + % TODO + % This proptest is also impure, see above. 
+ ok = store_db(DB, Stream), + ?FORALL( + { + {Topic, _}, + Pat, + StartTime, + RefreshEvery + }, + { + nth(Stream), + topic_filter_pattern(), + start_time(), + pos_integer() + }, + ?TIMEOUT(5000, begin + TopicFilter = make_topic_filter(Pat, Topic), + IterationOptions = #{iterator_refresh => {every, RefreshEvery}}, + Iterator = make_iterator(DB, TopicFilter, StartTime, IterationOptions), + Messages = iterate_db(Iterator), + equals(Messages, iterate_db(DB, TopicFilter, StartTime)) + end) + ) + end). + % store_message_stream(DB, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) -> % MessageID = emqx_guid:gen(), % PublishedAt = ChunkNum, @@ -184,6 +219,10 @@ make_iterator(DB, TopicFilter, StartTime) -> {ok, It} = emqx_replay_message_storage:make_iterator(DB, TopicFilter, StartTime), It. +make_iterator(DB, TopicFilter, StartTime, Options) -> + {ok, It} = emqx_replay_message_storage:make_iterator(DB, TopicFilter, StartTime, Options), + It. + run_iterator_commands([iterate | Rest], It, DB) -> case emqx_replay_message_storage:next(It) of {value, Payload, ItNext} -> From 2f385022cd0c9ed5d92d27bf53c38b3eea7c7f0f Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 12 Jan 2023 17:12:45 +0300 Subject: [PATCH 28/49] chore(ds): Rename testsuite to reflect test subject better --- ...play_storage_SUITE.erl => emqx_replay_local_store_SUITE.erl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename apps/emqx_replay/test/{emqx_replay_storage_SUITE.erl => emqx_replay_local_store_SUITE.erl} (99%) diff --git a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl similarity index 99% rename from apps/emqx_replay/test/emqx_replay_storage_SUITE.erl rename to apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index 3d7e7cb41..5a1bb59f4 100644 --- a/apps/emqx_replay/test/emqx_replay_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -13,7 +13,7 @@ %% See the License for the specific 
language governing permissions and %% limitations under the License. %%-------------------------------------------------------------------- --module(emqx_replay_storage_SUITE). +-module(emqx_replay_local_store_SUITE). -compile(export_all). -compile(nowarn_export_all). From cf70184ebd17afb476e67a3ec1293bdf2ac5ceed Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 12 Jan 2023 17:59:15 +0300 Subject: [PATCH 29/49] test(ds): Provide more general `keymapper_info/1` --- .../src/emqx_replay_message_storage.erl | 8 ++++---- .../test/props/prop_replay_message_storage.erl | 14 +++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index f58c006cd..d14a07c6d 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -105,7 +105,7 @@ %% Debug/troubleshooting: %% Keymappers -export([ - bitsize/1, + keymapper_info/1, compute_bitstring/3, compute_topic_bitmask/2, compute_time_bitmask/1, @@ -390,9 +390,9 @@ refresh_iterator(It = #it{handle = Handle, cursor = Cursor, next_action = Action %% Internal exports %%================================================================================ --spec bitsize(keymapper()) -> bits(). -bitsize(#keymapper{bitsize = Bitsize}) -> - Bitsize. +-spec keymapper_info(keymapper()) -> [bitsource()]. +keymapper_info(#keymapper{source = Source, bitsize = Bitsize, epoch = Epoch}) -> + #{source => Source, bitsize => Bitsize, epoch => Epoch}. make_message_key(Topic, PublishedAt, MessageID, Keymapper) -> combine(compute_bitstring(Topic, PublishedAt, Keymapper), MessageID, Keymapper). 
diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl index 20c897c2a..c468097c7 100644 --- a/apps/emqx_replay/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -28,13 +28,14 @@ %%-------------------------------------------------------------------- prop_bitstring_computes() -> - ?FORALL(Keymapper, keymapper(), begin - Bitsize = emqx_replay_message_storage:bitsize(Keymapper), + ?FORALL( + Keymapper, + keymapper(), ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin BS = emqx_replay_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), - is_integer(BS) andalso (BS < (1 bsl Bitsize)) + is_integer(BS) andalso (BS < (1 bsl get_keymapper_bitsize(Keymapper))) end) - end). + ). prop_topic_bitmask_computes() -> Keymapper = make_keymapper(16, [8, 12, 16], 100), @@ -56,7 +57,7 @@ prop_next_seek_monotonic() -> ), ?FORALL( Bitstring, - bitstr(emqx_replay_message_storage:bitsize(Keymapper)), + bitstr(get_keymapper_bitsize(Keymapper)), emqx_replay_message_storage:compute_next_seek(Bitstring, Filter) >= Bitstring ) end @@ -436,6 +437,9 @@ make_keymapper(TimestampBits, TopicBits, MaxEpoch) -> epoch => MaxEpoch }). +get_keymapper_bitsize(Keymapper) -> + maps:get(bitsize, emqx_replay_message_storage:keymapper_info(Keymapper)). + -spec interleave(list({Tag, list(E)}), rand:state()) -> list({Tag, E}). interleave(Seqs, Rng) -> interleave(Seqs, length(Seqs), Rng). 
From 2bf8a07b05a3dbb8ca13ca494b57056cc4df0132 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 12 Jan 2023 17:59:55 +0300 Subject: [PATCH 30/49] test(ds): Split unit tests off into a full-fledged suite --- .../src/emqx_replay_message_storage.erl | 160 -------------- .../emqx_replay_message_storage_SUITE.erl | 200 ++++++++++++++++++ 2 files changed, 200 insertions(+), 160 deletions(-) create mode 100644 apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index d14a07c6d..b1c5f1806 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -741,163 +741,3 @@ maybe_refresh_iterator(It = #it{refresh_counter = undefined}) -> stop_iteration(It) -> ok = rocksdb:iterator_close(It#it.handle), none. - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - -make_keymapper_test_() -> - [ - ?_assertEqual( - #keymapper{ - source = [ - {timestamp, 9, 23}, - {hash, level, 2}, - {hash, level, 4}, - {hash, levels, 8}, - {timestamp, 0, 9} - ], - bitsize = 46, - epoch = 512 - }, - make_keymapper(#{ - timestamp_bits => 32, - topic_bits_per_level => [2, 4, 8], - epoch => 1000 - }) - ), - ?_assertEqual( - #keymapper{ - source = [ - {timestamp, 0, 32}, - {hash, levels, 16} - ], - bitsize = 48, - epoch = 1 - }, - make_keymapper(#{ - timestamp_bits => 32, - topic_bits_per_level => [16], - epoch => 1 - }) - ) - ]. - -compute_test_bitmask(TopicFilter) -> - compute_topic_bitmask( - TopicFilter, - [ - {hash, level, 3}, - {hash, level, 4}, - {hash, level, 5}, - {hash, levels, 2} - ], - 0 - ). 
- -bitmask_test_() -> - [ - ?_assertEqual( - 2#111_1111_11111_11, - compute_test_bitmask([<<"foo">>, <<"bar">>]) - ), - ?_assertEqual( - 2#111_0000_11111_11, - compute_test_bitmask([<<"foo">>, '+']) - ), - ?_assertEqual( - 2#111_0000_00000_11, - compute_test_bitmask([<<"foo">>, '+', '+']) - ), - ?_assertEqual( - 2#111_0000_11111_00, - compute_test_bitmask([<<"foo">>, '+', <<"bar">>, '+']) - ) - ]. - -wildcard_bitmask_test_() -> - [ - ?_assertEqual( - 2#000_0000_00000_00, - compute_test_bitmask(['#']) - ), - ?_assertEqual( - 2#111_0000_00000_00, - compute_test_bitmask([<<"foo">>, '#']) - ), - ?_assertEqual( - 2#111_1111_11111_00, - compute_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, '#']) - ), - ?_assertEqual( - 2#111_1111_11111_11, - compute_test_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, '#']) - ) - ]. - -%% Filter = |123|***|678|***| -%% Mask = |123|***|678|***| -%% Key1 = |123|011|108|121| → Seek = 0 |123|011|678|000| -%% Key2 = |123|011|679|919| → Seek = 0 |123|012|678|000| -%% Key3 = |123|999|679|001| → Seek = 1 |123|000|678|000| → eos -%% Key4 = |125|011|179|017| → Seek = 1 |123|000|678|000| → eos - -compute_test_topic_seek(Bitstring, Bitfilter, HBitmask) -> - compute_topic_seek( - Bitstring, - Bitfilter, - HBitmask, - [ - {hash, level, 8}, - {hash, level, 8}, - {hash, level, 16}, - {hash, levels, 12} - ], - 8 + 8 + 16 + 12 - ). 
- -next_seek_test_() -> - [ - ?_assertMatch( - none, - compute_test_topic_seek( - 16#FD_42_4242_043, - 16#FD_42_4242_042, - 16#FF_FF_FFFF_FFF - ) - ), - ?_assertMatch( - 16#FD_11_0678_000, - compute_test_topic_seek( - 16#FD_11_0108_121, - 16#FD_00_0678_000, - 16#FF_00_FFFF_000 - ) - ), - ?_assertMatch( - 16#FD_12_0678_000, - compute_test_topic_seek( - 16#FD_11_0679_919, - 16#FD_00_0678_000, - 16#FF_00_FFFF_000 - ) - ), - ?_assertMatch( - none, - compute_test_topic_seek( - 16#FD_FF_0679_001, - 16#FD_00_0678_000, - 16#FF_00_FFFF_000 - ) - ), - ?_assertMatch( - none, - compute_test_topic_seek( - 16#FE_11_0179_017, - 16#FD_00_0678_000, - 16#FF_00_FFFF_000 - ) - ) - ]. - --endif. diff --git a/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl new file mode 100644 index 000000000..3fca48a7b --- /dev/null +++ b/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl @@ -0,0 +1,200 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_replay_message_storage_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("stdlib/include/assert.hrl"). 
+ +-import(emqx_replay_message_storage, [ + make_keymapper/1, + keymapper_info/1, + compute_topic_bitmask/2, + compute_time_bitmask/1, + compute_topic_seek/4 +]). + +all() -> emqx_common_test_helpers:all(?MODULE). + +t_make_keymapper(_) -> + ?assertMatch( + #{ + source := [ + {timestamp, 9, 23}, + {hash, level, 2}, + {hash, level, 4}, + {hash, levels, 8}, + {timestamp, 0, 9} + ], + bitsize := 46, + epoch := 512 + }, + keymapper_info( + make_keymapper(#{ + timestamp_bits => 32, + topic_bits_per_level => [2, 4, 8], + epoch => 1000 + }) + ) + ). + +t_make_keymapper_single_hash_level(_) -> + ?assertMatch( + #{ + source := [ + {timestamp, 0, 32}, + {hash, levels, 16} + ], + bitsize := 48, + epoch := 1 + }, + keymapper_info( + make_keymapper(#{ + timestamp_bits => 32, + topic_bits_per_level => [16], + epoch => 1 + }) + ) + ). + +t_make_keymapper_no_timestamp(_) -> + ?assertMatch( + #{ + source := [ + {hash, level, 4}, + {hash, level, 8}, + {hash, levels, 16} + ], + bitsize := 28, + epoch := 1 + }, + keymapper_info( + make_keymapper(#{ + timestamp_bits => 0, + topic_bits_per_level => [4, 8, 16], + epoch => 42 + }) + ) + ). + +t_compute_topic_bitmask(_) -> + KM = make_keymapper(#{topic_bits_per_level => [3, 4, 5, 2], timestamp_bits => 0, epoch => 1}), + ?assertEqual( + 2#111_1111_11111_11, + compute_topic_bitmask([<<"foo">>, <<"bar">>], KM) + ), + ?assertEqual( + 2#111_0000_11111_11, + compute_topic_bitmask([<<"foo">>, '+'], KM) + ), + ?assertEqual( + 2#111_0000_00000_11, + compute_topic_bitmask([<<"foo">>, '+', '+'], KM) + ), + ?assertEqual( + 2#111_0000_11111_00, + compute_topic_bitmask([<<"foo">>, '+', <<"bar">>, '+'], KM) + ). 
+ +t_compute_topic_bitmask_wildcard(_) -> + KM = make_keymapper(#{topic_bits_per_level => [3, 4, 5, 2], timestamp_bits => 0, epoch => 1}), + ?assertEqual( + 2#000_0000_00000_00, + compute_topic_bitmask(['#'], KM) + ), + ?assertEqual( + 2#111_0000_00000_00, + compute_topic_bitmask([<<"foo">>, '#'], KM) + ), + ?assertEqual( + 2#111_1111_11111_00, + compute_topic_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, '#'], KM) + ). + +t_compute_topic_bitmask_wildcard_long_tail(_) -> + KM = make_keymapper(#{topic_bits_per_level => [3, 4, 5, 2], timestamp_bits => 0, epoch => 1}), + ?assertEqual( + 2#111_1111_11111_11, + compute_topic_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, <<"xyzzy">>], KM) + ), + ?assertEqual( + 2#111_1111_11111_00, + compute_topic_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, '#'], KM) + ). + +t_compute_time_bitmask(_) -> + KM = make_keymapper(#{topic_bits_per_level => [1, 2, 3], timestamp_bits => 10, epoch => 200}), + ?assertEqual(2#111_000000_1111111, compute_time_bitmask(KM)). + +t_compute_time_bitmask_epoch_only(_) -> + KM = make_keymapper(#{topic_bits_per_level => [1, 2, 3], timestamp_bits => 10, epoch => 1}), + ?assertEqual(2#1111111111_000000, compute_time_bitmask(KM)). 
+ +%% Filter = |123|***|678|***| +%% Mask = |123|***|678|***| +%% Key1 = |123|011|108|121| → Seek = 0 |123|011|678|000| +%% Key2 = |123|011|679|919| → Seek = 0 |123|012|678|000| +%% Key3 = |123|999|679|001| → Seek = 1 |123|000|678|000| → eos +%% Key4 = |125|011|179|017| → Seek = 1 |123|000|678|000| → eos + +t_compute_next_topic_seek(_) -> + KM = make_keymapper(#{topic_bits_per_level => [8, 8, 16, 12], timestamp_bits => 0, epoch => 1}), + ?assertMatch( + none, + compute_topic_seek( + 16#FD_42_4242_043, + 16#FD_42_4242_042, + 16#FF_FF_FFFF_FFF, + KM + ) + ), + ?assertMatch( + 16#FD_11_0678_000, + compute_topic_seek( + 16#FD_11_0108_121, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000, + KM + ) + ), + ?assertMatch( + 16#FD_12_0678_000, + compute_topic_seek( + 16#FD_11_0679_919, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000, + KM + ) + ), + ?assertMatch( + none, + compute_topic_seek( + 16#FD_FF_0679_001, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000, + KM + ) + ), + ?assertMatch( + none, + compute_topic_seek( + 16#FE_11_0179_017, + 16#FD_00_0678_000, + 16#FF_00_FFFF_000, + KM + ) + ). From f5a7b49f57679850075db724600fdff2ab2eda04 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 16 Jan 2023 17:25:55 +0300 Subject: [PATCH 31/49] feat(ds): Wire iteration options up to the app config --- apps/emqx_replay/src/emqx_replay_conf.erl | 46 ++++++++++++++----- .../src/emqx_replay_local_store.erl | 2 +- .../src/emqx_replay_message_storage.erl | 20 +++++--- .../test/emqx_replay_local_store_SUITE.erl | 5 +- 4 files changed, 53 insertions(+), 20 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_conf.erl b/apps/emqx_replay/src/emqx_replay_conf.erl index 8f7105312..57ba87ddf 100644 --- a/apps/emqx_replay/src/emqx_replay_conf.erl +++ b/apps/emqx_replay/src/emqx_replay_conf.erl @@ -20,27 +20,51 @@ %% API: -export([zone_config/1, db_options/0]). +-export([zone_iteration_options/1]). +-export([default_iteration_options/0]). 
+ %%================================================================================ %% API funcions %%================================================================================ -define(APP, emqx_replay). --spec zone_config(emqx_types:zone()) -> - {module(), term()}. +-type zone() :: emqx_types:zone(). +-type config() :: + {emqx_replay_message_storage, emqx_replay_message_storage:options()} + | {module(), _Options}. + +-spec zone_config(zone()) -> config(). zone_config(Zone) -> - DefaultConf = - #{ - timestamp_bits => 64, - topic_bits_per_level => [8, 8, 8, 32, 16], - epoch => 5 - }, - DefaultZoneConfig = application:get_env( - ?APP, default_zone_config, {emqx_replay_message_storage, DefaultConf} - ), + DefaultZoneConfig = application:get_env(?APP, default_zone_config, default_zone_config()), Zones = application:get_env(?APP, zone_config, #{}), maps:get(Zone, Zones, DefaultZoneConfig). +-spec zone_iteration_options(zone()) -> emqx_replay_message_storage:iteration_options(). +zone_iteration_options(Zone) -> + case zone_config(Zone) of + {emqx_replay_message_storage, Config} -> + maps:get(iteration, Config, default_iteration_options()); + {_Module, _} -> + default_iteration_options() + end. + +-spec default_iteration_options() -> emqx_replay_message_storage:iteration_options(). +default_iteration_options() -> + {emqx_replay_message_storage, Config} = default_zone_config(), + maps:get(iteration, Config). + +-spec default_zone_config() -> config(). +default_zone_config() -> + {emqx_replay_message_storage, #{ + timestamp_bits => 64, + topic_bits_per_level => [8, 8, 8, 32, 16], + epoch => 5, + iteration => #{ + iterator_refresh => {every, 100} + } + }}. + -spec db_options() -> emqx_replay_local_store:db_options(). db_options() -> application:get_env(?APP, db_options, []). 
diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 23cedb04c..15a400a92 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -150,7 +150,7 @@ read_metadata(S) -> -spec read_metadata(gen_id(), #s{}) -> ok. read_metadata(GenId, S = #s{zone = Zone, db = DBHandle, column_families = CFs}) -> Gen = #generation{module = Mod, data = Data} = schema_get_gen(DBHandle, GenId), - DB = Mod:open(DBHandle, GenId, CFs, Data), + DB = Mod:open(Zone, DBHandle, GenId, CFs, Data), meta_put(Zone, GenId, Gen#generation{data = DB}). -spec ensure_current_generation(#s{}) -> #s{}. diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index b1c5f1806..dd6c41598 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -90,7 +90,7 @@ %%================================================================================ %% API: --export([create_new/3, open/4]). +-export([create_new/3, open/5]). -export([make_keymapper/1]). -export([store/5]). @@ -123,6 +123,9 @@ -export_type([db/0, iterator/0, schema/0]). +-export_type([options/0]). +-export_type([iteration_options/0]). + -compile( {inline, [ bitwise_concat/3, @@ -162,6 +165,8 @@ %% Maximum granularity of iteration over time. epoch := time(), + iteration => iteration_options(), + cf_options => emqx_replay_local_store:db_cf_options() }. @@ -180,12 +185,12 @@ -opaque schema() :: #schema{}. -record(db, { + zone :: emqx_types:zone(), handle :: rocksdb:db_handle(), cf :: rocksdb:cf_handle(), keymapper :: keymapper(), write_options = [{sync, true}] :: emqx_replay_local_store:db_write_options(), - read_options = [] :: emqx_replay_local_store:db_write_options(), - iteration_options = #{} :: iteration_options() + read_options = [] :: emqx_replay_local_store:db_write_options() }). 
-record(it, { @@ -233,7 +238,6 @@ %% Create a new column family for the generation and a serializable representation of the schema -spec create_new(rocksdb:db_handle(), emqx_replay_local_store:gen_id(), options()) -> {schema(), emqx_replay_local_store:cf_refs()}. -%{schema(), emqx_replay_local_store:cf_refs()}. create_new(DBHandle, GenId, Options) -> CFName = data_cf(GenId), CFOptions = maps:get(cf_options, Options, []), @@ -243,15 +247,17 @@ create_new(DBHandle, GenId, Options) -> %% Reopen the database -spec open( + emqx_types:zone(), rocksdb:db_handle(), emqx_replay_local_store:gen_id(), emqx_replay_local_store:cf_refs(), schema() ) -> db(). -open(DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) -> +open(Zone, DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) -> {value, {_, CFHandle}} = lists:keysearch(data_cf(GenId), 1, CFs), #db{ + zone = Zone, handle = DBHandle, cf = CFHandle, keymapper = Keymapper @@ -289,8 +295,8 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, -spec make_iterator(db(), emqx_topic:words(), time() | earliest) -> {ok, iterator()} | {error, _TODO}. make_iterator(DB, TopicFilter, StartTime) -> - % TODO wire it up somehow to the upper level - make_iterator(DB, TopicFilter, StartTime, DB#db.iteration_options). + Options = emqx_replay_conf:zone_iteration_options(DB#db.zone), + make_iterator(DB, TopicFilter, StartTime, Options). -spec make_iterator(db(), emqx_topic:words(), time() | earliest, iteration_options()) -> % {error, invalid_start_time}? 
might just start from the beginning of time diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index 5a1bb59f4..eee802e69 100644 --- a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -164,7 +164,10 @@ init_per_testcase(TC, Config) -> ok = set_zone_config(zone(TC), #{ timestamp_bits => 64, topic_bits_per_level => [8, 8, 32, 16], - epoch => 5 + epoch => 5, + iteration => #{ + iterator_refresh => {every, 5} + } }), {ok, _} = emqx_replay_local_store_sup:start_zone(zone(TC)), Config. From f80e906553063d6fd2089e865458ffbe9847a2f2 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 16 Jan 2023 17:26:57 +0300 Subject: [PATCH 32/49] fix(ds): Correct typespec --- apps/emqx_replay/src/emqx_replay_message_storage.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index dd6c41598..f2a6afaa6 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -396,7 +396,8 @@ refresh_iterator(It = #it{handle = Handle, cursor = Cursor, next_action = Action %% Internal exports %%================================================================================ --spec keymapper_info(keymapper()) -> [bitsource()]. +-spec keymapper_info(keymapper()) -> + #{source := [bitsource()], bitsize := bits(), epoch := time()}. keymapper_info(#keymapper{source = Source, bitsize = Bitsize, epoch = Epoch}) -> #{source => Source, bitsize => Bitsize, epoch => Epoch}. 
From cf6a5e1643a64c7e54362a88ee81eddebfce1aec Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 17 Jan 2023 13:31:01 +0300 Subject: [PATCH 33/49] feat(ds): Allow to create new storage generations --- apps/emqx_replay/src/emqx_replay.app.src | 2 +- apps/emqx_replay/src/emqx_replay.erl | 4 +- apps/emqx_replay/src/emqx_replay_conf.erl | 13 +- .../src/emqx_replay_local_store.erl | 163 +++++++++++++----- .../test/emqx_replay_local_store_SUITE.erl | 2 +- 5 files changed, 130 insertions(+), 54 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay.app.src b/apps/emqx_replay/src/emqx_replay.app.src index 7769e82e9..9c00a78ca 100644 --- a/apps/emqx_replay/src/emqx_replay.app.src +++ b/apps/emqx_replay/src/emqx_replay.app.src @@ -5,7 +5,7 @@ {vsn, "0.1.0"}, {modules, []}, {registered, []}, - {applications, [kernel, stdlib, rocksdb]}, + {applications, [kernel, stdlib, rocksdb, gproc]}, {mod, {emqx_replay_app, []}}, {env, []} ]}. diff --git a/apps/emqx_replay/src/emqx_replay.erl b/apps/emqx_replay/src/emqx_replay.erl index ee83e35d9..fb1ec39c6 100644 --- a/apps/emqx_replay/src/emqx_replay.erl +++ b/apps/emqx_replay/src/emqx_replay.erl @@ -27,8 +27,10 @@ %% parsed -type topic() :: list(binary()). +%% Timestamp +%% Earliest possible timestamp is 0. %% TODO granularity? --type time() :: integer(). +-type time() :: non_neg_integer(). %%================================================================================ %% API funcions diff --git a/apps/emqx_replay/src/emqx_replay_conf.erl b/apps/emqx_replay/src/emqx_replay_conf.erl index 57ba87ddf..46fa53867 100644 --- a/apps/emqx_replay/src/emqx_replay_conf.erl +++ b/apps/emqx_replay/src/emqx_replay_conf.erl @@ -23,6 +23,12 @@ -export([zone_iteration_options/1]). -export([default_iteration_options/0]). +-type backend_config() :: + {emqx_replay_message_storage, emqx_replay_message_storage:options()} + | {module(), _Options}. + +-export_type([backend_config/0]). 
+ %%================================================================================ %% API funcions %%================================================================================ @@ -30,11 +36,8 @@ -define(APP, emqx_replay). -type zone() :: emqx_types:zone(). --type config() :: - {emqx_replay_message_storage, emqx_replay_message_storage:options()} - | {module(), _Options}. --spec zone_config(zone()) -> config(). +-spec zone_config(zone()) -> backend_config(). zone_config(Zone) -> DefaultZoneConfig = application:get_env(?APP, default_zone_config, default_zone_config()), Zones = application:get_env(?APP, zone_config, #{}), @@ -54,7 +57,7 @@ default_iteration_options() -> {emqx_replay_message_storage, Config} = default_zone_config(), maps:get(iteration, Config). --spec default_zone_config() -> config(). +-spec default_zone_config() -> backend_config(). default_zone_config() -> {emqx_replay_message_storage, #{ timestamp_bits => 64, diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 15a400a92..0138ffed5 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -19,8 +19,9 @@ %% API: -export([start_link/1]). +-export([create_generation/3]). --export([make_iterator/3, store/5, next/1]). +-export([store/5, make_iterator/3, next/1]). %% behavior callbacks: -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -40,13 +41,18 @@ -type cf_refs() :: [{string(), rocksdb:cf_handle()}]. --record(generation, { +%% Message storage generation +%% Keep in mind that instances of this type are persisted in long-term storage. +-type generation() :: #{ %% Module that handles data for the generation - module :: module(), - %% Module-specific attributes - data :: term() - % time_range :: {emqx_replay:time(), emqx_replay:time()} -}). 
+ module := module(), + %% Module-specific data defined at generation creation time + data := term(), + %% When should this generation become active? + %% This generation should only contain messages timestamped no earlier than that. + %% The very first generation will have `since` equal 0. + since := emqx_replay:time() +}. -record(s, { zone :: emqx_types:zone(), @@ -61,6 +67,7 @@ -type gen_id() :: 0..16#ffff. +-opaque state() :: #s{}. -opaque iterator() :: #it{}. %% Contents of the default column family: @@ -70,19 +77,32 @@ -define(DEFAULT_CF_OPTS, []). +-define(REF(Zone), {via, gproc, {n, l, {?MODULE, Zone}}}). + %%================================================================================ %% API funcions %%================================================================================ -spec start_link(emqx_types:zone()) -> {ok, pid()}. start_link(Zone) -> - gen_server:start_link(?MODULE, [Zone], []). + gen_server:start_link(?REF(Zone), ?MODULE, [Zone], []). + +-spec create_generation(emqx_types:zone(), emqx_replay:time(), emqx_replay_conf:backend_config()) -> + {ok, gen_id()}. +create_generation(Zone, Since, Config = {_Module, _Options}) -> + gen_server:call(?REF(Zone), {create_generation, Since, Config}). + +-spec store(emqx_types:zone(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary()) -> + ok | {error, _TODO}. +store(Zone, GUID, Time, Topic, Msg) -> + #{module := Mod, data := Data} = meta_lookup_gen(Zone, Time), + Mod:store(Data, GUID, Time, Topic, Msg). -spec make_iterator(emqx_types:zone(), emqx_topic:words(), emqx_replay_message_storage:time()) -> {ok, _TODO} | {error, _TODO}. make_iterator(Zone, TopicFilter, StartTime) -> %% TODO: this is not supposed to work like this. 
Just a mock-up - #generation{module = Mod, data = Data} = meta_lookup(Zone, 0), + #{module := Mod, data := Data} = meta_lookup_gen(Zone, StartTime), case Mod:make_iterator(Data, TopicFilter, StartTime) of {ok, It} -> {ok, #it{ @@ -93,13 +113,6 @@ make_iterator(Zone, TopicFilter, StartTime) -> Err end. --spec store(emqx_types:zone(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary()) -> - ok | {error, _TODO}. -store(Zone, GUID, Time, Topic, Msg) -> - %% TODO: this is not supposed to work like this. Just a mock-up - #generation{module = Mod, data = Data} = meta_lookup(Zone, 0), - Mod:store(Data, GUID, Time, Topic, Msg). - -spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}. next(#it{module = Mod, data = It0}) -> case Mod:next(It0) of @@ -125,6 +138,9 @@ init([Zone]) -> read_metadata(S), {ok, S}. +handle_call({create_generation, Since, Config}, _From, S) -> + {ok, GenId, NS} = create_new_gen(Since, Config, S), + {reply, {ok, GenId}, NS}; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. @@ -142,41 +158,50 @@ terminate(_Reason, #s{db = DB, zone = Zone}) -> %% Internal functions %%================================================================================ --spec read_metadata(#s{}) -> ok. -read_metadata(S) -> - %% TODO: just a mockup to make the existing tests pass - read_metadata(0, S). +-spec read_metadata(state()) -> ok. +read_metadata(S = #s{db = DBHandle}) -> + Current = schema_get_current(DBHandle), + lists:foreach(fun(GenId) -> read_metadata(GenId, S) end, lists:seq(0, Current)). --spec read_metadata(gen_id(), #s{}) -> ok. -read_metadata(GenId, S = #s{zone = Zone, db = DBHandle, column_families = CFs}) -> - Gen = #generation{module = Mod, data = Data} = schema_get_gen(DBHandle, GenId), - DB = Mod:open(Zone, DBHandle, GenId, CFs, Data), - meta_put(Zone, GenId, Gen#generation{data = DB}). +-spec read_metadata(gen_id(), state()) -> ok. 
+read_metadata(GenId, S = #s{zone = Zone, db = DBHandle}) -> + Gen = open_gen(GenId, schema_get_gen(DBHandle, GenId), S), + meta_register_gen(Zone, GenId, Gen). --spec ensure_current_generation(#s{}) -> #s{}. -ensure_current_generation(S = #s{zone = Zone, db = DBHandle, column_families = CFs}) -> +-spec ensure_current_generation(state()) -> state(). +ensure_current_generation(S = #s{zone = Zone, db = DBHandle}) -> case schema_get_current(DBHandle) of undefined -> - GenId = 0, - ok = schema_put_current(DBHandle, GenId), - create_new_generation_schema(GenId, S); + Config = emqx_replay_conf:zone_config(Zone), + {ok, _, NS} = create_new_gen(0, Config, S), + NS; _GenId -> S end. --spec create_new_generation_schema(gen_id(), #s{}) -> #s{}. -create_new_generation_schema( - GenId, S = #s{zone = Zone, db = DBHandle, column_families = CFs} -) -> - {Module, Options} = emqx_replay_conf:zone_config(Zone), - {NewGenData, NewCFs} = Module:create_new(DBHandle, GenId, Options), - NewGen = #generation{ - module = Module, - data = NewGenData - }, +-spec create_new_gen(emqx_replay:time(), emqx_replay_conf:backend_config(), state()) -> + {ok, gen_id(), state()}. +create_new_gen(Since, Config, S = #s{zone = Zone, db = DBHandle}) -> + GenId = get_next_id(meta_get_current(Zone)), + GenId = get_next_id(schema_get_current(DBHandle)), + % TODO: Propagate errors to clients. + true = is_gen_valid(Zone, GenId, Since), + {ok, Gen, NS} = create_gen(GenId, Since, Config, S), %% TODO: Transaction? Column family creation can't be transactional, anyway. - ok = schema_put_gen(DBHandle, GenId, NewGen), - S#s{column_families = NewCFs ++ CFs}. + ok = schema_put_gen(DBHandle, GenId, Gen), + ok = schema_put_current(DBHandle, GenId), + ok = meta_register_gen(Zone, GenId, Gen), + {ok, GenId, NS}. 
+ +% -spec +create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, column_families = CFs}) -> + {Schema, NewCFs} = Module:create_new(DBHandle, GenId, Options), + Gen = #{ + module => Module, + data => Schema, + since => Since + }, + {ok, Gen, S#s{column_families = NewCFs ++ CFs}}. -spec open_db(emqx_types:zone()) -> {ok, rocksdb:db_handle(), cf_refs()} | {error, _TODO}. open_db(Zone) -> @@ -198,18 +223,27 @@ open_db(Zone) -> Error end. +-spec open_gen(gen_id(), generation(), state()) -> generation(). +open_gen( + GenId, + Gen = #{module := Mod, data := Data}, + #s{zone = Zone, db = DBHandle, column_families = CFs} +) -> + DB = Mod:open(Zone, DBHandle, GenId, CFs, Data), + Gen#{data := DB}. + %% Functions for dealing with the metadata stored persistently in rocksdb -define(CURRENT_GEN, <<"current">>). -define(SCHEMA_WRITE_OPTS, []). -define(SCHEMA_READ_OPTS, []). --spec schema_get_gen(rocksdb:db_handle(), gen_id()) -> #generation{}. +-spec schema_get_gen(rocksdb:db_handle(), gen_id()) -> generation(). schema_get_gen(DBHandle, GenId) -> {ok, Bin} = rocksdb:get(DBHandle, schema_gen_key(GenId), ?SCHEMA_READ_OPTS), binary_to_term(Bin). --spec schema_put_gen(rocksdb:db_handle(), gen_id(), #generation{}) -> ok | {error, _}. +-spec schema_put_gen(rocksdb:db_handle(), gen_id(), generation()) -> ok | {error, _}. schema_put_gen(DBHandle, GenId, Gen) -> rocksdb:put(DBHandle, schema_gen_key(GenId), term_to_binary(Gen), ?SCHEMA_WRITE_OPTS). @@ -238,11 +272,39 @@ schema_gen_key(N) -> -define(PERSISTENT_TERM(ZONE, GEN), {?MODULE, ZONE, GEN}). --spec meta_lookup(emqx_types:zone(), gen_id()) -> #generation{}. +-spec meta_register_gen(emqx_types:zone(), gen_id(), generation()) -> ok. +meta_register_gen(Zone, GenId, Gen) -> + Gs = + case GenId > 0 of + true -> meta_lookup(Zone, GenId - 1); + false -> [] + end, + ok = meta_put(Zone, GenId, [Gen | Gs]), + ok = meta_put(Zone, current, GenId). + +-spec meta_lookup_gen(emqx_types:zone(), emqx_replay:time()) -> generation(). 
+meta_lookup_gen(Zone, Time) -> + % TODO + % Is cheaper persistent term GC on update here worth extra lookup? I'm leaning + % towards a "no". + GenId = meta_lookup(Zone, current), + Gens = meta_lookup(Zone, GenId), + [Gen | _Older] = lists:dropwhile(fun(#{since := Since}) -> Since > Time end, Gens), + Gen. + +-spec meta_get_current(emqx_types:zone()) -> gen_id() | undefined. +meta_get_current(Zone) -> + meta_lookup(Zone, current, undefined). + +-spec meta_lookup(emqx_types:zone(), _K) -> _V. meta_lookup(Zone, GenId) -> persistent_term:get(?PERSISTENT_TERM(Zone, GenId)). --spec meta_put(emqx_types:zone(), gen_id(), #generation{}) -> ok. +-spec meta_lookup(emqx_types:zone(), _K, Default) -> _V | Default. +meta_lookup(Zone, GenId, Default) -> + persistent_term:get(?PERSISTENT_TERM(Zone, GenId), Default). + +-spec meta_put(emqx_types:zone(), _K, _V) -> ok. meta_put(Zone, GenId, Gen) -> persistent_term:put(?PERSISTENT_TERM(Zone, GenId), Gen). @@ -256,6 +318,15 @@ meta_erase(Zone) -> -undef(PERSISTENT_TERM). +get_next_id(undefined) -> 0; +get_next_id(GenId) -> GenId + 1. + +is_gen_valid(Zone, GenId, Since) when GenId > 0 -> + [#{since := SincePrev} | _] = meta_lookup(Zone, GenId - 1), + Since > SincePrev; +is_gen_valid(_Zone, 0, 0) -> + true. + %% -spec store_cfs(rocksdb:db_handle(), [{string(), rocksdb:cf_handle()}]) -> ok. %% store_cfs(DBHandle, CFRefs) -> %% lists:foreach( diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index eee802e69..d3518d780 100644 --- a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -176,7 +176,7 @@ end_per_testcase(TC, _Config) -> ok = emqx_replay_local_store_sup:stop_zone(zone(TC)). zone(TC) -> - list_to_atom(?MODULE_STRING ++ atom_to_list(TC)). + list_to_atom(lists:concat([?MODULE, "_", TC])). 
set_zone_config(Zone, Options) -> ok = application:set_env(emqx_replay, zone_config, #{ From bf9d57f6a9c2aa2684ab9e8a52d2d607fb1acae2 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 17 Jan 2023 15:58:29 +0300 Subject: [PATCH 34/49] chore(ds): Simplify iteration-related typespecs --- apps/emqx_replay/src/emqx_replay_local_store.erl | 2 +- apps/emqx_replay/src/emqx_replay_message_storage.erl | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 0138ffed5..35a31e65c 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -98,7 +98,7 @@ store(Zone, GUID, Time, Topic, Msg) -> #{module := Mod, data := Data} = meta_lookup_gen(Zone, Time), Mod:store(Data, GUID, Time, Topic, Msg). --spec make_iterator(emqx_types:zone(), emqx_topic:words(), emqx_replay_message_storage:time()) -> +-spec make_iterator(emqx_types:zone(), emqx_topic:words(), emqx_replay:time()) -> {ok, _TODO} | {error, _TODO}. make_iterator(Zone, TopicFilter, StartTime) -> %% TODO: this is not supposed to work like this. Just a mock-up diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index f2a6afaa6..fb96863d1 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -140,11 +140,8 @@ %% Type declarations %%================================================================================ -%% parsed --type topic() :: list(binary()). - -%% TODO granularity? --type time() :: integer(). +-type topic() :: emqx_replay:topic(). +-type time() :: emqx_replay:time(). %% Number of bits -type bits() :: non_neg_integer(). 
@@ -292,20 +289,19 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, Value = make_message_value(Topic, MessagePayload), rocksdb:put(DBHandle, CFHandle, Key, Value, DB#db.write_options). --spec make_iterator(db(), emqx_topic:words(), time() | earliest) -> +-spec make_iterator(db(), emqx_topic:words(), time()) -> {ok, iterator()} | {error, _TODO}. make_iterator(DB, TopicFilter, StartTime) -> Options = emqx_replay_conf:zone_iteration_options(DB#db.zone), make_iterator(DB, TopicFilter, StartTime, Options). --spec make_iterator(db(), emqx_topic:words(), time() | earliest, iteration_options()) -> +-spec make_iterator(db(), emqx_topic:words(), time(), iteration_options()) -> % {error, invalid_start_time}? might just start from the beginning of time % and call it a day: client violated the contract anyway. {ok, iterator()} | {error, _TODO}. make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime, Options) -> case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of {ok, ITHandle} -> - % TODO earliest Filter = make_keyspace_filter(TopicFilter, StartTime, DB#db.keymapper), InitialSeek = combine(compute_initial_seek(Filter), <<>>, DB#db.keymapper), RefreshCounter = make_refresh_counter(maps:get(iterator_refresh, Options, undefined)), From 83d1606d00c80906eb7376cc5a86e080f9467111 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 17 Jan 2023 17:20:17 +0300 Subject: [PATCH 35/49] feat(ds): Make iteration fully generation-aware --- .../src/emqx_replay_local_store.erl | 98 +++++++++++++------ .../test/emqx_replay_local_store_SUITE.erl | 77 ++++++++++++--- 2 files changed, 133 insertions(+), 42 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 35a31e65c..77f0b2924 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -61,6 +61,10 @@ }). 
-record(it, { + zone :: emqx_types:zone(), + gen :: gen_id(), + filter :: emqx_topic:words(), + start_time :: emqx_replay:time(), module :: module(), data :: term() }). @@ -95,31 +99,36 @@ create_generation(Zone, Since, Config = {_Module, _Options}) -> -spec store(emqx_types:zone(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary()) -> ok | {error, _TODO}. store(Zone, GUID, Time, Topic, Msg) -> - #{module := Mod, data := Data} = meta_lookup_gen(Zone, Time), + {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Zone, Time), Mod:store(Data, GUID, Time, Topic, Msg). -spec make_iterator(emqx_types:zone(), emqx_topic:words(), emqx_replay:time()) -> - {ok, _TODO} | {error, _TODO}. + {ok, iterator()} | {error, _TODO}. make_iterator(Zone, TopicFilter, StartTime) -> - %% TODO: this is not supposed to work like this. Just a mock-up - #{module := Mod, data := Data} = meta_lookup_gen(Zone, StartTime), - case Mod:make_iterator(Data, TopicFilter, StartTime) of - {ok, It} -> - {ok, #it{ - module = Mod, - data = It - }}; - Err -> - Err - end. + {GenId, Gen} = meta_lookup_gen(Zone, StartTime), + open_iterator(Gen, #it{ + zone = Zone, + gen = GenId, + filter = TopicFilter, + start_time = StartTime + }). -spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}. -next(#it{module = Mod, data = It0}) -> - case Mod:next(It0) of - {value, Val, It} -> - {value, Val, #it{module = Mod, data = It}}; - Other -> - Other +next(It = #it{module = Mod, data = ItData}) -> + case Mod:next(ItData) of + {value, Val, ItDataNext} -> + {value, Val, It#it{data = ItDataNext}}; + {error, _} = Error -> + Error; + none -> + case open_next_iterator(It) of + {ok, ItNext} -> + next(ItNext); + {error, _} = Error -> + Error; + none -> + none + end end. %%================================================================================ @@ -232,6 +241,24 @@ open_gen( DB = Mod:open(Zone, DBHandle, GenId, CFs, Data), Gen#{data := DB}. 
+-spec open_next_iterator(iterator()) -> {ok, iterator()} | {error, _Reason} | none. +open_next_iterator(It = #it{zone = Zone, gen = GenId}) -> + open_next_iterator(meta_get_gen(Zone, GenId + 1), It#it{gen = GenId + 1}). + +open_next_iterator(undefined, _It) -> + none; +open_next_iterator(Gen = #{}, It) -> + open_iterator(Gen, It). + +-spec open_iterator(generation(), iterator()) -> {ok, iterator()} | {error, _Reason}. +open_iterator(#{module := Mod, data := Data}, It = #it{}) -> + case Mod:make_iterator(Data, It#it.filter, It#it.start_time) of + {ok, ItData} -> + {ok, It#it{module = Mod, data = ItData}}; + Err -> + Err + end. + %% Functions for dealing with the metadata stored persistently in rocksdb -define(CURRENT_GEN, <<"current">>). @@ -282,31 +309,42 @@ meta_register_gen(Zone, GenId, Gen) -> ok = meta_put(Zone, GenId, [Gen | Gs]), ok = meta_put(Zone, current, GenId). --spec meta_lookup_gen(emqx_types:zone(), emqx_replay:time()) -> generation(). +-spec meta_lookup_gen(emqx_types:zone(), emqx_replay:time()) -> {gen_id(), generation()}. meta_lookup_gen(Zone, Time) -> % TODO % Is cheaper persistent term GC on update here worth extra lookup? I'm leaning % towards a "no". - GenId = meta_lookup(Zone, current), - Gens = meta_lookup(Zone, GenId), - [Gen | _Older] = lists:dropwhile(fun(#{since := Since}) -> Since > Time end, Gens), - Gen. + Current = meta_lookup(Zone, current), + Gens = meta_lookup(Zone, Current), + find_gen(Time, Current, Gens). + +find_gen(Time, GenId, [Gen = #{since := Since} | _]) when Time >= Since -> + {GenId, Gen}; +find_gen(Time, GenId, [_Gen | Rest]) -> + find_gen(Time, GenId - 1, Rest). + +-spec meta_get_gen(emqx_types:zone(), gen_id()) -> generation() | undefined. +meta_get_gen(Zone, GenId) -> + case meta_lookup(Zone, GenId, []) of + [Gen | _Older] -> Gen; + [] -> undefined + end. -spec meta_get_current(emqx_types:zone()) -> gen_id() | undefined. meta_get_current(Zone) -> meta_lookup(Zone, current, undefined). 
-spec meta_lookup(emqx_types:zone(), _K) -> _V. -meta_lookup(Zone, GenId) -> - persistent_term:get(?PERSISTENT_TERM(Zone, GenId)). +meta_lookup(Zone, K) -> + persistent_term:get(?PERSISTENT_TERM(Zone, K)). -spec meta_lookup(emqx_types:zone(), _K, Default) -> _V | Default. -meta_lookup(Zone, GenId, Default) -> - persistent_term:get(?PERSISTENT_TERM(Zone, GenId), Default). +meta_lookup(Zone, K, Default) -> + persistent_term:get(?PERSISTENT_TERM(Zone, K), Default). -spec meta_put(emqx_types:zone(), _K, _V) -> ok. -meta_put(Zone, GenId, Gen) -> - persistent_term:put(?PERSISTENT_TERM(Zone, GenId), Gen). +meta_put(Zone, K, V) -> + persistent_term:put(?PERSISTENT_TERM(Zone, K), V). -spec meta_erase(emqx_types:zone()) -> ok. meta_erase(Zone) -> diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index d3518d780..edda0f7f6 100644 --- a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -23,6 +23,25 @@ -define(ZONE, zone(?FUNCTION_NAME)). +-define(DEFAULT_CONFIG, + {emqx_replay_message_storage, #{ + timestamp_bits => 64, + topic_bits_per_level => [8, 8, 32, 16], + epoch => 5, + iteration => #{ + iterator_refresh => {every, 5} + } + }} +). + +-define(COMPACT_CONFIG, + {emqx_replay_message_storage, #{ + timestamp_bits => 16, + topic_bits_per_level => [16, 16], + epoch => 10 + }} +). + %% Smoke test for opening and reopening the database t_open(_Config) -> ok = emqx_replay_local_store_sup:stop_zone(?ZONE), @@ -128,6 +147,49 @@ t_iterate_long_tail_wildcard(_Config) -> lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, TopicFilter, 50)]) ). 
+t_create_gen(_Config) -> + {ok, 1} = emqx_replay_local_store:create_generation(?ZONE, 5, ?DEFAULT_CONFIG), + ?assertEqual( + {error, nonmonotonic}, + emqx_replay_local_store:create_generation(?ZONE, 1, ?DEFAULT_CONFIG) + ), + ?assertEqual( + {error, nonmonotonic}, + emqx_replay_local_store:create_generation(?ZONE, 5, ?DEFAULT_CONFIG) + ), + {ok, 2} = emqx_replay_local_store:create_generation(?ZONE, 10, ?COMPACT_CONFIG), + Topics = ["foo/bar", "foo/bar/baz"], + Timestamps = lists:seq(1, 100), + [ + ?assertEqual(ok, store(?ZONE, PublishedAt, Topic, <<>>)) + || Topic <- Topics, PublishedAt <- Timestamps + ]. + +t_iterate_multigen(_Config) -> + {ok, 1} = emqx_replay_local_store:create_generation(?ZONE, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_replay_local_store:create_generation(?ZONE, 50, ?DEFAULT_CONFIG), + {ok, 3} = emqx_replay_local_store:create_generation(?ZONE, 1000, ?DEFAULT_CONFIG), + Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], + Timestamps = lists:seq(1, 100), + _ = [ + store(?ZONE, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) + || Topic <- Topics, PublishedAt <- Timestamps + ], + ?assertEqual( + lists:sort([ + {Topic, PublishedAt} + || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps + ]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/#", 0)]) + ), + ?assertEqual( + lists:sort([ + {Topic, PublishedAt} + || Topic <- ["a", "a/bar"], PublishedAt <- lists:seq(60, 100) + ]), + lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "a/#", 60)]) + ). + store(Zone, PublishedAt, Topic, Payload) -> ID = emqx_guid:gen(), emqx_replay_local_store:store(Zone, ID, PublishedAt, parse_topic(Topic), Payload). @@ -161,14 +223,7 @@ end_per_suite(_Config) -> ok = application:stop(emqx_replay). 
init_per_testcase(TC, Config) -> - ok = set_zone_config(zone(TC), #{ - timestamp_bits => 64, - topic_bits_per_level => [8, 8, 32, 16], - epoch => 5, - iteration => #{ - iterator_refresh => {every, 5} - } - }), + ok = set_zone_config(zone(TC), ?DEFAULT_CONFIG), {ok, _} = emqx_replay_local_store_sup:start_zone(zone(TC)), Config. @@ -178,7 +233,5 @@ end_per_testcase(TC, _Config) -> zone(TC) -> list_to_atom(lists:concat([?MODULE, "_", TC])). -set_zone_config(Zone, Options) -> - ok = application:set_env(emqx_replay, zone_config, #{ - Zone => {emqx_replay_message_storage, Options} - }). +set_zone_config(Zone, Config) -> + ok = application:set_env(emqx_replay, zone_config, #{Zone => Config}). From cbded9af39a30d18e5851642ccec13d2203b57c2 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 17 Jan 2023 17:21:59 +0300 Subject: [PATCH 36/49] feat(ds): Make `create_generation` safer against bad input --- .../src/emqx_replay_local_store.erl | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 77f0b2924..7148308b2 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -92,7 +92,7 @@ start_link(Zone) -> gen_server:start_link(?REF(Zone), ?MODULE, [Zone], []). -spec create_generation(emqx_types:zone(), emqx_replay:time(), emqx_replay_conf:backend_config()) -> - {ok, gen_id()}. + {ok, gen_id()} | {error, nonmonotonic}. create_generation(Zone, Since, Config = {_Module, _Options}) -> gen_server:call(?REF(Zone), {create_generation, Since, Config}). @@ -148,8 +148,12 @@ init([Zone]) -> {ok, S}. 
handle_call({create_generation, Since, Config}, _From, S) -> - {ok, GenId, NS} = create_new_gen(Since, Config, S), - {reply, {ok, GenId}, NS}; + case create_new_gen(Since, Config, S) of + {ok, GenId, NS} -> + {reply, {ok, GenId}, NS}; + {error, _} = Error -> + {reply, Error, S} + end; handle_call(_Call, _From, S) -> {reply, {error, unknown_call}, S}. @@ -193,17 +197,22 @@ ensure_current_generation(S = #s{zone = Zone, db = DBHandle}) -> create_new_gen(Since, Config, S = #s{zone = Zone, db = DBHandle}) -> GenId = get_next_id(meta_get_current(Zone)), GenId = get_next_id(schema_get_current(DBHandle)), - % TODO: Propagate errors to clients. - true = is_gen_valid(Zone, GenId, Since), - {ok, Gen, NS} = create_gen(GenId, Since, Config, S), - %% TODO: Transaction? Column family creation can't be transactional, anyway. - ok = schema_put_gen(DBHandle, GenId, Gen), - ok = schema_put_current(DBHandle, GenId), - ok = meta_register_gen(Zone, GenId, Gen), - {ok, GenId, NS}. + case is_gen_valid(Zone, GenId, Since) of + ok -> + {ok, Gen, NS} = create_gen(GenId, Since, Config, S), + %% TODO: Transaction? Column family creation can't be transactional, anyway. + ok = schema_put_gen(DBHandle, GenId, Gen), + ok = schema_put_current(DBHandle, GenId), + ok = meta_register_gen(Zone, GenId, open_gen(GenId, Gen, NS)), + {ok, GenId, NS}; + {error, _} = Error -> + Error + end. -% -spec +-spec create_gen(gen_id(), emqx_replay:time(), emqx_replay_conf:backend_config(), state()) -> + {ok, generation(), state()}. create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, column_families = CFs}) -> + % TODO: Backend implementation should ensure idempotency. {Schema, NewCFs} = Module:create_new(DBHandle, GenId, Options), Gen = #{ module => Module, @@ -360,10 +369,15 @@ get_next_id(undefined) -> 0; get_next_id(GenId) -> GenId + 1. 
is_gen_valid(Zone, GenId, Since) when GenId > 0 -> - [#{since := SincePrev} | _] = meta_lookup(Zone, GenId - 1), - Since > SincePrev; + [GenPrev | _] = meta_lookup(Zone, GenId - 1), + case GenPrev of + #{since := SincePrev} when Since > SincePrev -> + ok; + #{} -> + {error, nonmonotonic} + end; is_gen_valid(_Zone, 0, 0) -> - true. + ok. %% -spec store_cfs(rocksdb:db_handle(), [{string(), rocksdb:cf_handle()}]) -> ok. %% store_cfs(DBHandle, CFRefs) -> From d2065e0c1b1c796909d210965c125dca9dff3b7d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 9 Feb 2023 19:22:18 +0300 Subject: [PATCH 37/49] feat(ds): allow to preserve / restore iterators in the db So that we could guarantee replay consistency / availability under the assumption that nodes may be restarted or even lost occasionally. --- apps/emqx_replay/src/emqx_replay.erl | 8 + .../src/emqx_replay_local_store.erl | 177 ++++++++++++++---- .../src/emqx_replay_message_storage.erl | 43 ++--- .../test/emqx_replay_local_store_SUITE.erl | 4 +- .../emqx_replay_message_storage_shim.erl | 6 +- .../props/prop_replay_message_storage.erl | 61 +++--- 6 files changed, 208 insertions(+), 91 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay.erl b/apps/emqx_replay/src/emqx_replay.erl index fb1ec39c6..ed790697f 100644 --- a/apps/emqx_replay/src/emqx_replay.erl +++ b/apps/emqx_replay/src/emqx_replay.erl @@ -19,6 +19,7 @@ -export([]). -export_type([topic/0, time/0]). +-export_type([replay_id/0, replay/0]). %%================================================================================ %% Type declarations @@ -32,6 +33,13 @@ %% TODO granularity? -type time() :: non_neg_integer(). +-type replay_id() :: binary(). + +-type replay() :: { + _TopicFilter :: topic(), + _StartTime :: time() +}. 
+ %%================================================================================ %% API funcions %%================================================================================ diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 7148308b2..8a74f248e 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -21,7 +21,11 @@ -export([start_link/1]). -export([create_generation/3]). --export([store/5, make_iterator/3, next/1]). +-export([store/5]). + +-export([make_iterator/2, next/1]). + +-export([preserve_iterator/2, restore_iterator/2, discard_iterator/2]). %% behavior callbacks: -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -57,14 +61,14 @@ -record(s, { zone :: emqx_types:zone(), db :: rocksdb:db_handle(), - column_families :: cf_refs() + cf_iterator :: rocksdb:cf_handle(), + cf_generations :: cf_refs() }). -record(it, { zone :: emqx_types:zone(), gen :: gen_id(), - filter :: emqx_topic:words(), - start_time :: emqx_replay:time(), + replay :: emqx_replay:replay(), module :: module(), data :: term() }). @@ -79,8 +83,17 @@ %% [{<<"genNN">>, #generation{}}, ..., %% {<<"current">>, GenID}] +-define(DEFAULT_CF, "default"). -define(DEFAULT_CF_OPTS, []). +-define(ITERATOR_CF, "$iterators"). + +%% TODO +%% 1. CuckooTable might be of use here / `OptimizeForPointLookup(...)`. +%% 2. Supposedly might be compressed _very_ effectively. +%% 3. `inplace_update_support`? +-define(ITERATOR_CF_OPTS, []). + -define(REF(Zone), {via, gproc, {n, l, {?MODULE, Zone}}}). %%================================================================================ @@ -102,15 +115,14 @@ store(Zone, GUID, Time, Topic, Msg) -> {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Zone, Time), Mod:store(Data, GUID, Time, Topic, Msg). 
--spec make_iterator(emqx_types:zone(), emqx_topic:words(), emqx_replay:time()) -> +-spec make_iterator(emqx_types:zone(), emqx_replay:replay()) -> {ok, iterator()} | {error, _TODO}. -make_iterator(Zone, TopicFilter, StartTime) -> +make_iterator(Zone, Replay = {_, StartTime}) -> {GenId, Gen} = meta_lookup_gen(Zone, StartTime), open_iterator(Gen, #it{ zone = Zone, gen = GenId, - filter = TopicFilter, - start_time = StartTime + replay = Replay }). -spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}. @@ -131,20 +143,37 @@ next(It = #it{module = Mod, data = ItData}) -> end end. +-spec preserve_iterator(iterator(), emqx_replay:replay_id()) -> + ok | {error, _TODO}. +preserve_iterator(It = #it{}, ReplayID) -> + iterator_put_state(ReplayID, It). + +-spec restore_iterator(emqx_types:zone(), emqx_replay:replay_id()) -> + {ok, iterator()} | {error, _TODO}. +restore_iterator(Zone, ReplayID) -> + case iterator_get_state(Zone, ReplayID) of + {ok, Serial} -> + restore_iterator_state(Zone, Serial); + not_found -> + {error, not_found}; + {error, _Reason} = Error -> + Error + end. + +-spec discard_iterator(emqx_types:zone(), emqx_replay:replay_id()) -> + ok | {error, _TODO}. +discard_iterator(Zone, ReplayID) -> + iterator_delete(Zone, ReplayID). + %%================================================================================ %% behavior callbacks %%================================================================================ init([Zone]) -> process_flag(trap_exit, true), - {ok, DBHandle, CFRefs} = open_db(Zone), - S0 = #s{ - zone = Zone, - db = DBHandle, - column_families = CFRefs - }, + {ok, S0} = open_db(Zone), S = ensure_current_generation(S0), - read_metadata(S), + ok = populate_metadata(S), {ok, S}. 
handle_call({create_generation, Since, Config}, _From, S) -> @@ -171,13 +200,16 @@ terminate(_Reason, #s{db = DB, zone = Zone}) -> %% Internal functions %%================================================================================ --spec read_metadata(state()) -> ok. -read_metadata(S = #s{db = DBHandle}) -> - Current = schema_get_current(DBHandle), - lists:foreach(fun(GenId) -> read_metadata(GenId, S) end, lists:seq(0, Current)). +-record(db, {handle :: rocksdb:db_handle(), cf_iterator :: rocksdb:cf_handle()}). --spec read_metadata(gen_id(), state()) -> ok. -read_metadata(GenId, S = #s{zone = Zone, db = DBHandle}) -> +-spec populate_metadata(state()) -> ok. +populate_metadata(S = #s{zone = Zone, db = DBHandle, cf_iterator = CFIterator}) -> + ok = meta_put(Zone, db, #db{handle = DBHandle, cf_iterator = CFIterator}), + Current = schema_get_current(DBHandle), + lists:foreach(fun(GenId) -> populate_metadata(GenId, S) end, lists:seq(0, Current)). + +-spec populate_metadata(gen_id(), state()) -> ok. +populate_metadata(GenId, S = #s{zone = Zone, db = DBHandle}) -> Gen = open_gen(GenId, schema_get_gen(DBHandle, GenId), S), meta_register_gen(Zone, GenId, Gen). @@ -193,7 +225,7 @@ ensure_current_generation(S = #s{zone = Zone, db = DBHandle}) -> end. -spec create_new_gen(emqx_replay:time(), emqx_replay_conf:backend_config(), state()) -> - {ok, gen_id(), state()}. + {ok, gen_id(), state()} | {error, nonmonotonic}. create_new_gen(Since, Config, S = #s{zone = Zone, db = DBHandle}) -> GenId = get_next_id(meta_get_current(Zone)), GenId = get_next_id(schema_get_current(DBHandle)), @@ -211,7 +243,7 @@ create_new_gen(Since, Config, S = #s{zone = Zone, db = DBHandle}) -> -spec create_gen(gen_id(), emqx_replay:time(), emqx_replay_conf:backend_config(), state()) -> {ok, generation(), state()}. 
-create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, column_families = CFs}) -> +create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, cf_generations = CFs}) -> % TODO: Backend implementation should ensure idempotency. {Schema, NewCFs} = Module:create_new(DBHandle, GenId, Options), Gen = #{ @@ -219,24 +251,38 @@ create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, column_familie data => Schema, since => Since }, - {ok, Gen, S#s{column_families = NewCFs ++ CFs}}. + {ok, Gen, S#s{cf_generations = NewCFs ++ CFs}}. --spec open_db(emqx_types:zone()) -> {ok, rocksdb:db_handle(), cf_refs()} | {error, _TODO}. +-spec open_db(emqx_types:zone()) -> {ok, state()} | {error, _TODO}. open_db(Zone) -> Filename = atom_to_list(Zone), - DBOptions = emqx_replay_conf:db_options(), - ColumnFamiles = + DBOptions = [ + {create_if_missing, true}, + {create_missing_column_families, true} + | emqx_replay_conf:db_options() + ], + ExistingCFs = case rocksdb:list_column_families(Filename, DBOptions) of - {ok, ColumnFamiles0} -> - [{I, []} || I <- ColumnFamiles0]; + {ok, CFs} -> + [{Name, []} || Name <- CFs, Name /= ?DEFAULT_CF, Name /= ?ITERATOR_CF]; % DB is not present. First start {error, {db_open, _}} -> - [{"default", ?DEFAULT_CF_OPTS}] + [] end, - case rocksdb:open(Filename, [{create_if_missing, true} | DBOptions], ColumnFamiles) of - {ok, Handle, CFRefs} -> - {CFNames, _} = lists:unzip(ColumnFamiles), - {ok, Handle, lists:zip(CFNames, CFRefs)}; + ColumnFamilies = [ + {?DEFAULT_CF, ?DEFAULT_CF_OPTS}, + {?ITERATOR_CF, ?ITERATOR_CF_OPTS} + | ExistingCFs + ], + case rocksdb:open(Filename, DBOptions, ColumnFamilies) of + {ok, DBHandle, [_CFDefault, CFIterator | CFRefs]} -> + {CFNames, _} = lists:unzip(ExistingCFs), + {ok, #s{ + zone = Zone, + db = DBHandle, + cf_iterator = CFIterator, + cf_generations = lists:zip(CFNames, CFRefs) + }}; Error -> Error end. 
@@ -245,7 +291,7 @@ open_db(Zone) -> open_gen( GenId, Gen = #{module := Mod, data := Data}, - #s{zone = Zone, db = DBHandle, column_families = CFs} + #s{zone = Zone, db = DBHandle, cf_generations = CFs} ) -> DB = Mod:open(Zone, DBHandle, GenId, CFs, Data), Gen#{data := DB}. @@ -261,13 +307,72 @@ open_next_iterator(Gen = #{}, It) -> -spec open_iterator(generation(), iterator()) -> {ok, iterator()} | {error, _Reason}. open_iterator(#{module := Mod, data := Data}, It = #it{}) -> - case Mod:make_iterator(Data, It#it.filter, It#it.start_time) of + case Mod:make_iterator(Data, It#it.replay) of {ok, ItData} -> {ok, It#it{module = Mod, data = ItData}}; Err -> Err end. +-spec open_restore_iterator(generation(), iterator(), binary()) -> + {ok, iterator()} | {error, _Reason}. +open_restore_iterator(#{module := Mod, data := Data}, It = #it{replay = Replay}, Serial) -> + case Mod:restore_iterator(Data, Replay, Serial) of + {ok, ItData} -> + {ok, It#it{module = Mod, data = ItData}}; + Err -> + Err + end. + +%% + +-define(KEY_REPLAY_STATE(ReplayID), <<(ReplayID)/binary, "rs">>). + +-define(ITERATION_WRITE_OPTS, []). +-define(ITERATION_READ_OPTS, []). + +iterator_get_state(Zone, ReplayID) -> + #db{handle = Handle, cf_iterator = CF} = meta_lookup(Zone, db), + rocksdb:get(Handle, CF, ?KEY_REPLAY_STATE(ReplayID), ?ITERATION_READ_OPTS). + +iterator_put_state(ID, It = #it{zone = Zone}) -> + #db{handle = Handle, cf_iterator = CF} = meta_lookup(Zone, db), + Serial = preserve_iterator_state(It), + rocksdb:put(Handle, CF, ?KEY_REPLAY_STATE(ID), Serial, ?ITERATION_WRITE_OPTS). + +iterator_delete(Zone, ID) -> + #db{handle = Handle, cf_iterator = CF} = meta_lookup(Zone, db), + rocksdb:delete(Handle, CF, ?KEY_REPLAY_STATE(ID), ?ITERATION_WRITE_OPTS). 
+ +preserve_iterator_state(#it{ + gen = Gen, + replay = {TopicFilter, StartTime}, + module = Mod, + data = ItData +}) -> + term_to_binary(#{ + v => 1, + gen => Gen, + filter => TopicFilter, + start => StartTime, + st => Mod:preserve_iterator(ItData) + }). + +restore_iterator_state(Zone, Serial) when is_binary(Serial) -> + restore_iterator_state(Zone, binary_to_term(Serial)); +restore_iterator_state( + Zone, + #{ + v := 1, + gen := Gen, + filter := TopicFilter, + start := StartTime, + st := State + } +) -> + It = #it{zone = Zone, gen = Gen, replay = {TopicFilter, StartTime}}, + open_restore_iterator(meta_get_gen(Zone, Gen), It, State). + %% Functions for dealing with the metadata stored persistently in rocksdb -define(CURRENT_GEN, <<"current">>). diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index fb96863d1..fbeb452c9 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -94,12 +94,12 @@ -export([make_keymapper/1]). -export([store/5]). +-export([make_iterator/2]). -export([make_iterator/3]). --export([make_iterator/4]). -export([next/1]). -export([preserve_iterator/1]). --export([restore_iterator/2]). +-export([restore_iterator/3]). -export([refresh_iterator/1]). %% Debug/troubleshooting: @@ -114,7 +114,7 @@ %% Keyspace filters -export([ - make_keyspace_filter/3, + make_keyspace_filter/2, compute_initial_seek/1, compute_next_seek/2, compute_time_seek/3, @@ -289,20 +289,20 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, Value = make_message_value(Topic, MessagePayload), rocksdb:put(DBHandle, CFHandle, Key, Value, DB#db.write_options). --spec make_iterator(db(), emqx_topic:words(), time()) -> +-spec make_iterator(db(), emqx_replay:replay()) -> {ok, iterator()} | {error, _TODO}. 
-make_iterator(DB, TopicFilter, StartTime) -> +make_iterator(DB, Replay) -> Options = emqx_replay_conf:zone_iteration_options(DB#db.zone), - make_iterator(DB, TopicFilter, StartTime, Options). + make_iterator(DB, Replay, Options). --spec make_iterator(db(), emqx_topic:words(), time(), iteration_options()) -> +-spec make_iterator(db(), emqx_replay:replay(), iteration_options()) -> % {error, invalid_start_time}? might just start from the beginning of time % and call it a day: client violated the contract anyway. {ok, iterator()} | {error, _TODO}. -make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, TopicFilter, StartTime, Options) -> +make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, Replay, Options) -> case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of {ok, ITHandle} -> - Filter = make_keyspace_filter(TopicFilter, StartTime, DB#db.keymapper), + Filter = make_keyspace_filter(Replay, DB#db.keymapper), InitialSeek = combine(compute_initial_seek(Filter), <<>>, DB#db.keymapper), RefreshCounter = make_refresh_counter(maps:get(iterator_refresh, Options, undefined)), {ok, #it{ @@ -342,26 +342,23 @@ next(It0 = #it{filter = #filter{keymapper = Keymapper}}) -> end. -spec preserve_iterator(iterator()) -> binary(). -preserve_iterator(#it{cursor = Cursor, filter = Filter}) -> +preserve_iterator(#it{cursor = Cursor}) -> State = #{ v => 1, - cursor => Cursor, - filter => Filter#filter.topic_filter, - stime => Filter#filter.start_time + cursor => Cursor }, term_to_binary(State). --spec restore_iterator(db(), binary()) -> {ok, iterator()} | {error, _TODO}. -restore_iterator(DB, Serial) when is_binary(Serial) -> +-spec restore_iterator(db(), emqx_replay:replay(), binary()) -> + {ok, iterator()} | {error, _TODO}. 
+restore_iterator(DB, Replay, Serial) when is_binary(Serial) -> State = binary_to_term(Serial), - restore_iterator(DB, State); -restore_iterator(DB, #{ + restore_iterator(DB, Replay, State); +restore_iterator(DB, Replay, #{ v := 1, - cursor := Cursor, - filter := TopicFilter, - stime := StartTime + cursor := Cursor }) -> - case make_iterator(DB, TopicFilter, StartTime) of + case make_iterator(DB, Replay) of {ok, It} when Cursor == undefined -> % Iterator was preserved right after it has been made. {ok, It}; @@ -434,8 +431,8 @@ hash(Input, Bits) -> % at most 32 bits erlang:phash2(Input, 1 bsl Bits). --spec make_keyspace_filter(emqx_topic:words(), time(), keymapper()) -> keyspace_filter(). -make_keyspace_filter(TopicFilter, StartTime, Keymapper) -> +-spec make_keyspace_filter(emqx_replay:replay(), keymapper()) -> keyspace_filter(). +make_keyspace_filter({TopicFilter, StartTime}, Keymapper) -> Bitstring = compute_bitstring(TopicFilter, StartTime, Keymapper), HashBitmask = compute_topic_bitmask(TopicFilter, Keymapper), TimeBitmask = compute_time_bitmask(Keymapper), diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index edda0f7f6..f36dda267 100644 --- a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -73,7 +73,7 @@ t_iterate(_Config) -> %% Iterate through individual topics: [ begin - {ok, It} = emqx_replay_local_store:make_iterator(?ZONE, Topic, 0), + {ok, It} = emqx_replay_local_store:make_iterator(?ZONE, {Topic, 0}), Values = iterate(It), ?assertEqual(lists:map(fun integer_to_binary/1, Timestamps), Values) end @@ -195,7 +195,7 @@ store(Zone, PublishedAt, Topic, Payload) -> emqx_replay_local_store:store(Zone, ID, PublishedAt, parse_topic(Topic), Payload). 
iterate(DB, TopicFilter, StartTime) -> - {ok, It} = emqx_replay_local_store:make_iterator(DB, parse_topic(TopicFilter), StartTime), + {ok, It} = emqx_replay_local_store:make_iterator(DB, {parse_topic(TopicFilter), StartTime}), iterate(It). iterate(It) -> diff --git a/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl b/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl index 125c9a9fc..160451cb9 100644 --- a/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl +++ b/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl @@ -19,7 +19,7 @@ -export([open/0]). -export([close/1]). -export([store/5]). --export([iterate/3]). +-export([iterate/2]). -type topic() :: list(binary()). -type time() :: integer(). @@ -41,9 +41,9 @@ store(Tab, MessageID, PublishedAt, Topic, Payload) -> true = ets:insert(Tab, {{PublishedAt, MessageID}, Topic, Payload}), ok. --spec iterate(t(), emqx_topic:words(), time()) -> +-spec iterate(t(), emqx_replay:replay()) -> [binary()]. -iterate(Tab, TopicFilter, StartTime) -> +iterate(Tab, {TopicFilter, StartTime}) -> ets:foldr( fun({{PublishedAt, _}, Topic, Payload}, Acc) -> case emqx_topic:match(Topic, TopicFilter) of diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl index c468097c7..ede6dc336 100644 --- a/apps/emqx_replay/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -21,6 +21,8 @@ -define(WORK_DIR, ["_build", "test"]). -define(RUN_ID, {?MODULE, testrun_id}). + +-define(ZONE, ?MODULE). -define(GEN_ID, 42). 
%%-------------------------------------------------------------------- @@ -51,8 +53,7 @@ prop_next_seek_monotonic() -> {topic_filter(), pos_integer(), keymapper()}, begin Filter = emqx_replay_message_storage:make_keyspace_filter( - TopicFilter, - StartTime, + {TopicFilter, StartTime}, Keymapper ), ?FORALL( @@ -99,8 +100,9 @@ prop_iterate_messages() -> }, begin TopicFilter = make_topic_filter(Pattern, Topic), - Messages = iterate_db(DB, TopicFilter, StartTime), - Reference = iterate_shim(Shim, TopicFilter, StartTime), + Iteration = {TopicFilter, StartTime}, + Messages = iterate_db(DB, Iteration), + Reference = iterate_shim(Shim, Iteration), ok = close_db(Handle), ok = emqx_replay_message_storage_shim:close(Shim), ?WHENFAIL( @@ -143,10 +145,11 @@ prop_iterate_eq_iterate_with_preserve_restore() -> shuffled(flat([non_empty(list({preserve, restore})), list(iterate)])) }, begin - TopicFilter = make_topic_filter(Pat, Topic), - Iterator = make_iterator(DB, TopicFilter, StartTime), - Messages = run_iterator_commands(Commands, Iterator, DB), - equals(Messages, iterate_db(DB, TopicFilter, StartTime)) + Replay = {make_topic_filter(Pat, Topic), StartTime}, + Iterator = make_iterator(DB, Replay), + Ctx = #{db => DB, replay => Replay}, + Messages = run_iterator_commands(Commands, Iterator, Ctx), + equals(Messages, iterate_db(DB, Replay)) end ) end). @@ -177,11 +180,11 @@ prop_iterate_eq_iterate_with_refresh() -> pos_integer() }, ?TIMEOUT(5000, begin - TopicFilter = make_topic_filter(Pat, Topic), + Replay = {make_topic_filter(Pat, Topic), StartTime}, IterationOptions = #{iterator_refresh => {every, RefreshEvery}}, - Iterator = make_iterator(DB, TopicFilter, StartTime, IterationOptions), + Iterator = make_iterator(DB, Replay, IterationOptions), Messages = iterate_db(Iterator), - equals(Messages, iterate_db(DB, TopicFilter, StartTime)) + equals(Messages, iterate_db(DB, Replay)) end) ) end). @@ -205,8 +208,8 @@ store_db(DB, Messages) -> Messages ). 
-iterate_db(DB, TopicFilter, StartTime) -> - iterate_db(make_iterator(DB, TopicFilter, StartTime)). +iterate_db(DB, Iteration) -> + iterate_db(make_iterator(DB, Iteration)). iterate_db(It) -> case emqx_replay_message_storage:next(It) of @@ -216,26 +219,30 @@ iterate_db(It) -> [] end. -make_iterator(DB, TopicFilter, StartTime) -> - {ok, It} = emqx_replay_message_storage:make_iterator(DB, TopicFilter, StartTime), +make_iterator(DB, Replay) -> + {ok, It} = emqx_replay_message_storage:make_iterator(DB, Replay), It. -make_iterator(DB, TopicFilter, StartTime, Options) -> - {ok, It} = emqx_replay_message_storage:make_iterator(DB, TopicFilter, StartTime, Options), +make_iterator(DB, Replay, Options) -> + {ok, It} = emqx_replay_message_storage:make_iterator(DB, Replay, Options), It. -run_iterator_commands([iterate | Rest], It, DB) -> +run_iterator_commands([iterate | Rest], It, Ctx) -> case emqx_replay_message_storage:next(It) of {value, Payload, ItNext} -> - [binary_to_term(Payload) | run_iterator_commands(Rest, ItNext, DB)]; + [binary_to_term(Payload) | run_iterator_commands(Rest, ItNext, Ctx)]; none -> [] end; -run_iterator_commands([{preserve, restore} | Rest], It, DB) -> +run_iterator_commands([{preserve, restore} | Rest], It, Ctx) -> + #{ + db := DB, + replay := Replay + } = Ctx, Serial = emqx_replay_message_storage:preserve_iterator(It), - {ok, ItNext} = emqx_replay_message_storage:restore_iterator(DB, Serial), - run_iterator_commands(Rest, ItNext, DB); -run_iterator_commands([], It, _DB) -> + {ok, ItNext} = emqx_replay_message_storage:restore_iterator(DB, Replay, Serial), + run_iterator_commands(Rest, ItNext, Ctx); +run_iterator_commands([], It, _Ctx) -> iterate_db(It). store_shim(Shim, Messages) -> @@ -247,10 +254,10 @@ store_shim(Shim, Messages) -> Messages ). 
-iterate_shim(Shim, TopicFilter, StartTime) -> +iterate_shim(Shim, Iteration) -> lists:map( fun binary_to_term/1, - emqx_replay_message_storage_shim:iterate(Shim, TopicFilter, StartTime) + emqx_replay_message_storage_shim:iterate(Shim, Iteration) ). %%-------------------------------------------------------------------- @@ -260,7 +267,7 @@ iterate_shim(Shim, TopicFilter, StartTime) -> open_db(Filepath, Options) -> {ok, Handle} = rocksdb:open(Filepath, [{create_if_missing, true}]), {Schema, CFRefs} = emqx_replay_message_storage:create_new(Handle, ?GEN_ID, Options), - DB = emqx_replay_message_storage:open(Handle, ?GEN_ID, CFRefs, Schema), + DB = emqx_replay_message_storage:open(?ZONE, Handle, ?GEN_ID, CFRefs, Schema), {DB, Handle}. close_db(Handle) -> @@ -384,7 +391,7 @@ keyspace_filter() -> ?LET( {TopicFilter, StartTime, Keymapper}, {topic_filter(), pos_integer(), keymapper()}, - emqx_replay_message_storage:make_keyspace_filter(TopicFilter, StartTime, Keymapper) + emqx_replay_message_storage:make_keyspace_filter({TopicFilter, StartTime}, Keymapper) ). messages(Topic) -> From 8ac0bba958931947cc4281e2a47533482fd50a5d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 9 Feb 2023 21:35:11 +0300 Subject: [PATCH 38/49] test(ds): verify preserve / restore works with stored iterators --- .../test/emqx_replay_local_store_SUITE.erl | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index f36dda267..afed30b88 100644 --- a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -190,13 +190,49 @@ t_iterate_multigen(_Config) -> lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "a/#", 60)]) ). 
+t_iterate_multigen_preserve_restore(_Config) -> + ReplayID = atom_to_binary(?FUNCTION_NAME), + {ok, 1} = emqx_replay_local_store:create_generation(?ZONE, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_replay_local_store:create_generation(?ZONE, 50, ?DEFAULT_CONFIG), + {ok, 3} = emqx_replay_local_store:create_generation(?ZONE, 100, ?DEFAULT_CONFIG), + Topics = ["foo/bar", "foo/bar/baz", "a/bar"], + Timestamps = lists:seq(1, 100), + TopicFilter = "foo/#", + TopicsMatching = ["foo/bar", "foo/bar/baz"], + _ = [ + store(?ZONE, TS, Topic, term_to_binary({Topic, TS})) + || Topic <- Topics, TS <- Timestamps + ], + It0 = iterator(?ZONE, TopicFilter, 0), + {It1, Res10} = iterate(It0, 10), + % preserve mid-generation + ok = emqx_replay_local_store:preserve_iterator(It1, ReplayID), + {ok, It2} = emqx_replay_local_store:restore_iterator(?ZONE, ReplayID), + {It3, Res100} = iterate(It2, 88), + % preserve on the generation boundary + ok = emqx_replay_local_store:preserve_iterator(It3, ReplayID), + {ok, It4} = emqx_replay_local_store:restore_iterator(?ZONE, ReplayID), + {It5, Res200} = iterate(It4, 1000), + ?assertEqual(none, It5), + ?assertEqual( + lists:sort([{Topic, TS} || Topic <- TopicsMatching, TS <- Timestamps]), + lists:sort([binary_to_term(Payload) || Payload <- Res10 ++ Res100 ++ Res200]) + ), + ?assertEqual( + ok, + emqx_replay_local_store:discard_iterator(?ZONE, ReplayID) + ), + ?assertEqual( + {error, not_found}, + emqx_replay_local_store:restore_iterator(?ZONE, ReplayID) + ). + store(Zone, PublishedAt, Topic, Payload) -> ID = emqx_guid:gen(), emqx_replay_local_store:store(Zone, ID, PublishedAt, parse_topic(Topic), Payload). iterate(DB, TopicFilter, StartTime) -> - {ok, It} = emqx_replay_local_store:make_iterator(DB, {parse_topic(TopicFilter), StartTime}), - iterate(It). + iterate(iterator(DB, TopicFilter, StartTime)). iterate(It) -> case emqx_replay_local_store:next(It) of @@ -206,6 +242,21 @@ iterate(It) -> [] end. 
+iterate(It, 0) -> + {It, []}; +iterate(It, N) -> + case emqx_replay_local_store:next(It) of + {value, Payload, ItNext} -> + {ItFinal, Ps} = iterate(ItNext, N - 1), + {ItFinal, [Payload | Ps]}; + none -> + {none, []} + end. + +iterator(DB, TopicFilter, StartTime) -> + {ok, It} = emqx_replay_local_store:make_iterator(DB, {parse_topic(TopicFilter), StartTime}), + It. + parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) -> Topic; parse_topic(Topic) -> From 04adb65c0944ae9486ded196f4a512fd5a5e9328 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Sun, 14 May 2023 23:22:39 +0200 Subject: [PATCH 39/49] refactor(ds): zone -> shard Also bump erlang-rocksdb version --- apps/emqx_replay/src/emqx_replay.erl | 4 +- apps/emqx_replay/src/emqx_replay_conf.erl | 29 ++- .../src/emqx_replay_local_store.erl | 182 +++++++++--------- .../src/emqx_replay_local_store_sup.erl | 24 +-- .../src/emqx_replay_message_storage.erl | 10 +- apps/emqx_replay/src/emqx_replay_sup.erl | 8 +- .../test/emqx_replay_local_store_SUITE.erl | 94 ++++----- 7 files changed, 177 insertions(+), 174 deletions(-) diff --git a/apps/emqx_replay/src/emqx_replay.erl b/apps/emqx_replay/src/emqx_replay.erl index ed790697f..91ce94134 100644 --- a/apps/emqx_replay/src/emqx_replay.erl +++ b/apps/emqx_replay/src/emqx_replay.erl @@ -18,7 +18,7 @@ %% API: -export([]). --export_type([topic/0, time/0]). +-export_type([topic/0, time/0, shard/0]). -export_type([replay_id/0, replay/0]). %%================================================================================ @@ -28,6 +28,8 @@ %% parsed -type topic() :: list(binary()). +-type shard() :: binary(). + %% Timestamp %% Earliest possible timestamp is 0. %% TODO granularity? 
diff --git a/apps/emqx_replay/src/emqx_replay_conf.erl b/apps/emqx_replay/src/emqx_replay_conf.erl index 46fa53867..45f1b4fa5 100644 --- a/apps/emqx_replay/src/emqx_replay_conf.erl +++ b/apps/emqx_replay/src/emqx_replay_conf.erl @@ -18,9 +18,9 @@ %% TODO: make a proper HOCON schema and all... %% API: --export([zone_config/1, db_options/0]). +-export([shard_config/1, db_options/0]). --export([zone_iteration_options/1]). +-export([shard_iteration_options/1]). -export([default_iteration_options/0]). -type backend_config() :: @@ -35,17 +35,16 @@ -define(APP, emqx_replay). --type zone() :: emqx_types:zone(). +-spec shard_config(emqx_replay:shard()) -> backend_config(). +shard_config(Shard) -> + DefaultShardConfig = application:get_env(?APP, default_shard_config, default_shard_config()), + Shards = application:get_env(?APP, shard_config, #{}), + maps:get(Shard, Shards, DefaultShardConfig). --spec zone_config(zone()) -> backend_config(). -zone_config(Zone) -> - DefaultZoneConfig = application:get_env(?APP, default_zone_config, default_zone_config()), - Zones = application:get_env(?APP, zone_config, #{}), - maps:get(Zone, Zones, DefaultZoneConfig). - --spec zone_iteration_options(zone()) -> emqx_replay_message_storage:iteration_options(). -zone_iteration_options(Zone) -> - case zone_config(Zone) of +-spec shard_iteration_options(emqx_replay:shard()) -> + emqx_replay_message_storage:iteration_options(). +shard_iteration_options(Shard) -> + case shard_config(Shard) of {emqx_replay_message_storage, Config} -> maps:get(iteration, Config, default_iteration_options()); {_Module, _} -> @@ -54,11 +53,11 @@ zone_iteration_options(Zone) -> -spec default_iteration_options() -> emqx_replay_message_storage:iteration_options(). default_iteration_options() -> - {emqx_replay_message_storage, Config} = default_zone_config(), + {emqx_replay_message_storage, Config} = default_shard_config(), maps:get(iteration, Config). --spec default_zone_config() -> backend_config(). 
-default_zone_config() -> +-spec default_shard_config() -> backend_config(). +default_shard_config() -> {emqx_replay_message_storage, #{ timestamp_bits => 64, topic_bits_per_level => [8, 8, 8, 32, 16], diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 8a74f248e..359db382d 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -30,7 +30,7 @@ %% behavior callbacks: -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). --export_type([cf_refs/0, gen_id/0, db_write_options/0]). +-export_type([cf_refs/0, gen_id/0, db_write_options/0, state/0, iterator/0]). -compile({inline, [meta_lookup/2]}). @@ -39,7 +39,7 @@ %%================================================================================ %% see rocksdb:db_options() --type options() :: proplists:proplist(). +% -type options() :: proplists:proplist(). -type db_write_options() :: proplists:proplist(). @@ -59,14 +59,14 @@ }. -record(s, { - zone :: emqx_types:zone(), + shard :: emqx_replay:shard(), db :: rocksdb:db_handle(), cf_iterator :: rocksdb:cf_handle(), cf_generations :: cf_refs() }). -record(it, { - zone :: emqx_types:zone(), + shard :: emqx_replay:shard(), gen :: gen_id(), replay :: emqx_replay:replay(), module :: module(), @@ -94,33 +94,35 @@ %% 3. `inplace_update_support`? -define(ITERATOR_CF_OPTS, []). --define(REF(Zone), {via, gproc, {n, l, {?MODULE, Zone}}}). +-define(REF(Shard), {via, gproc, {n, l, {?MODULE, Shard}}}). %%================================================================================ %% API funcions %%================================================================================ --spec start_link(emqx_types:zone()) -> {ok, pid()}. -start_link(Zone) -> - gen_server:start_link(?REF(Zone), ?MODULE, [Zone], []). +-spec start_link(emqx_replay:shard()) -> {ok, pid()}. 
+start_link(Shard) -> + gen_server:start_link(?REF(Shard), ?MODULE, [Shard], []). --spec create_generation(emqx_types:zone(), emqx_replay:time(), emqx_replay_conf:backend_config()) -> +-spec create_generation(emqx_replay:shard(), emqx_replay:time(), emqx_replay_conf:backend_config()) -> {ok, gen_id()} | {error, nonmonotonic}. -create_generation(Zone, Since, Config = {_Module, _Options}) -> - gen_server:call(?REF(Zone), {create_generation, Since, Config}). +create_generation(Shard, Since, Config = {_Module, _Options}) -> + gen_server:call(?REF(Shard), {create_generation, Since, Config}). --spec store(emqx_types:zone(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary()) -> +-spec store( + emqx_replay:shard(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary() +) -> ok | {error, _TODO}. -store(Zone, GUID, Time, Topic, Msg) -> - {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Zone, Time), +store(Shard, GUID, Time, Topic, Msg) -> + {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Shard, Time), Mod:store(Data, GUID, Time, Topic, Msg). --spec make_iterator(emqx_types:zone(), emqx_replay:replay()) -> +-spec make_iterator(emqx_replay:shard(), emqx_replay:replay()) -> {ok, iterator()} | {error, _TODO}. -make_iterator(Zone, Replay = {_, StartTime}) -> - {GenId, Gen} = meta_lookup_gen(Zone, StartTime), +make_iterator(Shard, Replay = {_, StartTime}) -> + {GenId, Gen} = meta_lookup_gen(Shard, StartTime), open_iterator(Gen, #it{ - zone = Zone, + shard = Shard, gen = GenId, replay = Replay }). @@ -148,30 +150,30 @@ next(It = #it{module = Mod, data = ItData}) -> preserve_iterator(It = #it{}, ReplayID) -> iterator_put_state(ReplayID, It). --spec restore_iterator(emqx_types:zone(), emqx_replay:replay_id()) -> +-spec restore_iterator(emqx_replay:shard(), emqx_replay:replay_id()) -> {ok, iterator()} | {error, _TODO}. 
-restore_iterator(Zone, ReplayID) -> - case iterator_get_state(Zone, ReplayID) of +restore_iterator(Shard, ReplayID) -> + case iterator_get_state(Shard, ReplayID) of {ok, Serial} -> - restore_iterator_state(Zone, Serial); + restore_iterator_state(Shard, Serial); not_found -> {error, not_found}; {error, _Reason} = Error -> Error end. --spec discard_iterator(emqx_types:zone(), emqx_replay:replay_id()) -> +-spec discard_iterator(emqx_replay:shard(), emqx_replay:replay_id()) -> ok | {error, _TODO}. -discard_iterator(Zone, ReplayID) -> - iterator_delete(Zone, ReplayID). +discard_iterator(Shard, ReplayID) -> + iterator_delete(Shard, ReplayID). %%================================================================================ %% behavior callbacks %%================================================================================ -init([Zone]) -> +init([Shard]) -> process_flag(trap_exit, true), - {ok, S0} = open_db(Zone), + {ok, S0} = open_db(Shard), S = ensure_current_generation(S0), ok = populate_metadata(S), {ok, S}. @@ -192,8 +194,8 @@ handle_cast(_Cast, S) -> handle_info(_Info, S) -> {noreply, S}. -terminate(_Reason, #s{db = DB, zone = Zone}) -> - meta_erase(Zone), +terminate(_Reason, #s{db = DB, shard = Shard}) -> + meta_erase(Shard), ok = rocksdb:close(DB). %%================================================================================ @@ -203,21 +205,21 @@ terminate(_Reason, #s{db = DB, zone = Zone}) -> -record(db, {handle :: rocksdb:db_handle(), cf_iterator :: rocksdb:cf_handle()}). -spec populate_metadata(state()) -> ok. 
-populate_metadata(S = #s{zone = Zone, db = DBHandle, cf_iterator = CFIterator}) -> - ok = meta_put(Zone, db, #db{handle = DBHandle, cf_iterator = CFIterator}), +populate_metadata(S = #s{shard = Shard, db = DBHandle, cf_iterator = CFIterator}) -> + ok = meta_put(Shard, db, #db{handle = DBHandle, cf_iterator = CFIterator}), Current = schema_get_current(DBHandle), lists:foreach(fun(GenId) -> populate_metadata(GenId, S) end, lists:seq(0, Current)). -spec populate_metadata(gen_id(), state()) -> ok. -populate_metadata(GenId, S = #s{zone = Zone, db = DBHandle}) -> +populate_metadata(GenId, S = #s{shard = Shard, db = DBHandle}) -> Gen = open_gen(GenId, schema_get_gen(DBHandle, GenId), S), - meta_register_gen(Zone, GenId, Gen). + meta_register_gen(Shard, GenId, Gen). -spec ensure_current_generation(state()) -> state(). -ensure_current_generation(S = #s{zone = Zone, db = DBHandle}) -> +ensure_current_generation(S = #s{shard = Shard, db = DBHandle}) -> case schema_get_current(DBHandle) of undefined -> - Config = emqx_replay_conf:zone_config(Zone), + Config = emqx_replay_conf:shard_config(Shard), {ok, _, NS} = create_new_gen(0, Config, S), NS; _GenId -> @@ -226,16 +228,16 @@ ensure_current_generation(S = #s{zone = Zone, db = DBHandle}) -> -spec create_new_gen(emqx_replay:time(), emqx_replay_conf:backend_config(), state()) -> {ok, gen_id(), state()} | {error, nonmonotonic}. -create_new_gen(Since, Config, S = #s{zone = Zone, db = DBHandle}) -> - GenId = get_next_id(meta_get_current(Zone)), +create_new_gen(Since, Config, S = #s{shard = Shard, db = DBHandle}) -> + GenId = get_next_id(meta_get_current(Shard)), GenId = get_next_id(schema_get_current(DBHandle)), - case is_gen_valid(Zone, GenId, Since) of + case is_gen_valid(Shard, GenId, Since) of ok -> {ok, Gen, NS} = create_gen(GenId, Since, Config, S), %% TODO: Transaction? Column family creation can't be transactional, anyway. 
ok = schema_put_gen(DBHandle, GenId, Gen), ok = schema_put_current(DBHandle, GenId), - ok = meta_register_gen(Zone, GenId, open_gen(GenId, Gen, NS)), + ok = meta_register_gen(Shard, GenId, open_gen(GenId, Gen, NS)), {ok, GenId, NS}; {error, _} = Error -> Error @@ -253,9 +255,9 @@ create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, cf_generations }, {ok, Gen, S#s{cf_generations = NewCFs ++ CFs}}. --spec open_db(emqx_types:zone()) -> {ok, state()} | {error, _TODO}. -open_db(Zone) -> - Filename = atom_to_list(Zone), +-spec open_db(emqx_replay:shard()) -> {ok, state()} | {error, _TODO}. +open_db(Shard) -> + Filename = binary_to_list(Shard), DBOptions = [ {create_if_missing, true}, {create_missing_column_families, true} @@ -278,7 +280,7 @@ open_db(Zone) -> {ok, DBHandle, [_CFDefault, CFIterator | CFRefs]} -> {CFNames, _} = lists:unzip(ExistingCFs), {ok, #s{ - zone = Zone, + shard = Shard, db = DBHandle, cf_iterator = CFIterator, cf_generations = lists:zip(CFNames, CFRefs) @@ -291,14 +293,14 @@ open_db(Zone) -> open_gen( GenId, Gen = #{module := Mod, data := Data}, - #s{zone = Zone, db = DBHandle, cf_generations = CFs} + #s{shard = Shard, db = DBHandle, cf_generations = CFs} ) -> - DB = Mod:open(Zone, DBHandle, GenId, CFs, Data), + DB = Mod:open(Shard, DBHandle, GenId, CFs, Data), Gen#{data := DB}. -spec open_next_iterator(iterator()) -> {ok, iterator()} | {error, _Reason} | none. -open_next_iterator(It = #it{zone = Zone, gen = GenId}) -> - open_next_iterator(meta_get_gen(Zone, GenId + 1), It#it{gen = GenId + 1}). +open_next_iterator(It = #it{shard = Shard, gen = GenId}) -> + open_next_iterator(meta_get_gen(Shard, GenId + 1), It#it{gen = GenId + 1}). open_next_iterator(undefined, _It) -> none; @@ -331,17 +333,17 @@ open_restore_iterator(#{module := Mod, data := Data}, It = #it{replay = Replay}, -define(ITERATION_WRITE_OPTS, []). -define(ITERATION_READ_OPTS, []). 
-iterator_get_state(Zone, ReplayID) -> - #db{handle = Handle, cf_iterator = CF} = meta_lookup(Zone, db), +iterator_get_state(Shard, ReplayID) -> + #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db), rocksdb:get(Handle, CF, ?KEY_REPLAY_STATE(ReplayID), ?ITERATION_READ_OPTS). -iterator_put_state(ID, It = #it{zone = Zone}) -> - #db{handle = Handle, cf_iterator = CF} = meta_lookup(Zone, db), +iterator_put_state(ID, It = #it{shard = Shard}) -> + #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db), Serial = preserve_iterator_state(It), rocksdb:put(Handle, CF, ?KEY_REPLAY_STATE(ID), Serial, ?ITERATION_WRITE_OPTS). -iterator_delete(Zone, ID) -> - #db{handle = Handle, cf_iterator = CF} = meta_lookup(Zone, db), +iterator_delete(Shard, ID) -> + #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db), rocksdb:delete(Handle, CF, ?KEY_REPLAY_STATE(ID), ?ITERATION_WRITE_OPTS). preserve_iterator_state(#it{ @@ -358,10 +360,10 @@ preserve_iterator_state(#it{ st => Mod:preserve_iterator(ItData) }). -restore_iterator_state(Zone, Serial) when is_binary(Serial) -> - restore_iterator_state(Zone, binary_to_term(Serial)); +restore_iterator_state(Shard, Serial) when is_binary(Serial) -> + restore_iterator_state(Shard, binary_to_term(Serial)); restore_iterator_state( - Zone, + Shard, #{ v := 1, gen := Gen, @@ -370,8 +372,8 @@ restore_iterator_state( st := State } ) -> - It = #it{zone = Zone, gen = Gen, replay = {TopicFilter, StartTime}}, - open_restore_iterator(meta_get_gen(Zone, Gen), It, State). + It = #it{shard = Shard, gen = Gen, replay = {TopicFilter, StartTime}}, + open_restore_iterator(meta_get_gen(Shard, Gen), It, State). %% Functions for dealing with the metadata stored persistently in rocksdb @@ -409,27 +411,27 @@ schema_gen_key(N) -> -undef(SCHEMA_WRITE_OPTS). -undef(SCHEMA_READ_OPTS). 
-%% Functions for dealing with the runtime zone metadata: +%% Functions for dealing with the runtime shard metadata: --define(PERSISTENT_TERM(ZONE, GEN), {?MODULE, ZONE, GEN}). +-define(PERSISTENT_TERM(SHARD, GEN), {?MODULE, SHARD, GEN}). --spec meta_register_gen(emqx_types:zone(), gen_id(), generation()) -> ok. -meta_register_gen(Zone, GenId, Gen) -> +-spec meta_register_gen(emqx_replay:shard(), gen_id(), generation()) -> ok. +meta_register_gen(Shard, GenId, Gen) -> Gs = case GenId > 0 of - true -> meta_lookup(Zone, GenId - 1); + true -> meta_lookup(Shard, GenId - 1); false -> [] end, - ok = meta_put(Zone, GenId, [Gen | Gs]), - ok = meta_put(Zone, current, GenId). + ok = meta_put(Shard, GenId, [Gen | Gs]), + ok = meta_put(Shard, current, GenId). --spec meta_lookup_gen(emqx_types:zone(), emqx_replay:time()) -> {gen_id(), generation()}. -meta_lookup_gen(Zone, Time) -> +-spec meta_lookup_gen(emqx_replay:shard(), emqx_replay:time()) -> {gen_id(), generation()}. +meta_lookup_gen(Shard, Time) -> % TODO % Is cheaper persistent term GC on update here worth extra lookup? I'm leaning % towards a "no". - Current = meta_lookup(Zone, current), - Gens = meta_lookup(Zone, Current), + Current = meta_lookup(Shard, current), + Gens = meta_lookup(Shard, Current), find_gen(Time, Current, Gens). find_gen(Time, GenId, [Gen = #{since := Since} | _]) when Time >= Since -> @@ -437,34 +439,34 @@ find_gen(Time, GenId, [Gen = #{since := Since} | _]) when Time >= Since -> find_gen(Time, GenId, [_Gen | Rest]) -> find_gen(Time, GenId - 1, Rest). --spec meta_get_gen(emqx_types:zone(), gen_id()) -> generation() | undefined. -meta_get_gen(Zone, GenId) -> - case meta_lookup(Zone, GenId, []) of +-spec meta_get_gen(emqx_replay:shard(), gen_id()) -> generation() | undefined. +meta_get_gen(Shard, GenId) -> + case meta_lookup(Shard, GenId, []) of [Gen | _Older] -> Gen; [] -> undefined end. --spec meta_get_current(emqx_types:zone()) -> gen_id() | undefined. 
-meta_get_current(Zone) -> - meta_lookup(Zone, current, undefined). +-spec meta_get_current(emqx_replay:shard()) -> gen_id() | undefined. +meta_get_current(Shard) -> + meta_lookup(Shard, current, undefined). --spec meta_lookup(emqx_types:zone(), _K) -> _V. -meta_lookup(Zone, K) -> - persistent_term:get(?PERSISTENT_TERM(Zone, K)). +-spec meta_lookup(emqx_replay:shard(), _K) -> _V. +meta_lookup(Shard, K) -> + persistent_term:get(?PERSISTENT_TERM(Shard, K)). --spec meta_lookup(emqx_types:zone(), _K, Default) -> _V | Default. -meta_lookup(Zone, K, Default) -> - persistent_term:get(?PERSISTENT_TERM(Zone, K), Default). +-spec meta_lookup(emqx_replay:shard(), _K, Default) -> _V | Default. +meta_lookup(Shard, K, Default) -> + persistent_term:get(?PERSISTENT_TERM(Shard, K), Default). --spec meta_put(emqx_types:zone(), _K, _V) -> ok. -meta_put(Zone, K, V) -> - persistent_term:put(?PERSISTENT_TERM(Zone, K), V). +-spec meta_put(emqx_replay:shard(), _K, _V) -> ok. +meta_put(Shard, K, V) -> + persistent_term:put(?PERSISTENT_TERM(Shard, K), V). --spec meta_erase(emqx_types:zone()) -> ok. -meta_erase(Zone) -> +-spec meta_erase(emqx_replay:shard()) -> ok. +meta_erase(Shard) -> [ persistent_term:erase(K) - || {K = ?PERSISTENT_TERM(Z, _), _} <- persistent_term:get(), Z =:= Zone + || {K = ?PERSISTENT_TERM(Z, _), _} <- persistent_term:get(), Z =:= Shard ], ok. @@ -473,15 +475,15 @@ meta_erase(Zone) -> get_next_id(undefined) -> 0; get_next_id(GenId) -> GenId + 1. -is_gen_valid(Zone, GenId, Since) when GenId > 0 -> - [GenPrev | _] = meta_lookup(Zone, GenId - 1), +is_gen_valid(Shard, GenId, Since) when GenId > 0 -> + [GenPrev | _] = meta_lookup(Shard, GenId - 1), case GenPrev of #{since := SincePrev} when Since > SincePrev -> ok; #{} -> {error, nonmonotonic} end; -is_gen_valid(_Zone, 0, 0) -> +is_gen_valid(_Shard, 0, 0) -> ok. %% -spec store_cfs(rocksdb:db_handle(), [{string(), rocksdb:cf_handle()}]) -> ok. 
diff --git a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl index fb88ef212..6812d0ee9 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl @@ -18,7 +18,7 @@ -behavior(supervisor). %% API: --export([start_link/0, start_zone/1, stop_zone/1]). +-export([start_link/0, start_shard/1, stop_shard/1]). %% behavior callbacks: -export([init/1]). @@ -37,14 +37,14 @@ start_link() -> supervisor:start_link({local, ?SUP}, ?MODULE, []). --spec start_zone(emqx_types:zone()) -> supervisor:startchild_ret(). -start_zone(Zone) -> - supervisor:start_child(?SUP, zone_child_spec(Zone)). +-spec start_shard(emqx_replay:shard()) -> supervisor:startchild_ret(). +start_shard(Shard) -> + supervisor:start_child(?SUP, shard_child_spec(Shard)). --spec stop_zone(emqx_types:zone()) -> ok | {error, _}. -stop_zone(Zone) -> - ok = supervisor:terminate_child(?SUP, Zone), - ok = supervisor:delete_child(?SUP, Zone). +-spec stop_shard(emqx_replay:shard()) -> ok | {error, _}. +stop_shard(Shard) -> + ok = supervisor:terminate_child(?SUP, Shard), + ok = supervisor:delete_child(?SUP, Shard). %%================================================================================ %% behavior callbacks @@ -63,11 +63,11 @@ init([]) -> %% Internal functions %%================================================================================ --spec zone_child_spec(emqx_types:zone()) -> supervisor:child_spec(). -zone_child_spec(Zone) -> +-spec shard_child_spec(emqx_replay:shard()) -> supervisor:child_spec(). 
+shard_child_spec(Shard) -> #{ - id => Zone, - start => {emqx_replay_local_store, start_link, [Zone]}, + id => Shard, + start => {emqx_replay_local_store, start_link, [Shard]}, shutdown => 5_000, restart => permanent, type => worker diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index fbeb452c9..bfbaf55b3 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -182,7 +182,7 @@ -opaque schema() :: #schema{}. -record(db, { - zone :: emqx_types:zone(), + shard :: emqx_replay:shard(), handle :: rocksdb:db_handle(), cf :: rocksdb:cf_handle(), keymapper :: keymapper(), @@ -244,17 +244,17 @@ create_new(DBHandle, GenId, Options) -> %% Reopen the database -spec open( - emqx_types:zone(), + emqx_replay:shard(), rocksdb:db_handle(), emqx_replay_local_store:gen_id(), emqx_replay_local_store:cf_refs(), schema() ) -> db(). -open(Zone, DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) -> +open(Shard, DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) -> {value, {_, CFHandle}} = lists:keysearch(data_cf(GenId), 1, CFs), #db{ - zone = Zone, + shard = Shard, handle = DBHandle, cf = CFHandle, keymapper = Keymapper @@ -292,7 +292,7 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, -spec make_iterator(db(), emqx_replay:replay()) -> {ok, iterator()} | {error, _TODO}. make_iterator(DB, Replay) -> - Options = emqx_replay_conf:zone_iteration_options(DB#db.zone), + Options = emqx_replay_conf:shard_iteration_options(DB#db.shard), make_iterator(DB, Replay, Options). 
-spec make_iterator(db(), emqx_replay:replay(), iteration_options()) -> diff --git a/apps/emqx_replay/src/emqx_replay_sup.erl b/apps/emqx_replay/src/emqx_replay_sup.erl index a5da13c7a..969ce9a49 100644 --- a/apps/emqx_replay/src/emqx_replay_sup.erl +++ b/apps/emqx_replay/src/emqx_replay_sup.erl @@ -1,5 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2022 EMQ Technologies Co., Ltd. All Rights Reserved. +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ start_link() -> %%================================================================================ init([]) -> - Children = [zone_sup()], + Children = [shard_sup()], SupFlags = #{ strategy => one_for_all, intensity => 0, @@ -54,9 +54,9 @@ init([]) -> %% Internal functions %%================================================================================ -zone_sup() -> +shard_sup() -> #{ - id => local_store_zone_sup, + id => local_store_shard_sup, start => {emqx_replay_local_store_sup, start_link, []}, restart => permanent, type => supervisor, diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index afed30b88..cdf8a95e7 100644 --- a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -21,7 +21,7 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("stdlib/include/assert.hrl"). --define(ZONE, zone(?FUNCTION_NAME)). +-define(SHARD, shard(?FUNCTION_NAME)). -define(DEFAULT_CONFIG, {emqx_replay_message_storage, #{ @@ -44,8 +44,8 @@ %% Smoke test for opening and reopening the database t_open(_Config) -> - ok = emqx_replay_local_store_sup:stop_zone(?ZONE), - {ok, _} = emqx_replay_local_store_sup:start_zone(?ZONE). 
+ ok = emqx_replay_local_store_sup:stop_shard(?SHARD), + {ok, _} = emqx_replay_local_store_sup:start_shard(?SHARD). %% Smoke test of store function t_store(_Config) -> @@ -53,7 +53,7 @@ t_store(_Config) -> PublishedAt = 1000, Topic = [<<"foo">>, <<"bar">>], Payload = <<"message">>, - ?assertMatch(ok, emqx_replay_local_store:store(?ZONE, MessageID, PublishedAt, Topic, Payload)). + ?assertMatch(ok, emqx_replay_local_store:store(?SHARD, MessageID, PublishedAt, Topic, Payload)). %% Smoke test for iteration through a concrete topic t_iterate(_Config) -> @@ -62,7 +62,7 @@ t_iterate(_Config) -> Timestamps = lists:seq(1, 10), [ emqx_replay_local_store:store( - ?ZONE, + ?SHARD, emqx_guid:gen(), PublishedAt, Topic, @@ -73,7 +73,7 @@ t_iterate(_Config) -> %% Iterate through individual topics: [ begin - {ok, It} = emqx_replay_local_store:make_iterator(?ZONE, {Topic, 0}), + {ok, It} = emqx_replay_local_store:make_iterator(?SHARD, {Topic, 0}), Values = iterate(It), ?assertEqual(lists:map(fun integer_to_binary/1, Timestamps), Values) end @@ -87,50 +87,50 @@ t_iterate_wildcard(_Config) -> Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], Timestamps = lists:seq(1, 10), _ = [ - store(?ZONE, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) + store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) || Topic <- Topics, PublishedAt <- Timestamps ], ?assertEqual( lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- Timestamps]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 0)]) ), ?assertEqual( [], - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "#", 10 + 1)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 10 + 1)]) ), ?assertEqual( lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- lists:seq(5, 10)]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "#", 5)]) + 
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 5)]) ), ?assertEqual( lists:sort([ {Topic, PublishedAt} || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps ]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)]) ), ?assertEqual( lists:sort([{"foo/bar", PublishedAt} || PublishedAt <- Timestamps]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/+", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/+", 0)]) ), ?assertEqual( [], - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/+/bar", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/+/bar", 0)]) ), ?assertEqual( lists:sort([ {Topic, PublishedAt} || Topic <- ["foo/bar", "foo/bar/baz", "a/bar"], PublishedAt <- Timestamps ]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "+/bar/#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "+/bar/#", 0)]) ), ?assertEqual( lists:sort([{Topic, PublishedAt} || Topic <- ["a", "a/bar"], PublishedAt <- Timestamps]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "a/#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 0)]) ), ?assertEqual( [], - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "a/+/+", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/+/+", 0)]) ), ok. 
@@ -139,40 +139,40 @@ t_iterate_long_tail_wildcard(_Config) -> TopicFilter = "b/c/d/e/+/+", Timestamps = lists:seq(1, 100), _ = [ - store(?ZONE, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) + store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) || PublishedAt <- Timestamps ], ?assertEqual( lists:sort([{"b/c/d/e/f/g", PublishedAt} || PublishedAt <- lists:seq(50, 100)]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, TopicFilter, 50)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, TopicFilter, 50)]) ). t_create_gen(_Config) -> - {ok, 1} = emqx_replay_local_store:create_generation(?ZONE, 5, ?DEFAULT_CONFIG), + {ok, 1} = emqx_replay_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG), ?assertEqual( {error, nonmonotonic}, - emqx_replay_local_store:create_generation(?ZONE, 1, ?DEFAULT_CONFIG) + emqx_replay_local_store:create_generation(?SHARD, 1, ?DEFAULT_CONFIG) ), ?assertEqual( {error, nonmonotonic}, - emqx_replay_local_store:create_generation(?ZONE, 5, ?DEFAULT_CONFIG) + emqx_replay_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG) ), - {ok, 2} = emqx_replay_local_store:create_generation(?ZONE, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_replay_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), Topics = ["foo/bar", "foo/bar/baz"], Timestamps = lists:seq(1, 100), [ - ?assertEqual(ok, store(?ZONE, PublishedAt, Topic, <<>>)) + ?assertEqual(ok, store(?SHARD, PublishedAt, Topic, <<>>)) || Topic <- Topics, PublishedAt <- Timestamps ]. 
t_iterate_multigen(_Config) -> - {ok, 1} = emqx_replay_local_store:create_generation(?ZONE, 10, ?COMPACT_CONFIG), - {ok, 2} = emqx_replay_local_store:create_generation(?ZONE, 50, ?DEFAULT_CONFIG), - {ok, 3} = emqx_replay_local_store:create_generation(?ZONE, 1000, ?DEFAULT_CONFIG), + {ok, 1} = emqx_replay_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_replay_local_store:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), + {ok, 3} = emqx_replay_local_store:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG), Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], Timestamps = lists:seq(1, 100), _ = [ - store(?ZONE, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) + store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt})) || Topic <- Topics, PublishedAt <- Timestamps ], ?assertEqual( @@ -180,38 +180,38 @@ t_iterate_multigen(_Config) -> {Topic, PublishedAt} || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps ]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "foo/#", 0)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)]) ), ?assertEqual( lists:sort([ {Topic, PublishedAt} || Topic <- ["a", "a/bar"], PublishedAt <- lists:seq(60, 100) ]), - lists:sort([binary_to_term(Payload) || Payload <- iterate(?ZONE, "a/#", 60)]) + lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 60)]) ). 
t_iterate_multigen_preserve_restore(_Config) -> ReplayID = atom_to_binary(?FUNCTION_NAME), - {ok, 1} = emqx_replay_local_store:create_generation(?ZONE, 10, ?COMPACT_CONFIG), - {ok, 2} = emqx_replay_local_store:create_generation(?ZONE, 50, ?DEFAULT_CONFIG), - {ok, 3} = emqx_replay_local_store:create_generation(?ZONE, 100, ?DEFAULT_CONFIG), + {ok, 1} = emqx_replay_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_replay_local_store:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), + {ok, 3} = emqx_replay_local_store:create_generation(?SHARD, 100, ?DEFAULT_CONFIG), Topics = ["foo/bar", "foo/bar/baz", "a/bar"], Timestamps = lists:seq(1, 100), TopicFilter = "foo/#", TopicsMatching = ["foo/bar", "foo/bar/baz"], _ = [ - store(?ZONE, TS, Topic, term_to_binary({Topic, TS})) + store(?SHARD, TS, Topic, term_to_binary({Topic, TS})) || Topic <- Topics, TS <- Timestamps ], - It0 = iterator(?ZONE, TopicFilter, 0), + It0 = iterator(?SHARD, TopicFilter, 0), {It1, Res10} = iterate(It0, 10), % preserve mid-generation ok = emqx_replay_local_store:preserve_iterator(It1, ReplayID), - {ok, It2} = emqx_replay_local_store:restore_iterator(?ZONE, ReplayID), + {ok, It2} = emqx_replay_local_store:restore_iterator(?SHARD, ReplayID), {It3, Res100} = iterate(It2, 88), % preserve on the generation boundary ok = emqx_replay_local_store:preserve_iterator(It3, ReplayID), - {ok, It4} = emqx_replay_local_store:restore_iterator(?ZONE, ReplayID), + {ok, It4} = emqx_replay_local_store:restore_iterator(?SHARD, ReplayID), {It5, Res200} = iterate(It4, 1000), ?assertEqual(none, It5), ?assertEqual( @@ -220,16 +220,16 @@ t_iterate_multigen_preserve_restore(_Config) -> ), ?assertEqual( ok, - emqx_replay_local_store:discard_iterator(?ZONE, ReplayID) + emqx_replay_local_store:discard_iterator(?SHARD, ReplayID) ), ?assertEqual( {error, not_found}, - emqx_replay_local_store:restore_iterator(?ZONE, ReplayID) + emqx_replay_local_store:restore_iterator(?SHARD, ReplayID) ). 
-store(Zone, PublishedAt, Topic, Payload) -> +store(Shard, PublishedAt, Topic, Payload) -> ID = emqx_guid:gen(), - emqx_replay_local_store:store(Zone, ID, PublishedAt, parse_topic(Topic), Payload). + emqx_replay_local_store:store(Shard, ID, PublishedAt, parse_topic(Topic), Payload). iterate(DB, TopicFilter, StartTime) -> iterate(iterator(DB, TopicFilter, StartTime)). @@ -274,15 +274,15 @@ end_per_suite(_Config) -> ok = application:stop(emqx_replay). init_per_testcase(TC, Config) -> - ok = set_zone_config(zone(TC), ?DEFAULT_CONFIG), - {ok, _} = emqx_replay_local_store_sup:start_zone(zone(TC)), + ok = set_shard_config(shard(TC), ?DEFAULT_CONFIG), + {ok, _} = emqx_replay_local_store_sup:start_shard(shard(TC)), Config. end_per_testcase(TC, _Config) -> - ok = emqx_replay_local_store_sup:stop_zone(zone(TC)). + ok = emqx_replay_local_store_sup:stop_shard(shard(TC)). -zone(TC) -> - list_to_atom(lists:concat([?MODULE, "_", TC])). +shard(TC) -> + list_to_binary(lists:concat([?MODULE, "_", TC])). -set_zone_config(Zone, Config) -> - ok = application:set_env(emqx_replay, zone_config, #{Zone => Config}). +set_shard_config(Shard, Config) -> + ok = application:set_env(emqx_replay, shard_config, #{Shard => Config}). 
From 0f6c81e42d3bc69fa0318cb7e872416a1630dd68 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 15 May 2023 10:58:12 +0200 Subject: [PATCH 40/49] chore(ds): Change license to BSL --- apps/emqx_replay/BSL.txt | 94 +++++++++++++++++++ apps/emqx_replay/README.md | 37 ++++++++ apps/emqx_replay/src/emqx_replay.erl | 12 --- apps/emqx_replay/src/emqx_replay_app.erl | 12 --- apps/emqx_replay/src/emqx_replay_conf.erl | 12 --- .../src/emqx_replay_local_store.erl | 12 --- .../src/emqx_replay_local_store_sup.erl | 12 --- .../src/emqx_replay_message_storage.erl | 12 --- apps/emqx_replay/src/emqx_replay_sup.erl | 12 --- .../test/emqx_replay_local_store_SUITE.erl | 12 --- .../emqx_replay_message_storage_SUITE.erl | 12 --- .../emqx_replay_message_storage_shim.erl | 14 +-- .../props/prop_replay_message_storage.erl | 14 +-- 13 files changed, 133 insertions(+), 134 deletions(-) create mode 100644 apps/emqx_replay/BSL.txt create mode 100644 apps/emqx_replay/README.md diff --git a/apps/emqx_replay/BSL.txt b/apps/emqx_replay/BSL.txt new file mode 100644 index 000000000..2374e6ce2 --- /dev/null +++ b/apps/emqx_replay/BSL.txt @@ -0,0 +1,94 @@ +Business Source License 1.1 + +Licensor: Hangzhou EMQ Technologies Co., Ltd. +Licensed Work: EMQX Enterprise Edition + The Licensed Work is (c) 2023 + Hangzhou EMQ Technologies Co., Ltd. +Additional Use Grant: Students and educators are granted right to copy, + modify, and create derivative work for research + or education. +Change Date: 2027-06-01 +Change License: Apache License, Version 2.0 + +For information about alternative licensing arrangements for the Software, +please contact Licensor: https://www.emqx.com/en/contact + +Notice + +The Business Source License (this document, or the “License”) is not an Open +Source license. However, the Licensed Work will eventually be made available +under an Open Source License, as stated in this License. 
+ +License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved. +“Business Source License” is a trademark of MariaDB Corporation Ab. + +----------------------------------------------------------------------------- + +Business Source License 1.1 + +Terms + +The Licensor hereby grants you the right to copy, modify, create derivative +works, redistribute, and make non-production use of the Licensed Work. The +Licensor may make an Additional Use Grant, above, permitting limited +production use. + +Effective on the Change Date, or the fourth anniversary of the first publicly +available distribution of a specific version of the Licensed Work under this +License, whichever comes first, the Licensor hereby grants you rights under +the terms of the Change License, and the rights granted in the paragraph +above terminate. + +If your use of the Licensed Work does not comply with the requirements +currently in effect as described in this License, you must purchase a +commercial license from the Licensor, its affiliated entities, or authorized +resellers, or you must refrain from using the Licensed Work. + +All copies of the original and modified Licensed Work, and derivative works +of the Licensed Work, are subject to this License. This License applies +separately for each version of the Licensed Work and the Change Date may vary +for each version of the Licensed Work released by Licensor. + +You must conspicuously display this License on each original or modified copy +of the Licensed Work. If you receive the Licensed Work in original or +modified form from a third party, the terms and conditions set forth in this +License apply to your use of that work. + +Any use of the Licensed Work in violation of this License will automatically +terminate your rights under this License for the current and all other +versions of the Licensed Work. 
+ +This License does not grant you any right in any trademark or logo of +Licensor or its affiliates (provided that you may use a trademark or logo of +Licensor as expressly required by this License). + +TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON +AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, +EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND +TITLE. + +MariaDB hereby grants you permission to use this License’s text to license +your works, and to refer to it using the trademark “Business Source License”, +as long as you comply with the Covenants of Licensor below. + +Covenants of Licensor + +In consideration of the right to use this License’s text and the “Business +Source License” name and trademark, Licensor covenants to MariaDB, and to all +other recipients of the licensed work to be provided by Licensor: + +1. To specify as the Change License the GPL Version 2.0 or any later version, + or a license that is compatible with GPL Version 2.0 or a later version, + where “compatible” means that software provided under the Change License can + be included in a program with software provided under GPL Version 2.0 or a + later version. Licensor may specify additional Change Licenses without + limitation. + +2. To either: (a) specify an additional grant of rights to use that does not + impose any additional restriction on the right granted in this License, as + the Additional Use Grant; or (b) insert the text “None”. + +3. To specify a Change Date. + +4. Not to modify this License in any other way. diff --git a/apps/emqx_replay/README.md b/apps/emqx_replay/README.md new file mode 100644 index 000000000..50e539cdc --- /dev/null +++ b/apps/emqx_replay/README.md @@ -0,0 +1,37 @@ +# EMQX Replay + +`emqx_replay` is a durable storage for MQTT messages within EMQX. 
+It implements the following scenarios: +- Persisting messages published by clients +- + +> 0. App overview introduction +> 1. let people know what your project can do specifically. Is it a base +> library dependency, or what kind of functionality is provided to the user? +> 2. Provide context and add a link to any reference visitors might be +> unfamiliar with. +> 3. Design details, implementation technology architecture, Roadmap, etc. + +# [Features] - [Optional] +> A List of features your application provided. If the feature is quite simple, just +> list in the previous section. + +# Limitation +TBD + +# Documentation links +TBD + +# Usage +TBD + +# Configurations +TBD + +# HTTP APIs + +# Other +TBD + +# Contributing +Please see our [contributing.md](../../CONTRIBUTING.md). diff --git a/apps/emqx_replay/src/emqx_replay.erl b/apps/emqx_replay/src/emqx_replay.erl index 91ce94134..3f7142978 100644 --- a/apps/emqx_replay/src/emqx_replay.erl +++ b/apps/emqx_replay/src/emqx_replay.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay). 
diff --git a/apps/emqx_replay/src/emqx_replay_app.erl b/apps/emqx_replay/src/emqx_replay_app.erl index bf6fd0b55..17de0f28c 100644 --- a/apps/emqx_replay/src/emqx_replay_app.erl +++ b/apps/emqx_replay/src/emqx_replay_app.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_app). diff --git a/apps/emqx_replay/src/emqx_replay_conf.erl b/apps/emqx_replay/src/emqx_replay_conf.erl index 45f1b4fa5..99405dfda 100644 --- a/apps/emqx_replay/src/emqx_replay_conf.erl +++ b/apps/emqx_replay/src/emqx_replay_conf.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_conf). diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 359db382d..8ff6a6e56 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_local_store). diff --git a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl index 6812d0ee9..25518625b 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_local_store_sup). diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_replay/src/emqx_replay_message_storage.erl index bfbaf55b3..f4d6c8e66 100644 --- a/apps/emqx_replay/src/emqx_replay_message_storage.erl +++ b/apps/emqx_replay/src/emqx_replay_message_storage.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_message_storage). 
diff --git a/apps/emqx_replay/src/emqx_replay_sup.erl b/apps/emqx_replay/src/emqx_replay_sup.erl index 969ce9a49..1245c0084 100644 --- a/apps/emqx_replay/src/emqx_replay_sup.erl +++ b/apps/emqx_replay/src/emqx_replay_sup.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_sup). diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl index cdf8a95e7..da6fef09d 100644 --- a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_local_store_SUITE). diff --git a/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl b/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl index 3fca48a7b..a26579299 100644 --- a/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl +++ b/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. %%-------------------------------------------------------------------- -module(emqx_replay_message_storage_SUITE). diff --git a/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl b/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl index 160451cb9..f8e5c33d9 100644 --- a/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl +++ b/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- -module(emqx_replay_message_storage_shim). diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_replay/test/props/prop_replay_message_storage.erl index ede6dc336..7713a66a6 100644 --- a/apps/emqx_replay/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_replay/test/props/prop_replay_message_storage.erl @@ -1,17 +1,5 @@ %%-------------------------------------------------------------------- -%% Copyright (c) 2020-2022 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- -module(prop_replay_message_storage). 
From efed7cdc2f895c981932322f9b4bdc96617949f1 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 15 May 2023 11:39:48 +0200 Subject: [PATCH 41/49] chore(ds): behavior -> behaviour --- apps/emqx/src/proto/emqx_shared_sub_proto_v1.erl | 2 +- apps/emqx_replay/src/emqx_replay.erl | 2 +- apps/emqx_replay/src/emqx_replay_local_store.erl | 6 +++--- apps/emqx_replay/src/emqx_replay_local_store_sup.erl | 6 +++--- apps/emqx_replay/src/emqx_replay_sup.erl | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/apps/emqx/src/proto/emqx_shared_sub_proto_v1.erl b/apps/emqx/src/proto/emqx_shared_sub_proto_v1.erl index d036947c6..eeb1fbbcd 100644 --- a/apps/emqx/src/proto/emqx_shared_sub_proto_v1.erl +++ b/apps/emqx/src/proto/emqx_shared_sub_proto_v1.erl @@ -26,7 +26,7 @@ -include("bpapi.hrl"). %%================================================================================ -%% behavior callbacks +%% behaviour callbacks %%================================================================================ introduced_in() -> diff --git a/apps/emqx_replay/src/emqx_replay.erl b/apps/emqx_replay/src/emqx_replay.erl index 3f7142978..f1ea7beac 100644 --- a/apps/emqx_replay/src/emqx_replay.erl +++ b/apps/emqx_replay/src/emqx_replay.erl @@ -35,7 +35,7 @@ %%================================================================================ %%================================================================================ -%% behavior callbacks +%% behaviour callbacks %%================================================================================ %%================================================================================ diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 8ff6a6e56..065eb0f25 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -3,7 +3,7 @@ 
%%-------------------------------------------------------------------- -module(emqx_replay_local_store). --behavior(gen_server). +-behaviour(gen_server). %% API: -export([start_link/1]). @@ -15,7 +15,7 @@ -export([preserve_iterator/2, restore_iterator/2, discard_iterator/2]). -%% behavior callbacks: +%% behaviour callbacks: -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). -export_type([cf_refs/0, gen_id/0, db_write_options/0, state/0, iterator/0]). @@ -156,7 +156,7 @@ discard_iterator(Shard, ReplayID) -> iterator_delete(Shard, ReplayID). %%================================================================================ -%% behavior callbacks +%% behaviour callbacks %%================================================================================ init([Shard]) -> diff --git a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl index 25518625b..13f20c6bc 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store_sup.erl @@ -3,12 +3,12 @@ %%-------------------------------------------------------------------- -module(emqx_replay_local_store_sup). --behavior(supervisor). +-behaviour(supervisor). %% API: -export([start_link/0, start_shard/1, stop_shard/1]). -%% behavior callbacks: +%% behaviour callbacks: -export([init/1]). %%================================================================================ @@ -35,7 +35,7 @@ stop_shard(Shard) -> ok = supervisor:delete_child(?SUP, Shard). 
%%================================================================================ -%% behavior callbacks +%% behaviour callbacks %%================================================================================ init([]) -> diff --git a/apps/emqx_replay/src/emqx_replay_sup.erl b/apps/emqx_replay/src/emqx_replay_sup.erl index 1245c0084..945a71180 100644 --- a/apps/emqx_replay/src/emqx_replay_sup.erl +++ b/apps/emqx_replay/src/emqx_replay_sup.erl @@ -3,12 +3,12 @@ %%-------------------------------------------------------------------- -module(emqx_replay_sup). --behavior(supervisor). +-behaviour(supervisor). %% API: -export([start_link/0]). -%% behavior callbacks: +%% behaviour callbacks: -export([init/1]). %%================================================================================ @@ -26,7 +26,7 @@ start_link() -> supervisor:start_link({local, ?SUP}, ?MODULE, []). %%================================================================================ -%% behavior callbacks +%% behaviour callbacks %%================================================================================ init([]) -> From c4f721f2148a60008deedf1215541d36bd22fa43 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 15 May 2023 12:05:18 +0200 Subject: [PATCH 42/49] chore(ds): Add callback definitions to local_store --- .../src/emqx_replay_local_store.erl | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_replay/src/emqx_replay_local_store.erl index 065eb0f25..c8041297b 100644 --- a/apps/emqx_replay/src/emqx_replay_local_store.erl +++ b/apps/emqx_replay/src/emqx_replay_local_store.erl @@ -84,6 +84,28 @@ -define(REF(Shard), {via, gproc, {n, l, {?MODULE, Shard}}}). 
+%%================================================================================ +%% Callbacks +%%================================================================================ + +-callback create_new(rocksdb:db_handle(), gen_id(), _Options :: term()) -> + {_Schema, cf_refs()}. + +-callback open(emqx_replay:shard(), rocksdb:db_handle(), gen_id(), cf_refs(), _Schema) -> + term(). + +-callback store(_Schema, binary(), emqx_replay:time(), emqx_replay:topic(), binary()) -> + ok | {error, _}. + +-callback make_iterator(_Schema, emqx_replay:replay()) -> + {ok, _It} | {error, _}. + +-callback restore_iterator(_Schema, emqx_replay:replay(), binary()) -> {ok, _It} | {error, _}. + +-callback preserve_iterator(_Schema, _It) -> term(). + +-callback next(It) -> {value, binary(), It} | none | {error, closed}. + %%================================================================================ %% API funcions %%================================================================================ @@ -100,7 +122,7 @@ create_generation(Shard, Since, Config = {_Module, _Options}) -> -spec store( emqx_replay:shard(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary() ) -> - ok | {error, _TODO}. + ok | {error, _}. store(Shard, GUID, Time, Topic, Msg) -> {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Shard, Time), Mod:store(Data, GUID, Time, Topic, Msg). 
From 8d6bcc141439f51345c6321533a18cc114a3e532 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 17 May 2023 15:29:43 +0200 Subject: [PATCH 43/49] refactor(ds): emqx_replay -> emqx_durable_storage --- apps/{emqx_replay => emqx_durable_storage}/BSL.txt | 0 apps/{emqx_replay => emqx_durable_storage}/README.md | 0 .../{emqx_replay => emqx_durable_storage}/src/emqx_replay.app.src | 0 apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay.erl | 0 .../{emqx_replay => emqx_durable_storage}/src/emqx_replay_app.erl | 0 .../src/emqx_replay_conf.erl | 0 .../src/emqx_replay_local_store.erl | 0 .../src/emqx_replay_local_store_sup.erl | 0 .../src/emqx_replay_message_storage.erl | 0 .../{emqx_replay => emqx_durable_storage}/src/emqx_replay_sup.erl | 0 .../test/emqx_replay_local_store_SUITE.erl | 0 .../test/emqx_replay_message_storage_SUITE.erl | 0 .../test/props/emqx_replay_message_storage_shim.erl | 0 .../test/props/payload_gen.erl | 0 .../test/props/prop_replay_message_storage.erl | 0 15 files changed, 0 insertions(+), 0 deletions(-) rename apps/{emqx_replay => emqx_durable_storage}/BSL.txt (100%) rename apps/{emqx_replay => emqx_durable_storage}/README.md (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay.app.src (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay_app.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay_conf.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay_local_store.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay_local_store_sup.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay_message_storage.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/src/emqx_replay_sup.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/test/emqx_replay_local_store_SUITE.erl (100%) rename 
apps/{emqx_replay => emqx_durable_storage}/test/emqx_replay_message_storage_SUITE.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/test/props/emqx_replay_message_storage_shim.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/test/props/payload_gen.erl (100%) rename apps/{emqx_replay => emqx_durable_storage}/test/props/prop_replay_message_storage.erl (100%) diff --git a/apps/emqx_replay/BSL.txt b/apps/emqx_durable_storage/BSL.txt similarity index 100% rename from apps/emqx_replay/BSL.txt rename to apps/emqx_durable_storage/BSL.txt diff --git a/apps/emqx_replay/README.md b/apps/emqx_durable_storage/README.md similarity index 100% rename from apps/emqx_replay/README.md rename to apps/emqx_durable_storage/README.md diff --git a/apps/emqx_replay/src/emqx_replay.app.src b/apps/emqx_durable_storage/src/emqx_replay.app.src similarity index 100% rename from apps/emqx_replay/src/emqx_replay.app.src rename to apps/emqx_durable_storage/src/emqx_replay.app.src diff --git a/apps/emqx_replay/src/emqx_replay.erl b/apps/emqx_durable_storage/src/emqx_replay.erl similarity index 100% rename from apps/emqx_replay/src/emqx_replay.erl rename to apps/emqx_durable_storage/src/emqx_replay.erl diff --git a/apps/emqx_replay/src/emqx_replay_app.erl b/apps/emqx_durable_storage/src/emqx_replay_app.erl similarity index 100% rename from apps/emqx_replay/src/emqx_replay_app.erl rename to apps/emqx_durable_storage/src/emqx_replay_app.erl diff --git a/apps/emqx_replay/src/emqx_replay_conf.erl b/apps/emqx_durable_storage/src/emqx_replay_conf.erl similarity index 100% rename from apps/emqx_replay/src/emqx_replay_conf.erl rename to apps/emqx_durable_storage/src/emqx_replay_conf.erl diff --git a/apps/emqx_replay/src/emqx_replay_local_store.erl b/apps/emqx_durable_storage/src/emqx_replay_local_store.erl similarity index 100% rename from apps/emqx_replay/src/emqx_replay_local_store.erl rename to apps/emqx_durable_storage/src/emqx_replay_local_store.erl diff --git 
a/apps/emqx_replay/src/emqx_replay_local_store_sup.erl b/apps/emqx_durable_storage/src/emqx_replay_local_store_sup.erl similarity index 100% rename from apps/emqx_replay/src/emqx_replay_local_store_sup.erl rename to apps/emqx_durable_storage/src/emqx_replay_local_store_sup.erl diff --git a/apps/emqx_replay/src/emqx_replay_message_storage.erl b/apps/emqx_durable_storage/src/emqx_replay_message_storage.erl similarity index 100% rename from apps/emqx_replay/src/emqx_replay_message_storage.erl rename to apps/emqx_durable_storage/src/emqx_replay_message_storage.erl diff --git a/apps/emqx_replay/src/emqx_replay_sup.erl b/apps/emqx_durable_storage/src/emqx_replay_sup.erl similarity index 100% rename from apps/emqx_replay/src/emqx_replay_sup.erl rename to apps/emqx_durable_storage/src/emqx_replay_sup.erl diff --git a/apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_durable_storage/test/emqx_replay_local_store_SUITE.erl similarity index 100% rename from apps/emqx_replay/test/emqx_replay_local_store_SUITE.erl rename to apps/emqx_durable_storage/test/emqx_replay_local_store_SUITE.erl diff --git a/apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl b/apps/emqx_durable_storage/test/emqx_replay_message_storage_SUITE.erl similarity index 100% rename from apps/emqx_replay/test/emqx_replay_message_storage_SUITE.erl rename to apps/emqx_durable_storage/test/emqx_replay_message_storage_SUITE.erl diff --git a/apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl b/apps/emqx_durable_storage/test/props/emqx_replay_message_storage_shim.erl similarity index 100% rename from apps/emqx_replay/test/props/emqx_replay_message_storage_shim.erl rename to apps/emqx_durable_storage/test/props/emqx_replay_message_storage_shim.erl diff --git a/apps/emqx_replay/test/props/payload_gen.erl b/apps/emqx_durable_storage/test/props/payload_gen.erl similarity index 100% rename from apps/emqx_replay/test/props/payload_gen.erl rename to 
apps/emqx_durable_storage/test/props/payload_gen.erl diff --git a/apps/emqx_replay/test/props/prop_replay_message_storage.erl b/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl similarity index 100% rename from apps/emqx_replay/test/props/prop_replay_message_storage.erl rename to apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl From 1159f99432373a1a1f8ac15590f8ab722ee5c186 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 17 May 2023 15:46:29 +0200 Subject: [PATCH 44/49] refactor(ds): emqx_replay -> emqx_ds --- apps/emqx_durable_storage/README.md | 2 +- .../{emqx_replay.app.src => emqx_ds.app.src} | 4 +- .../{emqx_replay_app.erl => emqx_ds_app.erl} | 4 +- ...{emqx_replay_conf.erl => emqx_ds_conf.erl} | 22 +++---- ...ocal_store.erl => emqx_ds_local_store.erl} | 58 ++++++++-------- ...re_sup.erl => emqx_ds_local_store_sup.erl} | 10 +-- ...torage.erl => emqx_ds_message_storage.erl} | 36 +++++----- .../{emqx_replay.erl => emqx_ds_replay.erl} | 2 +- .../{emqx_replay_sup.erl => emqx_ds_sup.erl} | 4 +- ...UITE.erl => emqx_ds_local_store_SUITE.erl} | 66 +++++++++---------- ....erl => emqx_ds_message_storage_SUITE.erl} | 4 +- ...m.erl => emqx_ds_message_storage_shim.erl} | 4 +- .../props/prop_replay_message_storage.erl | 46 ++++++------- 13 files changed, 131 insertions(+), 131 deletions(-) rename apps/emqx_durable_storage/src/{emqx_replay.app.src => emqx_ds.app.src} (81%) rename apps/emqx_durable_storage/src/{emqx_replay_app.erl => emqx_ds_app.erl} (81%) rename apps/emqx_durable_storage/src/{emqx_replay_conf.erl => emqx_ds_conf.erl} (71%) rename apps/emqx_durable_storage/src/{emqx_replay_local_store.erl => emqx_ds_local_store.erl} (88%) rename apps/emqx_durable_storage/src/{emqx_replay_local_store_sup.erl => emqx_ds_local_store_sup.erl} (85%) rename apps/emqx_durable_storage/src/{emqx_replay_message_storage.erl => emqx_ds_message_storage.erl} (96%) rename 
apps/emqx_durable_storage/src/{emqx_replay.erl => emqx_ds_replay.erl} (98%) rename apps/emqx_durable_storage/src/{emqx_replay_sup.erl => emqx_ds_sup.erl} (94%) rename apps/emqx_durable_storage/test/{emqx_replay_local_store_SUITE.erl => emqx_ds_local_store_SUITE.erl} (76%) rename apps/emqx_durable_storage/test/{emqx_replay_message_storage_SUITE.erl => emqx_ds_message_storage_SUITE.erl} (98%) rename apps/emqx_durable_storage/test/props/{emqx_replay_message_storage_shim.erl => emqx_ds_message_storage_shim.erl} (92%) diff --git a/apps/emqx_durable_storage/README.md b/apps/emqx_durable_storage/README.md index 50e539cdc..7de43bee0 100644 --- a/apps/emqx_durable_storage/README.md +++ b/apps/emqx_durable_storage/README.md @@ -1,6 +1,6 @@ # EMQX Replay -`emqx_replay` is a durable storage for MQTT messages within EMQX. +`emqx_ds` is a durable storage for MQTT messages within EMQX. It implements the following scenarios: - Persisting messages published by clients - diff --git a/apps/emqx_durable_storage/src/emqx_replay.app.src b/apps/emqx_durable_storage/src/emqx_ds.app.src similarity index 81% rename from apps/emqx_durable_storage/src/emqx_replay.app.src rename to apps/emqx_durable_storage/src/emqx_ds.app.src index 9c00a78ca..5a45c08d6 100644 --- a/apps/emqx_durable_storage/src/emqx_replay.app.src +++ b/apps/emqx_durable_storage/src/emqx_ds.app.src @@ -1,11 +1,11 @@ %% -*- mode: erlang -*- -{application, emqx_replay, [ +{application, emqx_ds, [ {description, "Message persistence and subscription replays for EMQX"}, % strict semver, bump manually! {vsn, "0.1.0"}, {modules, []}, {registered, []}, {applications, [kernel, stdlib, rocksdb, gproc]}, - {mod, {emqx_replay_app, []}}, + {mod, {emqx_ds_app, []}}, {env, []} ]}. 
diff --git a/apps/emqx_durable_storage/src/emqx_replay_app.erl b/apps/emqx_durable_storage/src/emqx_ds_app.erl similarity index 81% rename from apps/emqx_durable_storage/src/emqx_replay_app.erl rename to apps/emqx_durable_storage/src/emqx_ds_app.erl index 17de0f28c..858855b6f 100644 --- a/apps/emqx_durable_storage/src/emqx_replay_app.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_app.erl @@ -2,9 +2,9 @@ %% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_app). +-module(emqx_ds_app). -export([start/2]). start(_Type, _Args) -> - emqx_replay_sup:start_link(). + emqx_ds_sup:start_link(). diff --git a/apps/emqx_durable_storage/src/emqx_replay_conf.erl b/apps/emqx_durable_storage/src/emqx_ds_conf.erl similarity index 71% rename from apps/emqx_durable_storage/src/emqx_replay_conf.erl rename to apps/emqx_durable_storage/src/emqx_ds_conf.erl index 99405dfda..f5761e8a0 100644 --- a/apps/emqx_durable_storage/src/emqx_replay_conf.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_conf.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_conf). +-module(emqx_ds_conf). %% TODO: make a proper HOCON schema and all... @@ -12,7 +12,7 @@ -export([default_iteration_options/0]). -type backend_config() :: - {emqx_replay_message_storage, emqx_replay_message_storage:options()} + {emqx_ds_message_storage, emqx_ds_message_storage:options()} | {module(), _Options}. -export_type([backend_config/0]). @@ -21,32 +21,32 @@ %% API funcions %%================================================================================ --define(APP, emqx_replay). +-define(APP, emqx_ds). --spec shard_config(emqx_replay:shard()) -> backend_config(). +-spec shard_config(emqx_ds:shard()) -> backend_config(). 
shard_config(Shard) -> DefaultShardConfig = application:get_env(?APP, default_shard_config, default_shard_config()), Shards = application:get_env(?APP, shard_config, #{}), maps:get(Shard, Shards, DefaultShardConfig). --spec shard_iteration_options(emqx_replay:shard()) -> - emqx_replay_message_storage:iteration_options(). +-spec shard_iteration_options(emqx_ds:shard()) -> + emqx_ds_message_storage:iteration_options(). shard_iteration_options(Shard) -> case shard_config(Shard) of - {emqx_replay_message_storage, Config} -> + {emqx_ds_message_storage, Config} -> maps:get(iteration, Config, default_iteration_options()); {_Module, _} -> default_iteration_options() end. --spec default_iteration_options() -> emqx_replay_message_storage:iteration_options(). +-spec default_iteration_options() -> emqx_ds_message_storage:iteration_options(). default_iteration_options() -> - {emqx_replay_message_storage, Config} = default_shard_config(), + {emqx_ds_message_storage, Config} = default_shard_config(), maps:get(iteration, Config). -spec default_shard_config() -> backend_config(). default_shard_config() -> - {emqx_replay_message_storage, #{ + {emqx_ds_message_storage, #{ timestamp_bits => 64, topic_bits_per_level => [8, 8, 8, 32, 16], epoch => 5, @@ -55,6 +55,6 @@ default_shard_config() -> } }}. --spec db_options() -> emqx_replay_local_store:db_options(). +-spec db_options() -> emqx_ds_local_store:db_options(). db_options() -> application:get_env(?APP, db_options, []). 
diff --git a/apps/emqx_durable_storage/src/emqx_replay_local_store.erl b/apps/emqx_durable_storage/src/emqx_ds_local_store.erl similarity index 88% rename from apps/emqx_durable_storage/src/emqx_replay_local_store.erl rename to apps/emqx_durable_storage/src/emqx_ds_local_store.erl index c8041297b..45845d714 100644 --- a/apps/emqx_durable_storage/src/emqx_replay_local_store.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_local_store.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_local_store). +-module(emqx_ds_local_store). -behaviour(gen_server). @@ -43,20 +43,20 @@ %% When should this generation become active? %% This generation should only contain messages timestamped no earlier than that. %% The very first generation will have `since` equal 0. - since := emqx_replay:time() + since := emqx_ds:time() }. -record(s, { - shard :: emqx_replay:shard(), + shard :: emqx_ds:shard(), db :: rocksdb:db_handle(), cf_iterator :: rocksdb:cf_handle(), cf_generations :: cf_refs() }). -record(it, { - shard :: emqx_replay:shard(), + shard :: emqx_ds:shard(), gen :: gen_id(), - replay :: emqx_replay:replay(), + replay :: emqx_ds:replay(), module :: module(), data :: term() }). @@ -91,16 +91,16 @@ -callback create_new(rocksdb:db_handle(), gen_id(), _Options :: term()) -> {_Schema, cf_refs()}. --callback open(emqx_replay:shard(), rocksdb:db_handle(), gen_id(), cf_refs(), _Schema) -> +-callback open(emqx_ds:shard(), rocksdb:db_handle(), gen_id(), cf_refs(), _Schema) -> term(). --callback store(_Schema, binary(), emqx_replay:time(), emqx_replay:topic(), binary()) -> +-callback store(_Schema, binary(), emqx_ds:time(), emqx_ds:topic(), binary()) -> ok | {error, _}. 
--callback make_iterator(_Schema, emqx_replay:replay()) -> +-callback make_iterator(_Schema, emqx_ds:replay()) -> {ok, _It} | {error, _}. --callback restore_iterator(_Schema, emqx_replay:replay(), binary()) -> {ok, _It} | {error, _}. +-callback restore_iterator(_Schema, emqx_ds:replay(), binary()) -> {ok, _It} | {error, _}. -callback preserve_iterator(_Schema, _It) -> term(). @@ -110,24 +110,24 @@ %% API funcions %%================================================================================ --spec start_link(emqx_replay:shard()) -> {ok, pid()}. +-spec start_link(emqx_ds:shard()) -> {ok, pid()}. start_link(Shard) -> gen_server:start_link(?REF(Shard), ?MODULE, [Shard], []). --spec create_generation(emqx_replay:shard(), emqx_replay:time(), emqx_replay_conf:backend_config()) -> +-spec create_generation(emqx_ds:shard(), emqx_ds:time(), emqx_ds_conf:backend_config()) -> {ok, gen_id()} | {error, nonmonotonic}. create_generation(Shard, Since, Config = {_Module, _Options}) -> gen_server:call(?REF(Shard), {create_generation, Since, Config}). -spec store( - emqx_replay:shard(), emqx_guid:guid(), emqx_replay:time(), emqx_replay:topic(), binary() + emqx_ds:shard(), emqx_guid:guid(), emqx_ds:time(), emqx_ds:topic(), binary() ) -> ok | {error, _}. store(Shard, GUID, Time, Topic, Msg) -> {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Shard, Time), Mod:store(Data, GUID, Time, Topic, Msg). --spec make_iterator(emqx_replay:shard(), emqx_replay:replay()) -> +-spec make_iterator(emqx_ds:shard(), emqx_ds:replay()) -> {ok, iterator()} | {error, _TODO}. make_iterator(Shard, Replay = {_, StartTime}) -> {GenId, Gen} = meta_lookup_gen(Shard, StartTime), @@ -155,12 +155,12 @@ next(It = #it{module = Mod, data = ItData}) -> end end. --spec preserve_iterator(iterator(), emqx_replay:replay_id()) -> +-spec preserve_iterator(iterator(), emqx_ds:replay_id()) -> ok | {error, _TODO}. preserve_iterator(It = #it{}, ReplayID) -> iterator_put_state(ReplayID, It). 
--spec restore_iterator(emqx_replay:shard(), emqx_replay:replay_id()) -> +-spec restore_iterator(emqx_ds:shard(), emqx_ds:replay_id()) -> {ok, iterator()} | {error, _TODO}. restore_iterator(Shard, ReplayID) -> case iterator_get_state(Shard, ReplayID) of @@ -172,7 +172,7 @@ restore_iterator(Shard, ReplayID) -> Error end. --spec discard_iterator(emqx_replay:shard(), emqx_replay:replay_id()) -> +-spec discard_iterator(emqx_ds:shard(), emqx_ds:replay_id()) -> ok | {error, _TODO}. discard_iterator(Shard, ReplayID) -> iterator_delete(Shard, ReplayID). @@ -229,14 +229,14 @@ populate_metadata(GenId, S = #s{shard = Shard, db = DBHandle}) -> ensure_current_generation(S = #s{shard = Shard, db = DBHandle}) -> case schema_get_current(DBHandle) of undefined -> - Config = emqx_replay_conf:shard_config(Shard), + Config = emqx_ds_conf:shard_config(Shard), {ok, _, NS} = create_new_gen(0, Config, S), NS; _GenId -> S end. --spec create_new_gen(emqx_replay:time(), emqx_replay_conf:backend_config(), state()) -> +-spec create_new_gen(emqx_ds:time(), emqx_ds_conf:backend_config(), state()) -> {ok, gen_id(), state()} | {error, nonmonotonic}. create_new_gen(Since, Config, S = #s{shard = Shard, db = DBHandle}) -> GenId = get_next_id(meta_get_current(Shard)), @@ -253,7 +253,7 @@ create_new_gen(Since, Config, S = #s{shard = Shard, db = DBHandle}) -> Error end. --spec create_gen(gen_id(), emqx_replay:time(), emqx_replay_conf:backend_config(), state()) -> +-spec create_gen(gen_id(), emqx_ds:time(), emqx_ds_conf:backend_config(), state()) -> {ok, generation(), state()}. create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, cf_generations = CFs}) -> % TODO: Backend implementation should ensure idempotency. @@ -265,13 +265,13 @@ create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, cf_generations }, {ok, Gen, S#s{cf_generations = NewCFs ++ CFs}}. --spec open_db(emqx_replay:shard()) -> {ok, state()} | {error, _TODO}. 
+-spec open_db(emqx_ds:shard()) -> {ok, state()} | {error, _TODO}. open_db(Shard) -> Filename = binary_to_list(Shard), DBOptions = [ {create_if_missing, true}, {create_missing_column_families, true} - | emqx_replay_conf:db_options() + | emqx_ds_conf:db_options() ], ExistingCFs = case rocksdb:list_column_families(Filename, DBOptions) of @@ -425,7 +425,7 @@ schema_gen_key(N) -> -define(PERSISTENT_TERM(SHARD, GEN), {?MODULE, SHARD, GEN}). --spec meta_register_gen(emqx_replay:shard(), gen_id(), generation()) -> ok. +-spec meta_register_gen(emqx_ds:shard(), gen_id(), generation()) -> ok. meta_register_gen(Shard, GenId, Gen) -> Gs = case GenId > 0 of @@ -435,7 +435,7 @@ meta_register_gen(Shard, GenId, Gen) -> ok = meta_put(Shard, GenId, [Gen | Gs]), ok = meta_put(Shard, current, GenId). --spec meta_lookup_gen(emqx_replay:shard(), emqx_replay:time()) -> {gen_id(), generation()}. +-spec meta_lookup_gen(emqx_ds:shard(), emqx_ds:time()) -> {gen_id(), generation()}. meta_lookup_gen(Shard, Time) -> % TODO % Is cheaper persistent term GC on update here worth extra lookup? I'm leaning @@ -449,30 +449,30 @@ find_gen(Time, GenId, [Gen = #{since := Since} | _]) when Time >= Since -> find_gen(Time, GenId, [_Gen | Rest]) -> find_gen(Time, GenId - 1, Rest). --spec meta_get_gen(emqx_replay:shard(), gen_id()) -> generation() | undefined. +-spec meta_get_gen(emqx_ds:shard(), gen_id()) -> generation() | undefined. meta_get_gen(Shard, GenId) -> case meta_lookup(Shard, GenId, []) of [Gen | _Older] -> Gen; [] -> undefined end. --spec meta_get_current(emqx_replay:shard()) -> gen_id() | undefined. +-spec meta_get_current(emqx_ds:shard()) -> gen_id() | undefined. meta_get_current(Shard) -> meta_lookup(Shard, current, undefined). --spec meta_lookup(emqx_replay:shard(), _K) -> _V. +-spec meta_lookup(emqx_ds:shard(), _K) -> _V. meta_lookup(Shard, K) -> persistent_term:get(?PERSISTENT_TERM(Shard, K)). --spec meta_lookup(emqx_replay:shard(), _K, Default) -> _V | Default. 
+-spec meta_lookup(emqx_ds:shard(), _K, Default) -> _V | Default. meta_lookup(Shard, K, Default) -> persistent_term:get(?PERSISTENT_TERM(Shard, K), Default). --spec meta_put(emqx_replay:shard(), _K, _V) -> ok. +-spec meta_put(emqx_ds:shard(), _K, _V) -> ok. meta_put(Shard, K, V) -> persistent_term:put(?PERSISTENT_TERM(Shard, K), V). --spec meta_erase(emqx_replay:shard()) -> ok. +-spec meta_erase(emqx_ds:shard()) -> ok. meta_erase(Shard) -> [ persistent_term:erase(K) diff --git a/apps/emqx_durable_storage/src/emqx_replay_local_store_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_local_store_sup.erl similarity index 85% rename from apps/emqx_durable_storage/src/emqx_replay_local_store_sup.erl rename to apps/emqx_durable_storage/src/emqx_ds_local_store_sup.erl index 13f20c6bc..aad50992d 100644 --- a/apps/emqx_durable_storage/src/emqx_replay_local_store_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_local_store_sup.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_local_store_sup). +-module(emqx_ds_local_store_sup). -behaviour(supervisor). @@ -25,11 +25,11 @@ start_link() -> supervisor:start_link({local, ?SUP}, ?MODULE, []). --spec start_shard(emqx_replay:shard()) -> supervisor:startchild_ret(). +-spec start_shard(emqx_ds:shard()) -> supervisor:startchild_ret(). start_shard(Shard) -> supervisor:start_child(?SUP, shard_child_spec(Shard)). --spec stop_shard(emqx_replay:shard()) -> ok | {error, _}. +-spec stop_shard(emqx_ds:shard()) -> ok | {error, _}. stop_shard(Shard) -> ok = supervisor:terminate_child(?SUP, Shard), ok = supervisor:delete_child(?SUP, Shard). 
@@ -51,11 +51,11 @@ init([]) -> %% Internal functions %%================================================================================ --spec shard_child_spec(emqx_replay:shard()) -> supervisor:child_spec(). +-spec shard_child_spec(emqx_ds:shard()) -> supervisor:child_spec(). shard_child_spec(Shard) -> #{ id => Shard, - start => {emqx_replay_local_store, start_link, [Shard]}, + start => {emqx_ds_local_store, start_link, [Shard]}, shutdown => 5_000, restart => permanent, type => worker diff --git a/apps/emqx_durable_storage/src/emqx_replay_message_storage.erl b/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl similarity index 96% rename from apps/emqx_durable_storage/src/emqx_replay_message_storage.erl rename to apps/emqx_durable_storage/src/emqx_ds_message_storage.erl index f4d6c8e66..a9427b1f6 100644 --- a/apps/emqx_durable_storage/src/emqx_replay_message_storage.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl @@ -2,7 +2,7 @@ %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_message_storage). +-module(emqx_ds_message_storage). %%================================================================================ %% @doc Description of the schema @@ -128,8 +128,8 @@ %% Type declarations %%================================================================================ --type topic() :: emqx_replay:topic(). --type time() :: emqx_replay:time(). +-type topic() :: emqx_ds:topic(). +-type time() :: emqx_ds:time(). %% Number of bits -type bits() :: non_neg_integer(). @@ -152,7 +152,7 @@ iteration => iteration_options(), - cf_options => emqx_replay_local_store:db_cf_options() + cf_options => emqx_ds_local_store:db_cf_options() }. -type iteration_options() :: #{ @@ -170,12 +170,12 @@ -opaque schema() :: #schema{}. 
-record(db, { - shard :: emqx_replay:shard(), + shard :: emqx_ds:shard(), handle :: rocksdb:db_handle(), cf :: rocksdb:cf_handle(), keymapper :: keymapper(), - write_options = [{sync, true}] :: emqx_replay_local_store:db_write_options(), - read_options = [] :: emqx_replay_local_store:db_write_options() + write_options = [{sync, true}] :: emqx_ds_local_store:db_write_options(), + read_options = [] :: emqx_ds_local_store:db_write_options() }). -record(it, { @@ -221,8 +221,8 @@ %%================================================================================ %% Create a new column family for the generation and a serializable representation of the schema --spec create_new(rocksdb:db_handle(), emqx_replay_local_store:gen_id(), options()) -> - {schema(), emqx_replay_local_store:cf_refs()}. +-spec create_new(rocksdb:db_handle(), emqx_ds_local_store:gen_id(), options()) -> + {schema(), emqx_ds_local_store:cf_refs()}. create_new(DBHandle, GenId, Options) -> CFName = data_cf(GenId), CFOptions = maps:get(cf_options, Options, []), @@ -232,10 +232,10 @@ create_new(DBHandle, GenId, Options) -> %% Reopen the database -spec open( - emqx_replay:shard(), + emqx_ds:shard(), rocksdb:db_handle(), - emqx_replay_local_store:gen_id(), - emqx_replay_local_store:cf_refs(), + emqx_ds_local_store:gen_id(), + emqx_ds_local_store:cf_refs(), schema() ) -> db(). @@ -277,13 +277,13 @@ store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, Value = make_message_value(Topic, MessagePayload), rocksdb:put(DBHandle, CFHandle, Key, Value, DB#db.write_options). --spec make_iterator(db(), emqx_replay:replay()) -> +-spec make_iterator(db(), emqx_ds:replay()) -> {ok, iterator()} | {error, _TODO}. make_iterator(DB, Replay) -> - Options = emqx_replay_conf:shard_iteration_options(DB#db.shard), + Options = emqx_ds_conf:shard_iteration_options(DB#db.shard), make_iterator(DB, Replay, Options). 
--spec make_iterator(db(), emqx_replay:replay(), iteration_options()) -> +-spec make_iterator(db(), emqx_ds:replay(), iteration_options()) -> % {error, invalid_start_time}? might just start from the beginning of time % and call it a day: client violated the contract anyway. {ok, iterator()} | {error, _TODO}. @@ -337,7 +337,7 @@ preserve_iterator(#it{cursor = Cursor}) -> }, term_to_binary(State). --spec restore_iterator(db(), emqx_replay:replay(), binary()) -> +-spec restore_iterator(db(), emqx_ds:replay(), binary()) -> {ok, iterator()} | {error, _TODO}. restore_iterator(DB, Replay, Serial) when is_binary(Serial) -> State = binary_to_term(Serial), @@ -419,7 +419,7 @@ hash(Input, Bits) -> % at most 32 bits erlang:phash2(Input, 1 bsl Bits). --spec make_keyspace_filter(emqx_replay:replay(), keymapper()) -> keyspace_filter(). +-spec make_keyspace_filter(emqx_ds:replay(), keymapper()) -> keyspace_filter(). make_keyspace_filter({TopicFilter, StartTime}, Keymapper) -> Bitstring = compute_bitstring(TopicFilter, StartTime, Keymapper), HashBitmask = compute_topic_bitmask(TopicFilter, Keymapper), @@ -710,7 +710,7 @@ substring(I, Offset, Size) -> (I bsr Offset) band ones(Size). %% @doc Generate a column family ID for the MQTT messages --spec data_cf(emqx_replay_local_store:gen_id()) -> [char()]. +-spec data_cf(emqx_ds_local_store:gen_id()) -> [char()]. data_cf(GenId) -> ?MODULE_STRING ++ integer_to_list(GenId). diff --git a/apps/emqx_durable_storage/src/emqx_replay.erl b/apps/emqx_durable_storage/src/emqx_ds_replay.erl similarity index 98% rename from apps/emqx_durable_storage/src/emqx_replay.erl rename to apps/emqx_durable_storage/src/emqx_ds_replay.erl index f1ea7beac..db49c368d 100644 --- a/apps/emqx_durable_storage/src/emqx_replay.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replay.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. 
%%-------------------------------------------------------------------- --module(emqx_replay). +-module(emqx_ds_replay). %% API: -export([]). diff --git a/apps/emqx_durable_storage/src/emqx_replay_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_sup.erl similarity index 94% rename from apps/emqx_durable_storage/src/emqx_replay_sup.erl rename to apps/emqx_durable_storage/src/emqx_ds_sup.erl index 945a71180..ebd022632 100644 --- a/apps/emqx_durable_storage/src/emqx_replay_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_sup.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_sup). +-module(emqx_ds_sup). -behaviour(supervisor). @@ -45,7 +45,7 @@ init([]) -> shard_sup() -> #{ id => local_store_shard_sup, - start => {emqx_replay_local_store_sup, start_link, []}, + start => {emqx_ds_local_store_sup, start_link, []}, restart => permanent, type => supervisor, shutdown => infinity diff --git a/apps/emqx_durable_storage/test/emqx_replay_local_store_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_local_store_SUITE.erl similarity index 76% rename from apps/emqx_durable_storage/test/emqx_replay_local_store_SUITE.erl rename to apps/emqx_durable_storage/test/emqx_ds_local_store_SUITE.erl index da6fef09d..d59c4571e 100644 --- a/apps/emqx_durable_storage/test/emqx_replay_local_store_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_local_store_SUITE.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_local_store_SUITE). +-module(emqx_ds_local_store_SUITE). -compile(export_all). -compile(nowarn_export_all). @@ -12,7 +12,7 @@ -define(SHARD, shard(?FUNCTION_NAME)). 
-define(DEFAULT_CONFIG, - {emqx_replay_message_storage, #{ + {emqx_ds_message_storage, #{ timestamp_bits => 64, topic_bits_per_level => [8, 8, 32, 16], epoch => 5, @@ -23,7 +23,7 @@ ). -define(COMPACT_CONFIG, - {emqx_replay_message_storage, #{ + {emqx_ds_message_storage, #{ timestamp_bits => 16, topic_bits_per_level => [16, 16], epoch => 10 @@ -32,8 +32,8 @@ %% Smoke test for opening and reopening the database t_open(_Config) -> - ok = emqx_replay_local_store_sup:stop_shard(?SHARD), - {ok, _} = emqx_replay_local_store_sup:start_shard(?SHARD). + ok = emqx_ds_local_store_sup:stop_shard(?SHARD), + {ok, _} = emqx_ds_local_store_sup:start_shard(?SHARD). %% Smoke test of store function t_store(_Config) -> @@ -41,7 +41,7 @@ t_store(_Config) -> PublishedAt = 1000, Topic = [<<"foo">>, <<"bar">>], Payload = <<"message">>, - ?assertMatch(ok, emqx_replay_local_store:store(?SHARD, MessageID, PublishedAt, Topic, Payload)). + ?assertMatch(ok, emqx_ds_local_store:store(?SHARD, MessageID, PublishedAt, Topic, Payload)). %% Smoke test for iteration through a concrete topic t_iterate(_Config) -> @@ -49,7 +49,7 @@ t_iterate(_Config) -> Topics = [[<<"foo">>, <<"bar">>], [<<"foo">>, <<"bar">>, <<"baz">>], [<<"a">>]], Timestamps = lists:seq(1, 10), [ - emqx_replay_local_store:store( + emqx_ds_local_store:store( ?SHARD, emqx_guid:gen(), PublishedAt, @@ -61,7 +61,7 @@ t_iterate(_Config) -> %% Iterate through individual topics: [ begin - {ok, It} = emqx_replay_local_store:make_iterator(?SHARD, {Topic, 0}), + {ok, It} = emqx_ds_local_store:make_iterator(?SHARD, {Topic, 0}), Values = iterate(It), ?assertEqual(lists:map(fun integer_to_binary/1, Timestamps), Values) end @@ -136,16 +136,16 @@ t_iterate_long_tail_wildcard(_Config) -> ). 
t_create_gen(_Config) -> - {ok, 1} = emqx_replay_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG), + {ok, 1} = emqx_ds_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG), ?assertEqual( {error, nonmonotonic}, - emqx_replay_local_store:create_generation(?SHARD, 1, ?DEFAULT_CONFIG) + emqx_ds_local_store:create_generation(?SHARD, 1, ?DEFAULT_CONFIG) ), ?assertEqual( {error, nonmonotonic}, - emqx_replay_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG) + emqx_ds_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG) ), - {ok, 2} = emqx_replay_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_ds_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), Topics = ["foo/bar", "foo/bar/baz"], Timestamps = lists:seq(1, 100), [ @@ -154,9 +154,9 @@ t_create_gen(_Config) -> ]. t_iterate_multigen(_Config) -> - {ok, 1} = emqx_replay_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), - {ok, 2} = emqx_replay_local_store:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), - {ok, 3} = emqx_replay_local_store:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG), + {ok, 1} = emqx_ds_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_ds_local_store:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), + {ok, 3} = emqx_ds_local_store:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG), Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], Timestamps = lists:seq(1, 100), _ = [ @@ -180,9 +180,9 @@ t_iterate_multigen(_Config) -> t_iterate_multigen_preserve_restore(_Config) -> ReplayID = atom_to_binary(?FUNCTION_NAME), - {ok, 1} = emqx_replay_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), - {ok, 2} = emqx_replay_local_store:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), - {ok, 3} = emqx_replay_local_store:create_generation(?SHARD, 100, ?DEFAULT_CONFIG), + {ok, 1} = emqx_ds_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_ds_local_store:create_generation(?SHARD, 50, 
?DEFAULT_CONFIG), + {ok, 3} = emqx_ds_local_store:create_generation(?SHARD, 100, ?DEFAULT_CONFIG), Topics = ["foo/bar", "foo/bar/baz", "a/bar"], Timestamps = lists:seq(1, 100), TopicFilter = "foo/#", @@ -194,12 +194,12 @@ t_iterate_multigen_preserve_restore(_Config) -> It0 = iterator(?SHARD, TopicFilter, 0), {It1, Res10} = iterate(It0, 10), % preserve mid-generation - ok = emqx_replay_local_store:preserve_iterator(It1, ReplayID), - {ok, It2} = emqx_replay_local_store:restore_iterator(?SHARD, ReplayID), + ok = emqx_ds_local_store:preserve_iterator(It1, ReplayID), + {ok, It2} = emqx_ds_local_store:restore_iterator(?SHARD, ReplayID), {It3, Res100} = iterate(It2, 88), % preserve on the generation boundary - ok = emqx_replay_local_store:preserve_iterator(It3, ReplayID), - {ok, It4} = emqx_replay_local_store:restore_iterator(?SHARD, ReplayID), + ok = emqx_ds_local_store:preserve_iterator(It3, ReplayID), + {ok, It4} = emqx_ds_local_store:restore_iterator(?SHARD, ReplayID), {It5, Res200} = iterate(It4, 1000), ?assertEqual(none, It5), ?assertEqual( @@ -208,22 +208,22 @@ t_iterate_multigen_preserve_restore(_Config) -> ), ?assertEqual( ok, - emqx_replay_local_store:discard_iterator(?SHARD, ReplayID) + emqx_ds_local_store:discard_iterator(?SHARD, ReplayID) ), ?assertEqual( {error, not_found}, - emqx_replay_local_store:restore_iterator(?SHARD, ReplayID) + emqx_ds_local_store:restore_iterator(?SHARD, ReplayID) ). store(Shard, PublishedAt, Topic, Payload) -> ID = emqx_guid:gen(), - emqx_replay_local_store:store(Shard, ID, PublishedAt, parse_topic(Topic), Payload). + emqx_ds_local_store:store(Shard, ID, PublishedAt, parse_topic(Topic), Payload). iterate(DB, TopicFilter, StartTime) -> iterate(iterator(DB, TopicFilter, StartTime)). 
iterate(It) -> - case emqx_replay_local_store:next(It) of + case emqx_ds_local_store:next(It) of {value, Payload, ItNext} -> [Payload | iterate(ItNext)]; none -> @@ -233,7 +233,7 @@ iterate(It) -> iterate(It, 0) -> {It, []}; iterate(It, N) -> - case emqx_replay_local_store:next(It) of + case emqx_ds_local_store:next(It) of {value, Payload, ItNext} -> {ItFinal, Ps} = iterate(ItNext, N - 1), {ItFinal, [Payload | Ps]}; @@ -242,7 +242,7 @@ iterate(It, N) -> end. iterator(DB, TopicFilter, StartTime) -> - {ok, It} = emqx_replay_local_store:make_iterator(DB, {parse_topic(TopicFilter), StartTime}), + {ok, It} = emqx_ds_local_store:make_iterator(DB, {parse_topic(TopicFilter), StartTime}), It. parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) -> @@ -255,22 +255,22 @@ parse_topic(Topic) -> all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - {ok, _} = application:ensure_all_started(emqx_replay), + {ok, _} = application:ensure_all_started(emqx_ds), Config. end_per_suite(_Config) -> - ok = application:stop(emqx_replay). + ok = application:stop(emqx_ds). init_per_testcase(TC, Config) -> ok = set_shard_config(shard(TC), ?DEFAULT_CONFIG), - {ok, _} = emqx_replay_local_store_sup:start_shard(shard(TC)), + {ok, _} = emqx_ds_local_store_sup:start_shard(shard(TC)), Config. end_per_testcase(TC, _Config) -> - ok = emqx_replay_local_store_sup:stop_shard(shard(TC)). + ok = emqx_ds_local_store_sup:stop_shard(shard(TC)). shard(TC) -> list_to_binary(lists:concat([?MODULE, "_", TC])). set_shard_config(Shard, Config) -> - ok = application:set_env(emqx_replay, shard_config, #{Shard => Config}). + ok = application:set_env(emqx_ds, shard_config, #{Shard => Config}). 
diff --git a/apps/emqx_durable_storage/test/emqx_replay_message_storage_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_message_storage_SUITE.erl similarity index 98% rename from apps/emqx_durable_storage/test/emqx_replay_message_storage_SUITE.erl rename to apps/emqx_durable_storage/test/emqx_ds_message_storage_SUITE.erl index a26579299..cbffcc4a1 100644 --- a/apps/emqx_durable_storage/test/emqx_replay_message_storage_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_message_storage_SUITE.erl @@ -1,14 +1,14 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_message_storage_SUITE). +-module(emqx_ds_message_storage_SUITE). -compile(export_all). -compile(nowarn_export_all). -include_lib("stdlib/include/assert.hrl"). --import(emqx_replay_message_storage, [ +-import(emqx_ds_message_storage, [ make_keymapper/1, keymapper_info/1, compute_topic_bitmask/2, diff --git a/apps/emqx_durable_storage/test/props/emqx_replay_message_storage_shim.erl b/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_shim.erl similarity index 92% rename from apps/emqx_durable_storage/test/props/emqx_replay_message_storage_shim.erl rename to apps/emqx_durable_storage/test/props/emqx_ds_message_storage_shim.erl index f8e5c33d9..7f6cf8e64 100644 --- a/apps/emqx_durable_storage/test/props/emqx_replay_message_storage_shim.erl +++ b/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_shim.erl @@ -2,7 +2,7 @@ %% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_replay_message_storage_shim). +-module(emqx_ds_message_storage_shim). -export([open/0]). -export([close/1]). 
@@ -29,7 +29,7 @@ store(Tab, MessageID, PublishedAt, Topic, Payload) -> true = ets:insert(Tab, {{PublishedAt, MessageID}, Topic, Payload}), ok. --spec iterate(t(), emqx_replay:replay()) -> +-spec iterate(t(), emqx_ds:replay()) -> [binary()]. iterate(Tab, {TopicFilter, StartTime}) -> ets:foldr( diff --git a/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl b/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl index 7713a66a6..08ae5d21d 100644 --- a/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl @@ -22,7 +22,7 @@ prop_bitstring_computes() -> Keymapper, keymapper(), ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin - BS = emqx_replay_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), + BS = emqx_ds_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), is_integer(BS) andalso (BS < (1 bsl get_keymapper_bitsize(Keymapper))) end) ). @@ -30,7 +30,7 @@ prop_bitstring_computes() -> prop_topic_bitmask_computes() -> Keymapper = make_keymapper(16, [8, 12, 16], 100), ?FORALL(TopicFilter, topic_filter(), begin - Mask = emqx_replay_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), + Mask = emqx_ds_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), % topic bits + timestamp LSBs is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) end). @@ -40,14 +40,14 @@ prop_next_seek_monotonic() -> {TopicFilter, StartTime, Keymapper}, {topic_filter(), pos_integer(), keymapper()}, begin - Filter = emqx_replay_message_storage:make_keyspace_filter( + Filter = emqx_ds_message_storage:make_keyspace_filter( {TopicFilter, StartTime}, Keymapper ), ?FORALL( Bitstring, bitstr(get_keymapper_bitsize(Keymapper)), - emqx_replay_message_storage:compute_next_seek(Bitstring, Filter) >= Bitstring + emqx_ds_message_storage:compute_next_seek(Bitstring, Filter) >= Bitstring ) end ). 
@@ -56,8 +56,8 @@ prop_next_seek_eq_initial_seek() -> ?FORALL( Filter, keyspace_filter(), - emqx_replay_message_storage:compute_initial_seek(Filter) =:= - emqx_replay_message_storage:compute_next_seek(0, Filter) + emqx_ds_message_storage:compute_initial_seek(Filter) =:= + emqx_ds_message_storage:compute_next_seek(0, Filter) ). prop_iterate_messages() -> @@ -72,7 +72,7 @@ prop_iterate_messages() -> ?FORALL(Stream, noshrink(non_empty(messages(topic(TBPL)))), begin Filepath = make_filepath(?FUNCTION_NAME, erlang:system_time(microsecond)), {DB, Handle} = open_db(Filepath, Options), - Shim = emqx_replay_message_storage_shim:open(), + Shim = emqx_ds_message_storage_shim:open(), ok = store_db(DB, Stream), ok = store_shim(Shim, Stream), ?FORALL( @@ -92,7 +92,7 @@ prop_iterate_messages() -> Messages = iterate_db(DB, Iteration), Reference = iterate_shim(Shim, Iteration), ok = close_db(Handle), - ok = emqx_replay_message_storage_shim:close(Shim), + ok = emqx_ds_message_storage_shim:close(Shim), ?WHENFAIL( begin io:format(user, " *** Filepath = ~s~n", [Filepath]), @@ -182,7 +182,7 @@ prop_iterate_eq_iterate_with_refresh() -> % PublishedAt = ChunkNum, % MessageID, PublishedAt, Topic % ]), -% ok = emqx_replay_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload), +% ok = emqx_ds_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload), % store_message_stream(DB, payload_gen:next(Rest)); % store_message_stream(_Zone, []) -> % ok. @@ -191,7 +191,7 @@ store_db(DB, Messages) -> lists:foreach( fun({Topic, Payload = {MessageID, Timestamp, _}}) -> Bin = term_to_binary(Payload), - emqx_replay_message_storage:store(DB, MessageID, Timestamp, Topic, Bin) + emqx_ds_message_storage:store(DB, MessageID, Timestamp, Topic, Bin) end, Messages ). @@ -200,7 +200,7 @@ iterate_db(DB, Iteration) -> iterate_db(make_iterator(DB, Iteration)). 
iterate_db(It) -> - case emqx_replay_message_storage:next(It) of + case emqx_ds_message_storage:next(It) of {value, Payload, ItNext} -> [binary_to_term(Payload) | iterate_db(ItNext)]; none -> @@ -208,15 +208,15 @@ iterate_db(It) -> end. make_iterator(DB, Replay) -> - {ok, It} = emqx_replay_message_storage:make_iterator(DB, Replay), + {ok, It} = emqx_ds_message_storage:make_iterator(DB, Replay), It. make_iterator(DB, Replay, Options) -> - {ok, It} = emqx_replay_message_storage:make_iterator(DB, Replay, Options), + {ok, It} = emqx_ds_message_storage:make_iterator(DB, Replay, Options), It. run_iterator_commands([iterate | Rest], It, Ctx) -> - case emqx_replay_message_storage:next(It) of + case emqx_ds_message_storage:next(It) of {value, Payload, ItNext} -> [binary_to_term(Payload) | run_iterator_commands(Rest, ItNext, Ctx)]; none -> @@ -227,8 +227,8 @@ run_iterator_commands([{preserve, restore} | Rest], It, Ctx) -> db := DB, replay := Replay } = Ctx, - Serial = emqx_replay_message_storage:preserve_iterator(It), - {ok, ItNext} = emqx_replay_message_storage:restore_iterator(DB, Replay, Serial), + Serial = emqx_ds_message_storage:preserve_iterator(It), + {ok, ItNext} = emqx_ds_message_storage:restore_iterator(DB, Replay, Serial), run_iterator_commands(Rest, ItNext, Ctx); run_iterator_commands([], It, _Ctx) -> iterate_db(It). @@ -237,7 +237,7 @@ store_shim(Shim, Messages) -> lists:foreach( fun({Topic, Payload = {MessageID, Timestamp, _}}) -> Bin = term_to_binary(Payload), - emqx_replay_message_storage_shim:store(Shim, MessageID, Timestamp, Topic, Bin) + emqx_ds_message_storage_shim:store(Shim, MessageID, Timestamp, Topic, Bin) end, Messages ). @@ -245,7 +245,7 @@ store_shim(Shim, Messages) -> iterate_shim(Shim, Iteration) -> lists:map( fun binary_to_term/1, - emqx_replay_message_storage_shim:iterate(Shim, Iteration) + emqx_ds_message_storage_shim:iterate(Shim, Iteration) ). 
%%-------------------------------------------------------------------- @@ -254,8 +254,8 @@ iterate_shim(Shim, Iteration) -> open_db(Filepath, Options) -> {ok, Handle} = rocksdb:open(Filepath, [{create_if_missing, true}]), - {Schema, CFRefs} = emqx_replay_message_storage:create_new(Handle, ?GEN_ID, Options), - DB = emqx_replay_message_storage:open(?ZONE, Handle, ?GEN_ID, CFRefs, Schema), + {Schema, CFRefs} = emqx_ds_message_storage:create_new(Handle, ?GEN_ID, Options), + DB = emqx_ds_message_storage:open(?ZONE, Handle, ?GEN_ID, CFRefs, Schema), {DB, Handle}. close_db(Handle) -> @@ -379,7 +379,7 @@ keyspace_filter() -> ?LET( {TopicFilter, StartTime, Keymapper}, {topic_filter(), pos_integer(), keymapper()}, - emqx_replay_message_storage:make_keyspace_filter({TopicFilter, StartTime}, Keymapper) + emqx_ds_message_storage:make_keyspace_filter({TopicFilter, StartTime}, Keymapper) ). messages(Topic) -> @@ -426,14 +426,14 @@ flat(T) -> %%-------------------------------------------------------------------- make_keymapper(TimestampBits, TopicBits, MaxEpoch) -> - emqx_replay_message_storage:make_keymapper(#{ + emqx_ds_message_storage:make_keymapper(#{ timestamp_bits => TimestampBits, topic_bits_per_level => TopicBits, epoch => MaxEpoch }). get_keymapper_bitsize(Keymapper) -> - maps:get(bitsize, emqx_replay_message_storage:keymapper_info(Keymapper)). + maps:get(bitsize, emqx_ds_message_storage:keymapper_info(Keymapper)). -spec interleave(list({Tag, list(E)}), rand:state()) -> list({Tag, E}). 
interleave(Seqs, Rng) -> From a4219db1635fb6060d4957d0b45570f7a1594c78 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 17 May 2023 16:23:42 +0200 Subject: [PATCH 45/49] refactor(ds): emqx_ds_local_store -> emqx_ds_storage_layer --- .../emqx_durable_storage/src/emqx_ds_conf.erl | 2 +- .../src/emqx_ds_message_storage.erl | 16 +++--- ...al_store.erl => emqx_ds_storage_layer.erl} | 2 +- ..._sup.erl => emqx_ds_storage_layer_sup.erl} | 4 +- apps/emqx_durable_storage/src/emqx_ds_sup.erl | 2 +- ...TE.erl => emqx_ds_storage_layer_SUITE.erl} | 56 +++++++++---------- 6 files changed, 41 insertions(+), 41 deletions(-) rename apps/emqx_durable_storage/src/{emqx_ds_local_store.erl => emqx_ds_storage_layer.erl} (99%) rename apps/emqx_durable_storage/src/{emqx_ds_local_store_sup.erl => emqx_ds_storage_layer_sup.erl} (95%) rename apps/emqx_durable_storage/test/{emqx_ds_local_store_SUITE.erl => emqx_ds_storage_layer_SUITE.erl} (79%) diff --git a/apps/emqx_durable_storage/src/emqx_ds_conf.erl b/apps/emqx_durable_storage/src/emqx_ds_conf.erl index f5761e8a0..e748c359e 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_conf.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_conf.erl @@ -55,6 +55,6 @@ default_shard_config() -> } }}. --spec db_options() -> emqx_ds_local_store:db_options(). +-spec db_options() -> emqx_ds_storage_layer:db_options(). db_options() -> application:get_env(?APP, db_options, []). diff --git a/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl b/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl index a9427b1f6..9ebb23726 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl @@ -152,7 +152,7 @@ iteration => iteration_options(), - cf_options => emqx_ds_local_store:db_cf_options() + cf_options => emqx_ds_storage_layer:db_cf_options() }. 
-type iteration_options() :: #{ @@ -174,8 +174,8 @@ handle :: rocksdb:db_handle(), cf :: rocksdb:cf_handle(), keymapper :: keymapper(), - write_options = [{sync, true}] :: emqx_ds_local_store:db_write_options(), - read_options = [] :: emqx_ds_local_store:db_write_options() + write_options = [{sync, true}] :: emqx_ds_storage_layer:db_write_options(), + read_options = [] :: emqx_ds_storage_layer:db_write_options() }). -record(it, { @@ -221,8 +221,8 @@ %%================================================================================ %% Create a new column family for the generation and a serializable representation of the schema --spec create_new(rocksdb:db_handle(), emqx_ds_local_store:gen_id(), options()) -> - {schema(), emqx_ds_local_store:cf_refs()}. +-spec create_new(rocksdb:db_handle(), emqx_ds_storage_layer:gen_id(), options()) -> + {schema(), emqx_ds_storage_layer:cf_refs()}. create_new(DBHandle, GenId, Options) -> CFName = data_cf(GenId), CFOptions = maps:get(cf_options, Options, []), @@ -234,8 +234,8 @@ create_new(DBHandle, GenId, Options) -> -spec open( emqx_ds:shard(), rocksdb:db_handle(), - emqx_ds_local_store:gen_id(), - emqx_ds_local_store:cf_refs(), + emqx_ds_storage_layer:gen_id(), + emqx_ds_storage_layer:cf_refs(), schema() ) -> db(). @@ -710,7 +710,7 @@ substring(I, Offset, Size) -> (I bsr Offset) band ones(Size). %% @doc Generate a column family ID for the MQTT messages --spec data_cf(emqx_ds_local_store:gen_id()) -> [char()]. +-spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()]. data_cf(GenId) -> ?MODULE_STRING ++ integer_to_list(GenId). 
diff --git a/apps/emqx_durable_storage/src/emqx_ds_local_store.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl similarity index 99% rename from apps/emqx_durable_storage/src/emqx_ds_local_store.erl rename to apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl index 45845d714..43a399a1b 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_local_store.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_ds_local_store). +-module(emqx_ds_storage_layer). -behaviour(gen_server). diff --git a/apps/emqx_durable_storage/src/emqx_ds_local_store_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer_sup.erl similarity index 95% rename from apps/emqx_durable_storage/src/emqx_ds_local_store_sup.erl rename to apps/emqx_durable_storage/src/emqx_ds_storage_layer_sup.erl index aad50992d..ed745df5f 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_local_store_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer_sup.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_ds_local_store_sup). +-module(emqx_ds_storage_layer_sup). -behaviour(supervisor). 
@@ -55,7 +55,7 @@ init([]) -> shard_child_spec(Shard) -> #{ id => Shard, - start => {emqx_ds_local_store, start_link, [Shard]}, + start => {emqx_ds_storage_layer, start_link, [Shard]}, shutdown => 5_000, restart => permanent, type => worker diff --git a/apps/emqx_durable_storage/src/emqx_ds_sup.erl b/apps/emqx_durable_storage/src/emqx_ds_sup.erl index ebd022632..ca939e892 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_sup.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_sup.erl @@ -45,7 +45,7 @@ init([]) -> shard_sup() -> #{ id => local_store_shard_sup, - start => {emqx_ds_local_store_sup, start_link, []}, + start => {emqx_ds_storage_layer_sup, start_link, []}, restart => permanent, type => supervisor, shutdown => infinity diff --git a/apps/emqx_durable_storage/test/emqx_ds_local_store_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl similarity index 79% rename from apps/emqx_durable_storage/test/emqx_ds_local_store_SUITE.erl rename to apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl index d59c4571e..9fd93ecfe 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_local_store_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl @@ -1,7 +1,7 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_ds_local_store_SUITE). +-module(emqx_ds_storage_layer_SUITE). -compile(export_all). -compile(nowarn_export_all). @@ -32,8 +32,8 @@ %% Smoke test for opening and reopening the database t_open(_Config) -> - ok = emqx_ds_local_store_sup:stop_shard(?SHARD), - {ok, _} = emqx_ds_local_store_sup:start_shard(?SHARD). + ok = emqx_ds_storage_layer_sup:stop_shard(?SHARD), + {ok, _} = emqx_ds_storage_layer_sup:start_shard(?SHARD). 
%% Smoke test of store function t_store(_Config) -> @@ -41,7 +41,7 @@ t_store(_Config) -> PublishedAt = 1000, Topic = [<<"foo">>, <<"bar">>], Payload = <<"message">>, - ?assertMatch(ok, emqx_ds_local_store:store(?SHARD, MessageID, PublishedAt, Topic, Payload)). + ?assertMatch(ok, emqx_ds_storage_layer:store(?SHARD, MessageID, PublishedAt, Topic, Payload)). %% Smoke test for iteration through a concrete topic t_iterate(_Config) -> @@ -49,7 +49,7 @@ t_iterate(_Config) -> Topics = [[<<"foo">>, <<"bar">>], [<<"foo">>, <<"bar">>, <<"baz">>], [<<"a">>]], Timestamps = lists:seq(1, 10), [ - emqx_ds_local_store:store( + emqx_ds_storage_layer:store( ?SHARD, emqx_guid:gen(), PublishedAt, @@ -61,7 +61,7 @@ t_iterate(_Config) -> %% Iterate through individual topics: [ begin - {ok, It} = emqx_ds_local_store:make_iterator(?SHARD, {Topic, 0}), + {ok, It} = emqx_ds_storage_layer:make_iterator(?SHARD, {Topic, 0}), Values = iterate(It), ?assertEqual(lists:map(fun integer_to_binary/1, Timestamps), Values) end @@ -136,16 +136,16 @@ t_iterate_long_tail_wildcard(_Config) -> ). t_create_gen(_Config) -> - {ok, 1} = emqx_ds_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG), + {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 5, ?DEFAULT_CONFIG), ?assertEqual( {error, nonmonotonic}, - emqx_ds_local_store:create_generation(?SHARD, 1, ?DEFAULT_CONFIG) + emqx_ds_storage_layer:create_generation(?SHARD, 1, ?DEFAULT_CONFIG) ), ?assertEqual( {error, nonmonotonic}, - emqx_ds_local_store:create_generation(?SHARD, 5, ?DEFAULT_CONFIG) + emqx_ds_storage_layer:create_generation(?SHARD, 5, ?DEFAULT_CONFIG) ), - {ok, 2} = emqx_ds_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG), Topics = ["foo/bar", "foo/bar/baz"], Timestamps = lists:seq(1, 100), [ @@ -154,9 +154,9 @@ t_create_gen(_Config) -> ]. 
t_iterate_multigen(_Config) -> - {ok, 1} = emqx_ds_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), - {ok, 2} = emqx_ds_local_store:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), - {ok, 3} = emqx_ds_local_store:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG), + {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), + {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG), Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"], Timestamps = lists:seq(1, 100), _ = [ @@ -180,9 +180,9 @@ t_iterate_multigen(_Config) -> t_iterate_multigen_preserve_restore(_Config) -> ReplayID = atom_to_binary(?FUNCTION_NAME), - {ok, 1} = emqx_ds_local_store:create_generation(?SHARD, 10, ?COMPACT_CONFIG), - {ok, 2} = emqx_ds_local_store:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), - {ok, 3} = emqx_ds_local_store:create_generation(?SHARD, 100, ?DEFAULT_CONFIG), + {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG), + {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG), + {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 100, ?DEFAULT_CONFIG), Topics = ["foo/bar", "foo/bar/baz", "a/bar"], Timestamps = lists:seq(1, 100), TopicFilter = "foo/#", @@ -194,12 +194,12 @@ t_iterate_multigen_preserve_restore(_Config) -> It0 = iterator(?SHARD, TopicFilter, 0), {It1, Res10} = iterate(It0, 10), % preserve mid-generation - ok = emqx_ds_local_store:preserve_iterator(It1, ReplayID), - {ok, It2} = emqx_ds_local_store:restore_iterator(?SHARD, ReplayID), + ok = emqx_ds_storage_layer:preserve_iterator(It1, ReplayID), + {ok, It2} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID), {It3, Res100} = iterate(It2, 88), % preserve on the generation boundary - ok = emqx_ds_local_store:preserve_iterator(It3, ReplayID), - {ok, It4} = emqx_ds_local_store:restore_iterator(?SHARD, ReplayID), + ok = 
emqx_ds_storage_layer:preserve_iterator(It3, ReplayID), + {ok, It4} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID), {It5, Res200} = iterate(It4, 1000), ?assertEqual(none, It5), ?assertEqual( @@ -208,22 +208,22 @@ t_iterate_multigen_preserve_restore(_Config) -> ), ?assertEqual( ok, - emqx_ds_local_store:discard_iterator(?SHARD, ReplayID) + emqx_ds_storage_layer:discard_iterator(?SHARD, ReplayID) ), ?assertEqual( {error, not_found}, - emqx_ds_local_store:restore_iterator(?SHARD, ReplayID) + emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID) ). store(Shard, PublishedAt, Topic, Payload) -> ID = emqx_guid:gen(), - emqx_ds_local_store:store(Shard, ID, PublishedAt, parse_topic(Topic), Payload). + emqx_ds_storage_layer:store(Shard, ID, PublishedAt, parse_topic(Topic), Payload). iterate(DB, TopicFilter, StartTime) -> iterate(iterator(DB, TopicFilter, StartTime)). iterate(It) -> - case emqx_ds_local_store:next(It) of + case emqx_ds_storage_layer:next(It) of {value, Payload, ItNext} -> [Payload | iterate(ItNext)]; none -> @@ -233,7 +233,7 @@ iterate(It) -> iterate(It, 0) -> {It, []}; iterate(It, N) -> - case emqx_ds_local_store:next(It) of + case emqx_ds_storage_layer:next(It) of {value, Payload, ItNext} -> {ItFinal, Ps} = iterate(ItNext, N - 1), {ItFinal, [Payload | Ps]}; @@ -242,7 +242,7 @@ iterate(It, N) -> end. iterator(DB, TopicFilter, StartTime) -> - {ok, It} = emqx_ds_local_store:make_iterator(DB, {parse_topic(TopicFilter), StartTime}), + {ok, It} = emqx_ds_storage_layer:make_iterator(DB, {parse_topic(TopicFilter), StartTime}), It. parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) -> @@ -263,11 +263,11 @@ end_per_suite(_Config) -> init_per_testcase(TC, Config) -> ok = set_shard_config(shard(TC), ?DEFAULT_CONFIG), - {ok, _} = emqx_ds_local_store_sup:start_shard(shard(TC)), + {ok, _} = emqx_ds_storage_layer_sup:start_shard(shard(TC)), Config. end_per_testcase(TC, _Config) -> - ok = emqx_ds_local_store_sup:stop_shard(shard(TC)). 
+ ok = emqx_ds_storage_layer_sup:stop_shard(shard(TC)). shard(TC) -> list_to_binary(lists:concat([?MODULE, "_", TC])). From a343cdb1d52b42e878e3d1f5c85c6a361a30ec07 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 17 May 2023 16:32:50 +0200 Subject: [PATCH 46/49] refactor(ds): message_storage -> message_storage_bitmask --- .../emqx_durable_storage/src/emqx_ds_conf.erl | 12 ++--- ...rl => emqx_ds_message_storage_bitmask.erl} | 2 +- ...emqx_ds_message_storage_bitmask_SUITE.erl} | 4 +- .../test/emqx_ds_storage_layer_SUITE.erl | 4 +- ... emqx_ds_message_storage_bitmask_shim.erl} | 2 +- .../props/prop_replay_message_storage.erl | 46 +++++++++---------- 6 files changed, 35 insertions(+), 35 deletions(-) rename apps/emqx_durable_storage/src/{emqx_ds_message_storage.erl => emqx_ds_message_storage_bitmask.erl} (99%) rename apps/emqx_durable_storage/test/{emqx_ds_message_storage_SUITE.erl => emqx_ds_message_storage_bitmask_SUITE.erl} (98%) rename apps/emqx_durable_storage/test/props/{emqx_ds_message_storage_shim.erl => emqx_ds_message_storage_bitmask_shim.erl} (96%) diff --git a/apps/emqx_durable_storage/src/emqx_ds_conf.erl b/apps/emqx_durable_storage/src/emqx_ds_conf.erl index e748c359e..db8b14b45 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_conf.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_conf.erl @@ -12,7 +12,7 @@ -export([default_iteration_options/0]). -type backend_config() :: - {emqx_ds_message_storage, emqx_ds_message_storage:options()} + {emqx_ds_message_storage_bitmask, emqx_ds_message_storage_bitmask:options()} | {module(), _Options}. -export_type([backend_config/0]). @@ -30,23 +30,23 @@ shard_config(Shard) -> maps:get(Shard, Shards, DefaultShardConfig). -spec shard_iteration_options(emqx_ds:shard()) -> - emqx_ds_message_storage:iteration_options(). + emqx_ds_message_storage_bitmask:iteration_options(). 
shard_iteration_options(Shard) -> case shard_config(Shard) of - {emqx_ds_message_storage, Config} -> + {emqx_ds_message_storage_bitmask, Config} -> maps:get(iteration, Config, default_iteration_options()); {_Module, _} -> default_iteration_options() end. --spec default_iteration_options() -> emqx_ds_message_storage:iteration_options(). +-spec default_iteration_options() -> emqx_ds_message_storage_bitmask:iteration_options(). default_iteration_options() -> - {emqx_ds_message_storage, Config} = default_shard_config(), + {emqx_ds_message_storage_bitmask, Config} = default_shard_config(), maps:get(iteration, Config). -spec default_shard_config() -> backend_config(). default_shard_config() -> - {emqx_ds_message_storage, #{ + {emqx_ds_message_storage_bitmask, #{ timestamp_bits => 64, topic_bits_per_level => [8, 8, 8, 32, 16], epoch => 5, diff --git a/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl b/apps/emqx_durable_storage/src/emqx_ds_message_storage_bitmask.erl similarity index 99% rename from apps/emqx_durable_storage/src/emqx_ds_message_storage.erl rename to apps/emqx_durable_storage/src/emqx_ds_message_storage_bitmask.erl index 9ebb23726..7adcb8566 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_message_storage.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_message_storage_bitmask.erl @@ -2,7 +2,7 @@ %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_ds_message_storage). +-module(emqx_ds_message_storage_bitmask). 
%%================================================================================ %% @doc Description of the schema diff --git a/apps/emqx_durable_storage/test/emqx_ds_message_storage_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_message_storage_bitmask_SUITE.erl similarity index 98% rename from apps/emqx_durable_storage/test/emqx_ds_message_storage_SUITE.erl rename to apps/emqx_durable_storage/test/emqx_ds_message_storage_bitmask_SUITE.erl index cbffcc4a1..599bd6c7b 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_message_storage_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_message_storage_bitmask_SUITE.erl @@ -1,14 +1,14 @@ %%-------------------------------------------------------------------- %% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_ds_message_storage_SUITE). +-module(emqx_ds_message_storage_bitmask_SUITE). -compile(export_all). -compile(nowarn_export_all). -include_lib("stdlib/include/assert.hrl"). --import(emqx_ds_message_storage, [ +-import(emqx_ds_message_storage_bitmask, [ make_keymapper/1, keymapper_info/1, compute_topic_bitmask/2, diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl index 9fd93ecfe..054964373 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl @@ -12,7 +12,7 @@ -define(SHARD, shard(?FUNCTION_NAME)). -define(DEFAULT_CONFIG, - {emqx_ds_message_storage, #{ + {emqx_ds_message_storage_bitmask, #{ timestamp_bits => 64, topic_bits_per_level => [8, 8, 32, 16], epoch => 5, @@ -23,7 +23,7 @@ ). 
-define(COMPACT_CONFIG, - {emqx_ds_message_storage, #{ + {emqx_ds_message_storage_bitmask, #{ timestamp_bits => 16, topic_bits_per_level => [16, 16], epoch => 10 diff --git a/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_shim.erl b/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_bitmask_shim.erl similarity index 96% rename from apps/emqx_durable_storage/test/props/emqx_ds_message_storage_shim.erl rename to apps/emqx_durable_storage/test/props/emqx_ds_message_storage_bitmask_shim.erl index 7f6cf8e64..59668ca01 100644 --- a/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_shim.erl +++ b/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_bitmask_shim.erl @@ -2,7 +2,7 @@ %% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. %%-------------------------------------------------------------------- --module(emqx_ds_message_storage_shim). +-module(emqx_ds_message_storage_bitmask_shim). -export([open/0]). -export([close/1]). diff --git a/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl b/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl index 08ae5d21d..7452906b8 100644 --- a/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl +++ b/apps/emqx_durable_storage/test/props/prop_replay_message_storage.erl @@ -22,7 +22,7 @@ prop_bitstring_computes() -> Keymapper, keymapper(), ?FORALL({Topic, Timestamp}, {topic(), integer()}, begin - BS = emqx_ds_message_storage:compute_bitstring(Topic, Timestamp, Keymapper), + BS = emqx_ds_message_storage_bitmask:compute_bitstring(Topic, Timestamp, Keymapper), is_integer(BS) andalso (BS < (1 bsl get_keymapper_bitsize(Keymapper))) end) ). 
@@ -30,7 +30,7 @@ prop_bitstring_computes() -> prop_topic_bitmask_computes() -> Keymapper = make_keymapper(16, [8, 12, 16], 100), ?FORALL(TopicFilter, topic_filter(), begin - Mask = emqx_ds_message_storage:compute_topic_bitmask(TopicFilter, Keymapper), + Mask = emqx_ds_message_storage_bitmask:compute_topic_bitmask(TopicFilter, Keymapper), % topic bits + timestamp LSBs is_integer(Mask) andalso (Mask < (1 bsl (36 + 6))) end). @@ -40,14 +40,14 @@ prop_next_seek_monotonic() -> {TopicFilter, StartTime, Keymapper}, {topic_filter(), pos_integer(), keymapper()}, begin - Filter = emqx_ds_message_storage:make_keyspace_filter( + Filter = emqx_ds_message_storage_bitmask:make_keyspace_filter( {TopicFilter, StartTime}, Keymapper ), ?FORALL( Bitstring, bitstr(get_keymapper_bitsize(Keymapper)), - emqx_ds_message_storage:compute_next_seek(Bitstring, Filter) >= Bitstring + emqx_ds_message_storage_bitmask:compute_next_seek(Bitstring, Filter) >= Bitstring ) end ). @@ -56,8 +56,8 @@ prop_next_seek_eq_initial_seek() -> ?FORALL( Filter, keyspace_filter(), - emqx_ds_message_storage:compute_initial_seek(Filter) =:= - emqx_ds_message_storage:compute_next_seek(0, Filter) + emqx_ds_message_storage_bitmask:compute_initial_seek(Filter) =:= + emqx_ds_message_storage_bitmask:compute_next_seek(0, Filter) ). 
prop_iterate_messages() -> @@ -72,7 +72,7 @@ prop_iterate_messages() -> ?FORALL(Stream, noshrink(non_empty(messages(topic(TBPL)))), begin Filepath = make_filepath(?FUNCTION_NAME, erlang:system_time(microsecond)), {DB, Handle} = open_db(Filepath, Options), - Shim = emqx_ds_message_storage_shim:open(), + Shim = emqx_ds_message_storage_bitmask_shim:open(), ok = store_db(DB, Stream), ok = store_shim(Shim, Stream), ?FORALL( @@ -92,7 +92,7 @@ prop_iterate_messages() -> Messages = iterate_db(DB, Iteration), Reference = iterate_shim(Shim, Iteration), ok = close_db(Handle), - ok = emqx_ds_message_storage_shim:close(Shim), + ok = emqx_ds_message_storage_bitmask_shim:close(Shim), ?WHENFAIL( begin io:format(user, " *** Filepath = ~s~n", [Filepath]), @@ -182,7 +182,7 @@ prop_iterate_eq_iterate_with_refresh() -> % PublishedAt = ChunkNum, % MessageID, PublishedAt, Topic % ]), -% ok = emqx_ds_message_storage:store(DB, MessageID, PublishedAt, Topic, Payload), +% ok = emqx_ds_message_storage_bitmask:store(DB, MessageID, PublishedAt, Topic, Payload), % store_message_stream(DB, payload_gen:next(Rest)); % store_message_stream(_Zone, []) -> % ok. @@ -191,7 +191,7 @@ store_db(DB, Messages) -> lists:foreach( fun({Topic, Payload = {MessageID, Timestamp, _}}) -> Bin = term_to_binary(Payload), - emqx_ds_message_storage:store(DB, MessageID, Timestamp, Topic, Bin) + emqx_ds_message_storage_bitmask:store(DB, MessageID, Timestamp, Topic, Bin) end, Messages ). @@ -200,7 +200,7 @@ iterate_db(DB, Iteration) -> iterate_db(make_iterator(DB, Iteration)). iterate_db(It) -> - case emqx_ds_message_storage:next(It) of + case emqx_ds_message_storage_bitmask:next(It) of {value, Payload, ItNext} -> [binary_to_term(Payload) | iterate_db(ItNext)]; none -> @@ -208,15 +208,15 @@ iterate_db(It) -> end. make_iterator(DB, Replay) -> - {ok, It} = emqx_ds_message_storage:make_iterator(DB, Replay), + {ok, It} = emqx_ds_message_storage_bitmask:make_iterator(DB, Replay), It. 
make_iterator(DB, Replay, Options) -> - {ok, It} = emqx_ds_message_storage:make_iterator(DB, Replay, Options), + {ok, It} = emqx_ds_message_storage_bitmask:make_iterator(DB, Replay, Options), It. run_iterator_commands([iterate | Rest], It, Ctx) -> - case emqx_ds_message_storage:next(It) of + case emqx_ds_message_storage_bitmask:next(It) of {value, Payload, ItNext} -> [binary_to_term(Payload) | run_iterator_commands(Rest, ItNext, Ctx)]; none -> @@ -227,8 +227,8 @@ run_iterator_commands([{preserve, restore} | Rest], It, Ctx) -> db := DB, replay := Replay } = Ctx, - Serial = emqx_ds_message_storage:preserve_iterator(It), - {ok, ItNext} = emqx_ds_message_storage:restore_iterator(DB, Replay, Serial), + Serial = emqx_ds_message_storage_bitmask:preserve_iterator(It), + {ok, ItNext} = emqx_ds_message_storage_bitmask:restore_iterator(DB, Replay, Serial), run_iterator_commands(Rest, ItNext, Ctx); run_iterator_commands([], It, _Ctx) -> iterate_db(It). @@ -237,7 +237,7 @@ store_shim(Shim, Messages) -> lists:foreach( fun({Topic, Payload = {MessageID, Timestamp, _}}) -> Bin = term_to_binary(Payload), - emqx_ds_message_storage_shim:store(Shim, MessageID, Timestamp, Topic, Bin) + emqx_ds_message_storage_bitmask_shim:store(Shim, MessageID, Timestamp, Topic, Bin) end, Messages ). @@ -245,7 +245,7 @@ store_shim(Shim, Messages) -> iterate_shim(Shim, Iteration) -> lists:map( fun binary_to_term/1, - emqx_ds_message_storage_shim:iterate(Shim, Iteration) + emqx_ds_message_storage_bitmask_shim:iterate(Shim, Iteration) ). 
%%-------------------------------------------------------------------- @@ -254,8 +254,8 @@ iterate_shim(Shim, Iteration) -> open_db(Filepath, Options) -> {ok, Handle} = rocksdb:open(Filepath, [{create_if_missing, true}]), - {Schema, CFRefs} = emqx_ds_message_storage:create_new(Handle, ?GEN_ID, Options), - DB = emqx_ds_message_storage:open(?ZONE, Handle, ?GEN_ID, CFRefs, Schema), + {Schema, CFRefs} = emqx_ds_message_storage_bitmask:create_new(Handle, ?GEN_ID, Options), + DB = emqx_ds_message_storage_bitmask:open(?ZONE, Handle, ?GEN_ID, CFRefs, Schema), {DB, Handle}. close_db(Handle) -> @@ -379,7 +379,7 @@ keyspace_filter() -> ?LET( {TopicFilter, StartTime, Keymapper}, {topic_filter(), pos_integer(), keymapper()}, - emqx_ds_message_storage:make_keyspace_filter({TopicFilter, StartTime}, Keymapper) + emqx_ds_message_storage_bitmask:make_keyspace_filter({TopicFilter, StartTime}, Keymapper) ). messages(Topic) -> @@ -426,14 +426,14 @@ flat(T) -> %%-------------------------------------------------------------------- make_keymapper(TimestampBits, TopicBits, MaxEpoch) -> - emqx_ds_message_storage:make_keymapper(#{ + emqx_ds_message_storage_bitmask:make_keymapper(#{ timestamp_bits => TimestampBits, topic_bits_per_level => TopicBits, epoch => MaxEpoch }). get_keymapper_bitsize(Keymapper) -> - maps:get(bitsize, emqx_ds_message_storage:keymapper_info(Keymapper)). + maps:get(bitsize, emqx_ds_message_storage_bitmask:keymapper_info(Keymapper)). -spec interleave(list({Tag, list(E)}), rand:state()) -> list({Tag, E}). 
interleave(Seqs, Rng) -> From b29c5ad23c25668ae309bb13da714af6f8cbff7a Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Mon, 5 Jun 2023 13:03:47 +0200 Subject: [PATCH 47/49] feat(emqx_ds): Add API draft for logic layer --- apps/emqx_durable_storage/IMPLEMENTATION.md | 75 ++++++++ apps/emqx_durable_storage/src/emqx_ds.erl | 177 ++++++++++++++++++ apps/emqx_durable_storage/src/emqx_ds_app.erl | 15 ++ apps/emqx_durable_storage/src/emqx_ds_int.hrl | 27 +++ ...s.app.src => emqx_durable_storage.app.src} | 0 5 files changed, 294 insertions(+) create mode 100644 apps/emqx_durable_storage/IMPLEMENTATION.md create mode 100644 apps/emqx_durable_storage/src/emqx_ds.erl create mode 100644 apps/emqx_durable_storage/src/emqx_ds_int.hrl rename apps/emqx_durable_storage/src/{emqx_ds.app.src => emqx_durable_storage.app.src} (100%) diff --git a/apps/emqx_durable_storage/IMPLEMENTATION.md b/apps/emqx_durable_storage/IMPLEMENTATION.md new file mode 100644 index 000000000..4c78b8cc8 --- /dev/null +++ b/apps/emqx_durable_storage/IMPLEMENTATION.md @@ -0,0 +1,75 @@ +# General concepts + +In the logic layer we don't speak about replication. +This is because we could use an external DB with its own replication logic. + +On the other hand, we introduce the notion of a shard right here in the logic layer. +This is because shared subscription logic needs to be aware of it to some extent, as it has to split work between subscribers somehow. + +# Tables + +## Message storage + +Data is written every time a message matching a certain pattern is published. +This pattern is not part of the logic layer spec.
+ +Write throughput: very high +Data size: very high +Write pattern: append only +Read pattern: pseudoserial + +Number of records: O(total write throughput * retention time) + +## Session storage + +Data there is updated when: + +- A new client connects with clean session = false +- Client subscribes to a topic +- Client unsubscribes from a topic +- Garbage collection is performed + +Write throughput: low + +Data is read when a client connects and replay agents are started + +Read throughput: low + +Data format: + +`#session{clientId = "foobar", iterators = [ItKey1, ItKey2, ItKey3, ...]}` + +Number of records: O(N clients) +Size of record: O(N subscriptions per client) + +## Iterator storage + +Data is written every time a client acks a message. +Data is read when a client reconnects and we restart replay agents. + +`#iterator{key = IterKey, data = Blob}` + +Number of records: O(N clients * N subscriptions per client) +Size of record: O(1) +Write throughput: high, lots of small updates +Write pattern: mostly key overwrite +Read throughput: low +Read pattern: random + +# Push vs. Pull model + +In the push model we have replay agents iterating over the dataset in the shards. + +In the pull model the client processes work with iterators. + +## Push pros: +- Lower latency: message can be dispatched to the client as soon as it's persisted +- Less worry about buffering + +## Push cons: +- Need pushback logic +- It's not entirely justified when working with an external DB that may not provide streaming API + +## Pull pros: +- No need for pushback: client advances iterators at its own tempo +- diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl new file mode 100644 index 000000000..b08d5e186 --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -0,0 +1,177 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-module(emqx_ds). + +%% API: +%% Messages: +-export([message_store/2, message_store/1, message_stats/0]). +%% Iterator: +-export([iterator_update/2, iterator_next/1, iterator_stats/0]). +%% Session: +-export([ + session_open/1, + session_drop/1, + session_suspend/1, + session_add_iterator/2, + session_del_iterator/2, + session_stats/0 +]). + +%% internal exports: +-export([]). + +-export_type([ + message_id/0, + message_stats/0, + message_store_opts/0, + session_id/0, + iterator_id/0, + iterator/0 +]). + +-include("emqx_ds_int.hrl"). + +%%================================================================================ +%% Type declarations +%%================================================================================ + +-type session_id() :: emqx_types:clientid(). + +-type iterator() :: term(). + +-opaque iterator_id() :: binary(). + +%%-type session() :: #session{}. + +-type message_store_opts() :: #{}. + +-type message_stats() :: #{}. + +-type message_id() :: binary(). 
+ +%%================================================================================ +%% API funcions +%%================================================================================ + +%%-------------------------------------------------------------------------------- +%% Message +%%-------------------------------------------------------------------------------- +-spec message_store([emqx_types:message()], message_store_opts()) -> + {ok, [message_id()]} | {error, _}. +message_store(_Msg, _Opts) -> + %% TODO + ok. + +-spec message_store([emqx_types:message()]) -> {ok, [message_id()]} | {error, _}. +message_store(Msg) -> + %% TODO + message_store(Msg, #{}). + +-spec message_stats() -> message_stats(). +message_stats() -> + #{}. + +%%-------------------------------------------------------------------------------- +%% Session +%%-------------------------------------------------------------------------------- + +%% @doc Called when a client connects. This function looks up a +%% session or creates a new one if previous one couldn't be found. +%% +%% This function also spawns replay agents for each iterator. +%% +%% Note: session API doesn't handle session takeovers, it's the job of +%% the broker. +-spec session_open(emqx_types:clientid()) -> {_New :: boolean(), session_id(), [iterator_id()]}. +session_open(ClientID) -> + {atomic, Ret} = + mria:transaction( + ?DS_SHARD, + fun() -> + case mnesia:read(?SESSION_TAB, ClientID) of + [#session{iterators = Iterators}] -> + {false, ClientID, Iterators}; + [] -> + Session = #session{id = ClientID, iterators = []}, + mnesia:write(?SESSION_TAB, Session), + {true, ClientID, []} + end + end + ), + Ret. + +%% @doc Called when a client reconnects with `clean session=true' or +%% during session GC +-spec session_drop(emqx_types:clientid()) -> ok. +session_drop(ClientID) -> + {atomic, ok} = mnesia:transaction( + ?DS_SHARD, + fun() -> + mnesia:delete(?SESSION_TAB, ClientID) + end + ), + ok. 
+ +%% @doc Called when a client disconnects. This function terminates all +%% active processes related to the session. +-spec session_suspend(session_id()) -> ok | {error, session_not_found}. +session_suspend(_SessionId) -> + %% TODO + ok. + +%% @doc Called when a client subscribes to a topic. Idempotent. +-spec session_add_iterator(session_id(), emqx_topic:words()) -> + {ok, iterator_id()} | {error, session_not_found}. +session_add_iterator(_SessionId, _TopicFilter) -> + %% TODO + {ok, <<"">>}. + +%% @doc Called when a client unsubscribes from a topic. Returns `true' +%% if the session contained the subscription or `false' if it wasn't +%% subscribed. +-spec session_del_iterator(session_id(), emqx_topic:words()) -> + {ok, boolean()} | {error, session_not_found}. +session_del_iterator(_SessionId, _TopicFilter) -> + %% TODO + false. + +-spec session_stats() -> #{}. +session_stats() -> + #{}. + +%%-------------------------------------------------------------------------------- +%% Iterator (pull API) +%%-------------------------------------------------------------------------------- + +%% @doc Called when a client acks a message +-spec iterator_update(iterator_id(), iterator()) -> ok. +iterator_update(_IterId, _Iter) -> + %% TODO + ok. + +%% @doc Called when a client acks a message +-spec iterator_next(iterator()) -> {value, emqx_types:message(), iterator()} | none | {error, _}. +iterator_next(_Iter) -> + %% TODO + ok. + +-spec iterator_stats() -> #{}. +iterator_stats() -> + #{}. + +%%================================================================================ +%% Internal functions +%%================================================================================ diff --git a/apps/emqx_durable_storage/src/emqx_ds_app.erl b/apps/emqx_durable_storage/src/emqx_ds_app.erl index 858855b6f..fb4d487e9 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_app.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_app.erl @@ -6,5 +6,20 @@ -export([start/2]). 
+-include("emqx_ds_int.hrl"). + start(_Type, _Args) -> + init_mnesia(), emqx_ds_sup:start_link(). + +init_mnesia() -> + ok = mria:create_table( + ?SESSION_TAB, + [ + {rlog_shard, ?DS_SHARD}, + {type, set}, + {storage, rocksdb_copies}, + {record_name, session}, + {attributes, record_info(fields, session)} + ] + ). diff --git a/apps/emqx_durable_storage/src/emqx_ds_int.hrl b/apps/emqx_durable_storage/src/emqx_ds_int.hrl new file mode 100644 index 000000000..96688ede6 --- /dev/null +++ b/apps/emqx_durable_storage/src/emqx_ds_int.hrl @@ -0,0 +1,27 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- +-ifndef(EMQX_DS_HRL). +-define(EMQX_DS_HRL, true). + +-define(SESSION_TAB, emqx_ds_session). +-define(DS_SHARD, emqx_ds_shard). + +-record(session, { + id :: emqx_ds:session_id(), + iterators :: [{emqx_topic:words(), emqx_ds:iterator_id()}] +}). + +-endif. 
diff --git a/apps/emqx_durable_storage/src/emqx_ds.app.src b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src similarity index 100% rename from apps/emqx_durable_storage/src/emqx_ds.app.src rename to apps/emqx_durable_storage/src/emqx_durable_storage.app.src From cfcd81e17b151cd03ac6b1f8a2990fbb47113265 Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Wed, 21 Jun 2023 00:00:43 +0200 Subject: [PATCH 48/49] fix(ds): Fix build --- apps/emqx_durable_storage/src/emqx_ds.erl | 12 ++++++------ .../src/emqx_durable_storage.app.src | 4 ++-- .../test/emqx_ds_storage_layer_SUITE.erl | 4 ++-- scripts/check-elixir-applications.exs | 2 +- scripts/check-elixir-deps-discrepancies.exs | 2 +- .../check-elixir-emqx-machine-boot-discrepancies.exs | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index b08d5e186..f0fde46c5 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -73,7 +73,7 @@ {ok, [message_id()]} | {error, _}. message_store(_Msg, _Opts) -> %% TODO - ok. + {error, not_implemented}. -spec message_store([emqx_types:message()]) -> {ok, [message_id()]} | {error, _}. message_store(Msg) -> @@ -106,7 +106,7 @@ session_open(ClientID) -> {false, ClientID, Iterators}; [] -> Session = #session{id = ClientID, iterators = []}, - mnesia:write(?SESSION_TAB, Session), + mnesia:write(?SESSION_TAB, Session, write), {true, ClientID, []} end end @@ -117,10 +117,10 @@ session_open(ClientID) -> %% during session GC -spec session_drop(emqx_types:clientid()) -> ok. session_drop(ClientID) -> - {atomic, ok} = mnesia:transaction( + {atomic, ok} = mria:transaction( ?DS_SHARD, fun() -> - mnesia:delete(?SESSION_TAB, ClientID) + mnesia:delete({?SESSION_TAB, ClientID}) end ), ok. @@ -146,7 +146,7 @@ session_add_iterator(_SessionId, _TopicFilter) -> {ok, boolean()} | {error, session_not_found}. 
session_del_iterator(_SessionId, _TopicFilter) -> %% TODO - false. + {ok, false}. -spec session_stats() -> #{}. session_stats() -> @@ -166,7 +166,7 @@ iterator_update(_IterId, _Iter) -> -spec iterator_next(iterator()) -> {value, emqx_types:message(), iterator()} | none | {error, _}. iterator_next(_Iter) -> %% TODO - ok. + none. -spec iterator_stats() -> #{}. iterator_stats() -> diff --git a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src index 5a45c08d6..7ea036536 100644 --- a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src +++ b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src @@ -1,11 +1,11 @@ %% -*- mode: erlang -*- -{application, emqx_ds, [ +{application, emqx_durable_storage, [ {description, "Message persistence and subscription replays for EMQX"}, % strict semver, bump manually! {vsn, "0.1.0"}, {modules, []}, {registered, []}, - {applications, [kernel, stdlib, rocksdb, gproc]}, + {applications, [kernel, stdlib, rocksdb, gproc, mria]}, {mod, {emqx_ds_app, []}}, {env, []} ]}. diff --git a/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl b/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl index 054964373..46a1436bb 100644 --- a/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl +++ b/apps/emqx_durable_storage/test/emqx_ds_storage_layer_SUITE.erl @@ -255,11 +255,11 @@ parse_topic(Topic) -> all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - {ok, _} = application:ensure_all_started(emqx_ds), + {ok, _} = application:ensure_all_started(emqx_durable_storage), Config. end_per_suite(_Config) -> - ok = application:stop(emqx_ds). + ok = application:stop(emqx_durable_storage). 
init_per_testcase(TC, Config) -> ok = set_shard_config(shard(TC), ?DEFAULT_CONFIG), diff --git a/scripts/check-elixir-applications.exs b/scripts/check-elixir-applications.exs index 1e604c69f..42c838199 100755 --- a/scripts/check-elixir-applications.exs +++ b/scripts/check-elixir-applications.exs @@ -1,4 +1,4 @@ -#! /usr/bin/env elixir +#!/usr/bin/env elixir defmodule CheckElixirApplications do alias EMQXUmbrella.MixProject diff --git a/scripts/check-elixir-deps-discrepancies.exs b/scripts/check-elixir-deps-discrepancies.exs index 1363219ed..408079d7d 100755 --- a/scripts/check-elixir-deps-discrepancies.exs +++ b/scripts/check-elixir-deps-discrepancies.exs @@ -1,4 +1,4 @@ -#! /usr/bin/env elixir +#!/usr/bin/env elixir # ensure we have a fresh rebar.lock diff --git a/scripts/check-elixir-emqx-machine-boot-discrepancies.exs b/scripts/check-elixir-emqx-machine-boot-discrepancies.exs index 9ffdc47bf..d07e6978f 100755 --- a/scripts/check-elixir-emqx-machine-boot-discrepancies.exs +++ b/scripts/check-elixir-emqx-machine-boot-discrepancies.exs @@ -1,4 +1,4 @@ -#! 
/usr/bin/env elixir +#!/usr/bin/env elixir defmodule CheckElixirEMQXMachineBootDiscrepancies do alias EMQXUmbrella.MixProject From 9a81f22105ef7d7223631652a56fb9889e13863e Mon Sep 17 00:00:00 2001 From: ieQu1 <99872536+ieQu1@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:07:30 +0200 Subject: [PATCH 49/49] fix(ds): Apply review remarks --- apps/emqx_durable_storage/src/emqx_ds.erl | 15 ++++++++++++++- .../src/emqx_ds_message_storage_bitmask.erl | 6 +++--- apps/emqx_durable_storage/src/emqx_ds_replay.erl | 15 ++------------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index f0fde46c5..230ca3f9f 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -39,7 +39,10 @@ message_store_opts/0, session_id/0, iterator_id/0, - iterator/0 + iterator/0, + shard/0, + topic/0, + time/0 ]). -include("emqx_ds_int.hrl"). @@ -62,6 +65,16 @@ -type message_id() :: binary(). +%% Parsed topic: +-type topic() :: list(binary()). + +-type shard() :: binary(). + +%% Timestamp +%% Earliest possible timestamp is 0. +%% TODO granularity? +-type time() :: non_neg_integer(). + %%================================================================================ %% API funcions %%================================================================================ diff --git a/apps/emqx_durable_storage/src/emqx_ds_message_storage_bitmask.erl b/apps/emqx_durable_storage/src/emqx_ds_message_storage_bitmask.erl index 7adcb8566..5bb0423d5 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_message_storage_bitmask.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_message_storage_bitmask.erl @@ -38,9 +38,9 @@ %% to map topics to fixed-length bitstrings while keeping some degree %% of information about the hierarchy. %% -%% Next important concept is what we call "epoch". 
It is time -%% interval determined by the number of least significant bits of the -%% timestamp found at the tail of the rocksdb key. +%% Next important concept is what we call "epoch". Duration of the +%% epoch is determined by maximum time offset. Epoch is calculated by +%% shifting bits of the timestamp right. %% %% The resulting index is a space-filling curve that looks like %% this in the topic-time 2D space: diff --git a/apps/emqx_durable_storage/src/emqx_ds_replay.erl b/apps/emqx_durable_storage/src/emqx_ds_replay.erl index db49c368d..a66cee7fd 100644 --- a/apps/emqx_durable_storage/src/emqx_ds_replay.erl +++ b/apps/emqx_durable_storage/src/emqx_ds_replay.erl @@ -6,28 +6,17 @@ %% API: -export([]). --export_type([topic/0, time/0, shard/0]). -export_type([replay_id/0, replay/0]). %%================================================================================ %% Type declarations %%================================================================================ -%% parsed --type topic() :: list(binary()). - --type shard() :: binary(). - -%% Timestamp -%% Earliest possible timestamp is 0. -%% TODO granularity? --type time() :: non_neg_integer(). - -type replay_id() :: binary(). -type replay() :: { - _TopicFilter :: topic(), - _StartTime :: time() + _TopicFilter :: emqx_ds:topic(), + _StartTime :: emqx_ds:time() }. %%================================================================================