Merge pull request #11720 from ieQu1/dev/refactor-persistent-session
Refactor emqx_durable_storage application and introduce learned topic structure storage
commit 788698f157
@@ -14,9 +14,4 @@
%% limitations under the License.
%%--------------------------------------------------------------------

-ifndef(EMQX_BPAPI_HRL).
-define(EMQX_BPAPI_HRL, true).

-compile({parse_transform, emqx_bpapi_trans}).

-endif.
-include_lib("emqx_utils/include/bpapi.hrl").
@@ -55,29 +55,7 @@

-record(subscription, {topic, subid, subopts}).

%% See 'Application Message' in MQTT Version 5.0
-record(message, {
    %% Global unique message ID
    id :: binary(),
    %% Message QoS
    qos = 0,
    %% Message from
    from :: atom() | binary(),
    %% Message flags
    flags = #{} :: emqx_types:flags(),
    %% Message headers. May contain any metadata. e.g. the
    %% protocol version number, username, peerhost or
    %% the PUBLISH properties (MQTT 5.0).
    headers = #{} :: emqx_types:headers(),
    %% Topic that the message is published to
    topic :: emqx_types:topic(),
    %% Message Payload
    payload :: emqx_types:payload(),
    %% Timestamp (Unit: millisecond)
    timestamp :: integer(),
    %% not used so far, for future extension
    extra = [] :: term()
}).
-include_lib("emqx_utils/include/emqx_message.hrl").

-record(delivery, {
    %% Sender of the delivery
@@ -1,7 +1,7 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_ds_SUITE).
-module(emqx_persistent_session_ds_SUITE).

-compile(export_all).
-compile(nowarn_export_all).
@@ -14,7 +14,6 @@
-define(DEFAULT_KEYSPACE, default).
-define(DS_SHARD_ID, <<"local">>).
-define(DS_SHARD, {?DEFAULT_KEYSPACE, ?DS_SHARD_ID}).
-define(ITERATOR_REF_TAB, emqx_ds_iterator_ref).

-import(emqx_common_test_helpers, [on_exit/1]).
@@ -91,9 +90,6 @@ get_mqtt_port(Node, Type) ->
    {_IP, Port} = erpc:call(Node, emqx_config, get, [[listeners, Type, default, bind]]),
    Port.

get_all_iterator_refs(Node) ->
    erpc:call(Node, mnesia, dirty_all_keys, [?ITERATOR_REF_TAB]).

get_all_iterator_ids(Node) ->
    Fn = fun(K, _V, Acc) -> [K | Acc] end,
    erpc:call(Node, fun() ->
@@ -126,6 +122,32 @@ start_client(Opts0 = #{}) ->
    on_exit(fun() -> catch emqtt:stop(Client) end),
    Client.

restart_node(Node, NodeSpec) ->
    ?tp(will_restart_node, #{}),
    ?tp(notice, "restarting node", #{node => Node}),
    true = monitor_node(Node, true),
    ok = erpc:call(Node, init, restart, []),
    receive
        {nodedown, Node} ->
            ok
    after 10_000 ->
        ct:fail("node ~p didn't stop", [Node])
    end,
    ?tp(notice, "waiting for nodeup", #{node => Node}),
    wait_nodeup(Node),
    wait_gen_rpc_down(NodeSpec),
    ?tp(notice, "restarting apps", #{node => Node}),
    Apps = maps:get(apps, NodeSpec),
    ok = erpc:call(Node, emqx_cth_suite, load_apps, [Apps]),
    _ = erpc:call(Node, emqx_cth_suite, start_apps, [Apps, NodeSpec]),
    %% have to re-inject this so that we may stop the node successfully at the
    %% end....
    ok = emqx_cth_cluster:set_node_opts(Node, NodeSpec),
    ok = snabbkaffe:forward_trace(Node),
    ?tp(notice, "node restarted", #{node => Node}),
    ?tp(restarted_node, #{}),
    ok.

%%------------------------------------------------------------------------------
%% Testcases
%%------------------------------------------------------------------------------
@@ -143,24 +165,14 @@ t_non_persistent_session_subscription(_Config) ->
            {ok, _} = emqtt:connect(Client),
            ?tp(notice, "subscribing", #{}),
            {ok, _, [?RC_GRANTED_QOS_2]} = emqtt:subscribe(Client, SubTopicFilter, qos2),
            IteratorRefs = get_all_iterator_refs(node()),
            IteratorIds = get_all_iterator_ids(node()),

            ok = emqtt:stop(Client),

            #{
                iterator_refs => IteratorRefs,
                iterator_ids => IteratorIds
            }
            ok
        end,
        fun(Res, Trace) ->
        fun(Trace) ->
            ct:pal("trace:\n ~p", [Trace]),
            #{
                iterator_refs := IteratorRefs,
                iterator_ids := IteratorIds
            } = Res,
            ?assertEqual([], IteratorRefs),
            ?assertEqual({ok, []}, IteratorIds),
            ?assertEqual([], ?of_kind(ds_session_subscription_added, Trace)),
            ok
        end
    ),
@@ -175,7 +187,7 @@ t_session_subscription_idempotency(Config) ->
    ?check_trace(
        begin
            ?force_ordering(
                #{?snk_kind := persistent_session_ds_iterator_added},
                #{?snk_kind := persistent_session_ds_subscription_added},
                _NEvents0 = 1,
                #{?snk_kind := will_restart_node},
                _Guard0 = true
@@ -187,32 +199,7 @@ t_session_subscription_idempotency(Config) ->
                _Guard1 = true
            ),

            spawn_link(fun() ->
                ?tp(will_restart_node, #{}),
                ?tp(notice, "restarting node", #{node => Node1}),
                true = monitor_node(Node1, true),
                ok = erpc:call(Node1, init, restart, []),
                receive
                    {nodedown, Node1} ->
                        ok
                after 10_000 ->
                    ct:fail("node ~p didn't stop", [Node1])
                end,
                ?tp(notice, "waiting for nodeup", #{node => Node1}),
                wait_nodeup(Node1),
                wait_gen_rpc_down(Node1Spec),
                ?tp(notice, "restarting apps", #{node => Node1}),
                Apps = maps:get(apps, Node1Spec),
                ok = erpc:call(Node1, emqx_cth_suite, load_apps, [Apps]),
                _ = erpc:call(Node1, emqx_cth_suite, start_apps, [Apps, Node1Spec]),
                %% have to re-inject this so that we may stop the node successfully at the
                %% end....
                ok = emqx_cth_cluster:set_node_opts(Node1, Node1Spec),
                ok = snabbkaffe:forward_trace(Node1),
                ?tp(notice, "node restarted", #{node => Node1}),
                ?tp(restarted_node, #{}),
                ok
            end),
            spawn_link(fun() -> restart_node(Node1, Node1Spec) end),

            ?tp(notice, "starting 1", #{}),
            Client0 = start_client(#{port => Port, clientid => ClientId}),
@@ -223,7 +210,7 @@ t_session_subscription_idempotency(Config) ->
            receive
                {'EXIT', {shutdown, _}} ->
                    ok
            after 0 -> ok
            after 100 -> ok
            end,
            process_flag(trap_exit, false),
@@ -240,10 +227,7 @@ t_session_subscription_idempotency(Config) ->
        end,
        fun(Trace) ->
            ct:pal("trace:\n ~p", [Trace]),
            %% Exactly one iterator should have been opened.
            SubTopicFilterWords = emqx_topic:words(SubTopicFilter),
            ?assertEqual([{ClientId, SubTopicFilterWords}], get_all_iterator_refs(Node1)),
            ?assertMatch({ok, [_]}, get_all_iterator_ids(Node1)),
            ?assertMatch(
                {ok, #{}, #{SubTopicFilterWords := #{}}},
                erpc:call(Node1, emqx_persistent_session_ds, session_open, [ClientId])
@@ -262,7 +246,10 @@ t_session_unsubscription_idempotency(Config) ->
    ?check_trace(
        begin
            ?force_ordering(
                #{?snk_kind := persistent_session_ds_close_iterators, ?snk_span := {complete, _}},
                #{
                    ?snk_kind := persistent_session_ds_subscription_delete,
                    ?snk_span := {complete, _}
                },
                _NEvents0 = 1,
                #{?snk_kind := will_restart_node},
                _Guard0 = true
@@ -270,36 +257,11 @@ t_session_unsubscription_idempotency(Config) ->
            ?force_ordering(
                #{?snk_kind := restarted_node},
                _NEvents1 = 1,
                #{?snk_kind := persistent_session_ds_iterator_delete, ?snk_span := start},
                #{?snk_kind := persistent_session_ds_subscription_route_delete, ?snk_span := start},
                _Guard1 = true
            ),

            spawn_link(fun() ->
                ?tp(will_restart_node, #{}),
                ?tp(notice, "restarting node", #{node => Node1}),
                true = monitor_node(Node1, true),
                ok = erpc:call(Node1, init, restart, []),
                receive
                    {nodedown, Node1} ->
                        ok
                after 10_000 ->
                    ct:fail("node ~p didn't stop", [Node1])
                end,
                ?tp(notice, "waiting for nodeup", #{node => Node1}),
                wait_nodeup(Node1),
                wait_gen_rpc_down(Node1Spec),
                ?tp(notice, "restarting apps", #{node => Node1}),
                Apps = maps:get(apps, Node1Spec),
                ok = erpc:call(Node1, emqx_cth_suite, load_apps, [Apps]),
                _ = erpc:call(Node1, emqx_cth_suite, start_apps, [Apps, Node1Spec]),
                %% have to re-inject this so that we may stop the node successfully at the
                %% end....
                ok = emqx_cth_cluster:set_node_opts(Node1, Node1Spec),
                ok = snabbkaffe:forward_trace(Node1),
                ?tp(notice, "node restarted", #{node => Node1}),
                ?tp(restarted_node, #{}),
                ok
            end),
            spawn_link(fun() -> restart_node(Node1, Node1Spec) end),

            ?tp(notice, "starting 1", #{}),
            Client0 = start_client(#{port => Port, clientid => ClientId}),
@@ -312,7 +274,7 @@ t_session_unsubscription_idempotency(Config) ->
            receive
                {'EXIT', {shutdown, _}} ->
                    ok
            after 0 -> ok
            after 100 -> ok
            end,
            process_flag(trap_exit, false),
@@ -327,7 +289,7 @@ t_session_unsubscription_idempotency(Config) ->
                ?wait_async_action(
                    emqtt:unsubscribe(Client1, SubTopicFilter),
                    #{
                        ?snk_kind := persistent_session_ds_iterator_delete,
                        ?snk_kind := persistent_session_ds_subscription_route_delete,
                        ?snk_span := {complete, _}
                    },
                    15_000
@@ -339,9 +301,10 @@ t_session_unsubscription_idempotency(Config) ->
        end,
        fun(Trace) ->
            ct:pal("trace:\n ~p", [Trace]),
            %% No iterators remaining
            ?assertEqual([], get_all_iterator_refs(Node1)),
            ?assertEqual({ok, []}, get_all_iterator_ids(Node1)),
            ?assertMatch(
                {ok, #{}, Subs = #{}} when map_size(Subs) =:= 0,
                erpc:call(Node1, emqx_persistent_session_ds, session_open, [ClientId])
            ),
            ok
        end
    ),
@@ -18,6 +18,7 @@
{emqx_dashboard,1}.
{emqx_delayed,1}.
{emqx_delayed,2}.
{emqx_ds,1}.
{emqx_eviction_agent,1}.
{emqx_eviction_agent,2}.
{emqx_exhook,1}.
@@ -66,7 +66,8 @@

-export([
    is_expired/1,
    update_expiry/1
    update_expiry/1,
    timestamp_now/0
]).

-export([
@@ -113,14 +114,13 @@ make(From, Topic, Payload) ->
    emqx_types:payload()
) -> emqx_types:message().
make(From, QoS, Topic, Payload) when ?QOS_0 =< QoS, QoS =< ?QOS_2 ->
    Now = erlang:system_time(millisecond),
    #message{
        id = emqx_guid:gen(),
        qos = QoS,
        from = From,
        topic = Topic,
        payload = Payload,
        timestamp = Now
        timestamp = timestamp_now()
    }.

-spec make(
@@ -137,7 +137,6 @@ make(From, QoS, Topic, Payload, Flags, Headers) when
    is_map(Flags),
    is_map(Headers)
->
    Now = erlang:system_time(millisecond),
    #message{
        id = emqx_guid:gen(),
        qos = QoS,
@@ -146,7 +145,7 @@ make(From, QoS, Topic, Payload, Flags, Headers) when
        headers = Headers,
        topic = Topic,
        payload = Payload,
        timestamp = Now
        timestamp = timestamp_now()
    }.

-spec make(
@@ -164,7 +163,6 @@ make(MsgId, From, QoS, Topic, Payload, Flags, Headers) when
    is_map(Flags),
    is_map(Headers)
->
    Now = erlang:system_time(millisecond),
    #message{
        id = MsgId,
        qos = QoS,
@@ -173,7 +171,7 @@ make(MsgId, From, QoS, Topic, Payload, Flags, Headers) when
        headers = Headers,
        topic = Topic,
        payload = Payload,
        timestamp = Now
        timestamp = timestamp_now()
    }.

%% optimistic estimation of a message size after serialization
@@ -403,6 +401,11 @@ from_map(#{
        extra = Extra
    }.

%% @doc Get current timestamp in milliseconds.
-spec timestamp_now() -> integer().
timestamp_now() ->
    erlang:system_time(millisecond).

%% MilliSeconds
elapsed(Since) ->
    max(0, erlang:system_time(millisecond) - Since).
    max(0, timestamp_now() - Since).
@@ -23,16 +23,12 @@

%% Message persistence
-export([
    persist/1,
    serialize/1,
    deserialize/1
    persist/1
]).

%% FIXME
-define(DS_SHARD_ID, <<"local">>).
-define(DEFAULT_KEYSPACE, default).
-define(DS_SHARD, {?DEFAULT_KEYSPACE, ?DS_SHARD_ID}).
-define(PERSISTENT_MESSAGE_DB, emqx_persistent_message).

%% FIXME
-define(WHEN_ENABLED(DO),
    case is_store_enabled() of
        true -> DO;
@@ -44,18 +40,10 @@

init() ->
    ?WHEN_ENABLED(begin
        ok = emqx_ds:ensure_shard(
            ?DS_SHARD,
            #{
                dir => filename:join([
                    emqx:data_dir(),
                    ds,
                    messages,
                    ?DEFAULT_KEYSPACE,
                    ?DS_SHARD_ID
                ])
            }
        ),
        ok = emqx_ds:open_db(?PERSISTENT_MESSAGE_DB, #{
            backend => builtin,
            storage => {emqx_ds_storage_bitfield_lts, #{}}
        }),
        ok = emqx_persistent_session_ds_router:init_tables(),
        ok = emqx_persistent_session_ds:create_tables(),
        ok
@@ -82,19 +70,11 @@ persist(Msg) ->
needs_persistence(Msg) ->
    not (emqx_message:get_flag(dup, Msg) orelse emqx_message:is_sys(Msg)).

-spec store_message(emqx_types:message()) -> emqx_ds:store_batch_result().
store_message(Msg) ->
    ID = emqx_message:id(Msg),
    Timestamp = emqx_guid:timestamp(ID),
    Topic = emqx_topic:words(emqx_message:topic(Msg)),
    emqx_ds_storage_layer:store(?DS_SHARD, ID, Timestamp, Topic, serialize(Msg)).
    emqx_ds:store_batch(?PERSISTENT_MESSAGE_DB, [Msg]).
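%% Editor's note: the new API takes a list, so each message is stored as
%% a singleton batch here. A hypothetical caller holding several
%% messages could amortize the write, e.g.:
%%   emqx_ds:store_batch(?PERSISTENT_MESSAGE_DB, [Msg1, Msg2, Msg3])
%% (illustrative sketch; batching of multiple messages is not part of
%% this commit).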

has_subscribers(#message{topic = Topic}) ->
    emqx_persistent_session_ds_router:has_any_route(Topic).

%%

serialize(Msg) ->
    term_to_binary(emqx_message:to_map(Msg)).

deserialize(Bin) ->
    emqx_message:from_map(binary_to_term(Bin)).
@@ -0,0 +1,213 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc This module implements the routines for replaying streams of
%% messages.
-module(emqx_persistent_message_ds_replayer).

%% API:
-export([new/0, next_packet_id/1, replay/2, commit_offset/3, poll/3]).

%% internal exports:
-export([]).

-export_type([inflight/0]).

-include("emqx_persistent_session_ds.hrl").

%%================================================================================
%% Type declarations
%%================================================================================

%% Note: sequence numbers are monotonic; they don't wrap around:
-type seqno() :: non_neg_integer().

-record(range, {
    stream :: emqx_ds:stream(),
    first :: seqno(),
    last :: seqno(),
    iterator_next :: emqx_ds:iterator() | undefined
}).

-type range() :: #range{}.

-record(inflight, {
    next_seqno = 0 :: seqno(),
    acked_seqno = 0 :: seqno(),
    offset_ranges = [] :: [range()]
}).

-opaque inflight() :: #inflight{}.

%%================================================================================
%% API functions
%%================================================================================

-spec new() -> inflight().
new() ->
    #inflight{}.

-spec next_packet_id(inflight()) -> {emqx_types:packet_id(), inflight()}.
next_packet_id(Inflight0 = #inflight{next_seqno = LastSeqno}) ->
    Inflight = Inflight0#inflight{next_seqno = LastSeqno + 1},
    {seqno_to_packet_id(LastSeqno), Inflight}.

-spec replay(emqx_persistent_session_ds:id(), inflight()) ->
    emqx_session:replies().
replay(_SessionId, _Inflight = #inflight{offset_ranges = _Ranges}) ->
    [].

-spec commit_offset(emqx_persistent_session_ds:id(), emqx_types:packet_id(), inflight()) ->
    {_IsValidOffset :: boolean(), inflight()}.
commit_offset(
    SessionId,
    PacketId,
    Inflight0 = #inflight{
        acked_seqno = AckedSeqno0, next_seqno = NextSeqNo, offset_ranges = Ranges0
    }
) ->
    AckedSeqno = packet_id_to_seqno(NextSeqNo, PacketId),
    true = AckedSeqno0 < AckedSeqno,
    Ranges = lists:filter(
        fun(#range{stream = Stream, last = LastSeqno, iterator_next = ItNext}) ->
            case LastSeqno =< AckedSeqno of
                true ->
                    %% This range has been fully
                    %% acked. Remove it and replace saved
                    %% iterator with the trailing iterator.
                    update_iterator(SessionId, Stream, ItNext),
                    false;
                false ->
                    %% This range still has unacked
                    %% messages:
                    true
            end
        end,
        Ranges0
    ),
    Inflight = Inflight0#inflight{acked_seqno = AckedSeqno, offset_ranges = Ranges},
    {true, Inflight}.
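%% Editor's note: a sketch of how ranges collapse on ack (illustrative
%% values, not from the commit). If offset_ranges is
%%   [#range{first = 0, last = 4, ...}, #range{first = 5, last = 9, ...}]
%% and a PUBACK moves acked_seqno to 7, the first range satisfies
%% 4 =< 7: its trailing iterator is persisted via update_iterator/3 and
%% the range is dropped; the second range stays until seqno 9 is acked.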

-spec poll(emqx_persistent_session_ds:id(), inflight(), pos_integer()) ->
    {emqx_session:replies(), inflight()}.
poll(SessionId, Inflight0, WindowSize) when WindowSize > 0, WindowSize < 16#7fff ->
    #inflight{next_seqno = NextSeqNo0, acked_seqno = AckedSeqno} =
        Inflight0,
    FetchThreshold = max(1, WindowSize div 2),
    FreeSpace = AckedSeqno + WindowSize - NextSeqNo0,
    case FreeSpace >= FetchThreshold of
        false ->
            %% TODO: this branch is meant to avoid fetching data from
            %% the DB in chunks that are too small. However, this
            %% logic is not exactly good for the latency. Can the
            %% client get stuck even?
            {[], Inflight0};
        true ->
            Streams = shuffle(get_streams(SessionId)),
            fetch(SessionId, Inflight0, Streams, FreeSpace, [])
    end.
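%% Editor's note: worked example of the window arithmetic above
%% (illustrative values). With WindowSize = 100, FetchThreshold = 50;
%% if acked_seqno = 10 and next_seqno = 30, then
%% FreeSpace = 10 + 100 - 30 = 80 >= 50, so up to 80 new messages are
%% fetched. Had next_seqno been 70, FreeSpace would be 40 and poll/3
%% would return nothing until more PUBACKs arrive.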

%%================================================================================
%% Internal exports
%%================================================================================

%%================================================================================
%% Internal functions
%%================================================================================

fetch(_SessionId, Inflight, _Streams = [], _N, Acc) ->
    {lists:reverse(Acc), Inflight};
fetch(_SessionId, Inflight, _Streams, 0, Acc) ->
    {lists:reverse(Acc), Inflight};
fetch(SessionId, Inflight0, [#ds_stream{stream = Stream} | Streams], N, Publishes0) ->
    #inflight{next_seqno = FirstSeqNo, offset_ranges = Ranges0} = Inflight0,
    ItBegin = get_last_iterator(SessionId, Stream, Ranges0),
    {ok, ItEnd, Messages} = emqx_ds:next(ItBegin, N),
    {Publishes, Inflight1} =
        lists:foldl(
            fun(Msg, {PubAcc0, InflightAcc0}) ->
                {PacketId, InflightAcc} = next_packet_id(InflightAcc0),
                PubAcc = [{PacketId, Msg} | PubAcc0],
                {PubAcc, InflightAcc}
            end,
            {Publishes0, Inflight0},
            Messages
        ),
    #inflight{next_seqno = LastSeqNo} = Inflight1,
    NMessages = LastSeqNo - FirstSeqNo,
    case NMessages > 0 of
        true ->
            Range = #range{
                first = FirstSeqNo,
                last = LastSeqNo - 1,
                stream = Stream,
                iterator_next = ItEnd
            },
            Inflight = Inflight1#inflight{offset_ranges = Ranges0 ++ [Range]},
            fetch(SessionId, Inflight, Streams, N - NMessages, Publishes);
        false ->
            fetch(SessionId, Inflight1, Streams, N, Publishes)
    end.

update_iterator(SessionId, Stream, Iterator) ->
    mria:dirty_write(?SESSION_ITER_TAB, #ds_iter{id = {SessionId, Stream}, iter = Iterator}).

get_last_iterator(SessionId, Stream, Ranges) ->
    case lists:keyfind(Stream, #range.stream, lists:reverse(Ranges)) of
        false ->
            get_iterator(SessionId, Stream);
        #range{iterator_next = Next} ->
            Next
    end.

get_iterator(SessionId, Stream) ->
    Id = {SessionId, Stream},
    [#ds_iter{iter = It}] = mnesia:dirty_read(?SESSION_ITER_TAB, Id),
    It.

get_streams(SessionId) ->
    mnesia:dirty_read(?SESSION_STREAM_TAB, SessionId).

%% Packet ID as defined by MQTT protocol is a 16-bit integer in range
%% 1..FFFF. This function translates internal session sequence number
%% to MQTT packet ID by chopping off most significant bits and adding
%% 1. This assumes that there are never more than FFFF in-flight
%% packets at any time:
-spec seqno_to_packet_id(non_neg_integer()) -> emqx_types:packet_id().
seqno_to_packet_id(Counter) ->
    Counter rem 16#ffff + 1.

%% Reconstruct session counter by adding most significant bits from
%% the current counter to the packet id.
-spec packet_id_to_seqno(non_neg_integer(), emqx_types:packet_id()) -> non_neg_integer().
packet_id_to_seqno(NextSeqNo, PacketId) ->
    N = ((NextSeqNo bsr 16) bsl 16) + PacketId,
    case N > NextSeqNo of
        true -> N - 16#10000;
        false -> N
    end.
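%% Editor's note: a worked round trip, far from the wrap-around point
%% (illustrative values). Assigning seqno 41 yields packet id
%% 41 rem 16#ffff + 1 = 42 and next_seqno = 42; on PUBACK,
%% packet_id_to_seqno(42, 42) = ((42 bsr 16) bsl 16) + 42 = 42, i.e.
%% seqnos 0..41 are then treated as acked by commit_offset/3.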

-spec shuffle([A]) -> [A].
shuffle(L0) ->
    L1 = lists:map(
        fun(A) ->
            {rand:uniform(), A}
        end,
        L0
    ),
    L2 = lists:sort(L1),
    {_, L} = lists:unzip(L2),
    L.
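%% Editor's note: shuffle/1 is the classic decorate-sort-undecorate
%% pattern: pair each element with a random key, sort the pairs, drop
%% the keys; e.g. shuffle([a, b, c]) returns the three atoms in a
%% uniformly random order.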
@@ -18,9 +18,12 @@

-include("emqx.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-include_lib("stdlib/include/ms_transform.hrl").

-include("emqx_mqtt.hrl").

-include("emqx_persistent_session_ds.hrl").

%% Session API
-export([
    create/3,
@@ -50,7 +53,7 @@
-export([
    deliver/3,
    replay/3,
    % handle_timeout/3,
    handle_timeout/3,
    disconnect/1,
    terminate/2
]).
@@ -58,33 +61,27 @@
%% session table operations
-export([create_tables/0]).

-ifdef(TEST).
-export([session_open/1]).
-endif.

%% RPC
-export([
    ensure_iterator_closed_on_all_shards/1,
    ensure_all_iterators_closed/1
]).
%% Remove me later (satisfy checks for an unused BPAPI)
-export([
    do_open_iterator/3,
    do_ensure_iterator_closed/1,
    do_ensure_all_iterators_closed/1
]).

%% FIXME
-define(DS_SHARD_ID, <<"local">>).
-define(DEFAULT_KEYSPACE, default).
-define(DS_SHARD, {?DEFAULT_KEYSPACE, ?DS_SHARD_ID}).
-ifdef(TEST).
-export([session_open/1]).
-endif.

%% Currently, this is the clientid. We avoid `emqx_types:clientid()' because that can be
%% an atom, in theory (?).
-type id() :: binary().
-type iterator() :: emqx_ds:iterator().
-type iterator_id() :: emqx_ds:iterator_id().
-type topic_filter() :: emqx_ds:topic_filter().
-type iterators() :: #{topic_filter() => iterator()}.
-type subscription_id() :: {id(), topic_filter()}.
-type subscription() :: #{
    start_time := emqx_ds:time(),
    props := map(),
    extra := map()
}.
-type session() :: #{
    %% Client ID
    id := id(),
@@ -93,11 +90,15 @@
    %% When the session should expire
    expires_at := timestamp() | never,
    %% Client’s Subscriptions.
    iterators := #{topic() => iterator()},
    iterators := #{topic() => subscription()},
    %% Inflight messages
    inflight := emqx_persistent_message_ds_replayer:inflight(),
    %%
    props := map()
}.

%% -type session() :: #session{}.

-type timestamp() :: emqx_utils_calendar:epoch_millisecond().
-type topic() :: emqx_types:topic().
-type clientinfo() :: emqx_types:clientinfo().
@@ -106,12 +107,15 @@

-export_type([id/0]).

-define(PERSISTENT_MESSAGE_DB, emqx_persistent_message).

%%

-spec create(clientinfo(), conninfo(), emqx_session:conf()) ->
    session().
create(#{clientid := ClientID}, _ConnInfo, Conf) ->
    % TODO: expiration
    ensure_timers(),
    ensure_session(ClientID, Conf).

-spec open(clientinfo(), conninfo()) ->
@@ -126,6 +130,7 @@ open(#{clientid := ClientID}, _ConnInfo) ->
    ok = emqx_cm:discard_session(ClientID),
    case open_session(ClientID) of
        Session = #{} ->
            ensure_timers(),
            {true, Session, []};
        false ->
            false
@@ -137,17 +142,17 @@ ensure_session(ClientID, Conf) ->

open_session(ClientID) ->
    case session_open(ClientID) of
        {ok, Session, Iterators} ->
            Session#{iterators => prep_iterators(Iterators)};
        {ok, Session, Subscriptions} ->
            Session#{iterators => prep_subscriptions(Subscriptions)};
        false ->
            false
    end.

prep_iterators(Iterators) ->
prep_subscriptions(Subscriptions) ->
    maps:fold(
        fun(Topic, Iterator, Acc) -> Acc#{emqx_topic:join(Topic) => Iterator} end,
        fun(Topic, Subscription, Acc) -> Acc#{emqx_topic:join(Topic) => Subscription} end,
        #{},
        Iterators
        Subscriptions
    ).

-spec destroy(session() | clientinfo()) -> ok.
@@ -157,7 +162,6 @@ destroy(#{clientid := ClientID}) ->
    destroy_session(ClientID).

destroy_session(ClientID) ->
    _ = ensure_all_iterators_closed(ClientID),
    session_drop(ClientID).

%%--------------------------------------------------------------------
@@ -245,7 +249,7 @@ unsubscribe(
) when is_map_key(TopicFilter, Iters) ->
    Iterator = maps:get(TopicFilter, Iters),
    SubOpts = maps:get(props, Iterator),
    ok = del_subscription(TopicFilter, Iterator, ID),
    ok = del_subscription(TopicFilter, ID),
    {ok, Session#{iterators := maps:remove(TopicFilter, Iters)}, SubOpts};
unsubscribe(
    _TopicFilter,
@@ -271,19 +275,29 @@ get_subscription(TopicFilter, #{iterators := Iters}) ->
    {ok, emqx_types:publish_result(), replies(), session()}
    | {error, emqx_types:reason_code()}.
publish(_PacketId, Msg, Session) ->
    % TODO: stub
    {ok, emqx_broker:publish(Msg), [], Session}.
    %% TODO:
    Result = emqx_broker:publish(Msg),
    {ok, Result, [], Session}.

%%--------------------------------------------------------------------
%% Client -> Broker: PUBACK
%%--------------------------------------------------------------------

%% FIXME: parts of the commit offset function are mocked
-dialyzer({nowarn_function, puback/3}).

-spec puback(clientinfo(), emqx_types:packet_id(), session()) ->
    {ok, emqx_types:message(), replies(), session()}
    | {error, emqx_types:reason_code()}.
puback(_ClientInfo, _PacketId, _Session = #{}) ->
    % TODO: stub
    {error, ?RC_PACKET_IDENTIFIER_NOT_FOUND}.
puback(_ClientInfo, PacketId, Session = #{id := Id, inflight := Inflight0}) ->
    case emqx_persistent_message_ds_replayer:commit_offset(Id, PacketId, Inflight0) of
        {true, Inflight} ->
            %% TODO
            Msg = #message{},
            {ok, Msg, [], Session#{inflight => Inflight}};
        {false, _} ->
            {error, ?RC_PACKET_IDENTIFIER_NOT_FOUND}
    end.

%%--------------------------------------------------------------------
%% Client -> Broker: PUBREC
@@ -320,10 +334,22 @@ pubcomp(_ClientInfo, _PacketId, _Session = #{}) ->
%%--------------------------------------------------------------------

-spec deliver(clientinfo(), [emqx_types:deliver()], session()) ->
    no_return().
deliver(_ClientInfo, _Delivers, _Session = #{}) ->
    % TODO: ensure it's unreachable somehow
    error(unexpected).
    {ok, replies(), session()}.
deliver(_ClientInfo, _Delivers, Session) ->
    %% TODO: QoS0 and system messages end up here.
    {ok, [], Session}.

-spec handle_timeout(clientinfo(), _Timeout, session()) ->
    {ok, replies(), session()} | {ok, replies(), timeout(), session()}.
handle_timeout(_ClientInfo, pull, Session = #{id := Id, inflight := Inflight0}) ->
    WindowSize = 100,
    {Publishes, Inflight} = emqx_persistent_message_ds_replayer:poll(Id, Inflight0, WindowSize),
    ensure_timer(pull),
    {ok, Publishes, Session#{inflight => Inflight}};
handle_timeout(_ClientInfo, get_streams, Session = #{id := Id}) ->
    renew_streams(Id),
    ensure_timer(get_streams),
    {ok, [], Session}.
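%% Editor's note: both timer callbacks re-arm themselves, so the session
%% alternates two periodic jobs: `pull' asks the replayer for up to
%% WindowSize (100) publishes, and `get_streams' refreshes the known DS
%% streams. ensure_timer/1 (see the end of this file's diff) schedules
%% each with a 100 ms interval.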

-spec replay(clientinfo(), [], session()) ->
    {ok, replies(), session()}.
@@ -344,151 +370,69 @@ terminate(_Reason, _Session = #{}) ->
%%--------------------------------------------------------------------

-spec add_subscription(topic(), emqx_types:subopts(), id()) ->
    emqx_ds:iterator().
    subscription().
add_subscription(TopicFilterBin, SubOpts, DSSessionID) ->
    % N.B.: we chose to update the router before adding the subscription to the
    % session/iterator table. The reasoning for this is as follows:
    %
    % Messages matching this topic filter should start to be persisted as soon as
    % possible to avoid missing messages. If this is the first such persistent
    % session subscription, it's important to do so early on.
    %
    % This could, in turn, lead to some inconsistency: if such a route gets
    % created but the session/iterator data fails to be updated accordingly, we
    % have a dangling route. To remove such dangling routes, we may have a
    % periodic GC process that removes routes that do not have a matching
    % persistent subscription. Also, route operations use dirty mnesia
    % operations, which inherently have room for inconsistencies.
    %
    % In practice, we use the iterator reference table as a source of truth,
    % since it is guarded by a transaction context: we consider a subscription
    % operation to be successful if it ended up changing this table. Both router
    % and iterator information can be reconstructed from this table, if needed.
    %% N.B.: we chose to update the router before adding the subscription to the
    %% session/iterator table. The reasoning for this is as follows:
    %%
    %% Messages matching this topic filter should start to be persisted as soon as
    %% possible to avoid missing messages. If this is the first such persistent
    %% session subscription, it's important to do so early on.
    %%
    %% This could, in turn, lead to some inconsistency: if such a route gets
    %% created but the session/iterator data fails to be updated accordingly, we
    %% have a dangling route. To remove such dangling routes, we may have a
    %% periodic GC process that removes routes that do not have a matching
    %% persistent subscription. Also, route operations use dirty mnesia
    %% operations, which inherently have room for inconsistencies.
    %%
    %% In practice, we use the iterator reference table as a source of truth,
    %% since it is guarded by a transaction context: we consider a subscription
    %% operation to be successful if it ended up changing this table. Both router
    %% and iterator information can be reconstructed from this table, if needed.
    ok = emqx_persistent_session_ds_router:do_add_route(TopicFilterBin, DSSessionID),
    TopicFilter = emqx_topic:words(TopicFilterBin),
    {ok, Iterator, IsNew} = session_add_iterator(
    {ok, DSSubExt, IsNew} = session_add_subscription(
        DSSessionID, TopicFilter, SubOpts
    ),
    Ctx = #{iterator => Iterator, is_new => IsNew},
    ?tp(persistent_session_ds_iterator_added, Ctx),
    ?tp_span(
        persistent_session_ds_open_iterators,
        Ctx,
        ok = open_iterator_on_all_shards(TopicFilter, Iterator)
    ),
    Iterator.
    ?tp(persistent_session_ds_subscription_added, #{sub => DSSubExt, is_new => IsNew}),
    %% we'll list streams and open iterators when implementing message replay.
    DSSubExt.

-spec update_subscription(topic(), iterator(), emqx_types:subopts(), id()) ->
    iterator().
update_subscription(TopicFilterBin, Iterator, SubOpts, DSSessionID) ->
-spec update_subscription(topic(), subscription(), emqx_types:subopts(), id()) ->
    subscription().
update_subscription(TopicFilterBin, DSSubExt, SubOpts, DSSessionID) ->
    TopicFilter = emqx_topic:words(TopicFilterBin),
    {ok, NIterator, false} = session_add_iterator(
    {ok, NDSSubExt, false} = session_add_subscription(
        DSSessionID, TopicFilter, SubOpts
    ),
    ok = ?tp(persistent_session_ds_iterator_updated, #{iterator => Iterator}),
    NIterator.
    ok = ?tp(persistent_session_ds_iterator_updated, #{sub => DSSubExt}),
    NDSSubExt.

-spec open_iterator_on_all_shards(emqx_types:words(), emqx_ds:iterator()) -> ok.
open_iterator_on_all_shards(TopicFilter, Iterator) ->
    ?tp(persistent_session_ds_will_open_iterators, #{iterator => Iterator}),
    %% Note: currently, shards map 1:1 to nodes, but this will change in the future.
    Nodes = emqx:running_nodes(),
    Results = emqx_persistent_session_ds_proto_v1:open_iterator(
        Nodes,
        TopicFilter,
        maps:get(start_time, Iterator),
        maps:get(id, Iterator)
    ),
    %% TODO
    %% 1. Handle errors.
    %% 2. Iterator handles are rocksdb resources, it's doubtful they survive RPC.
    %% Even if they do, we throw them away here anyway. All in all, we probably should
    %% hold each of them in a process on the respective node.
    true = lists:all(fun(Res) -> element(1, Res) =:= ok end, Results),
-spec del_subscription(topic(), id()) ->
    ok.

%% RPC target.
-spec do_open_iterator(emqx_types:words(), emqx_ds:time(), emqx_ds:iterator_id()) ->
    {ok, emqx_ds_storage_layer:iterator()} | {error, _Reason}.
do_open_iterator(TopicFilter, StartMS, IteratorID) ->
    Replay = {TopicFilter, StartMS},
    emqx_ds_storage_layer:ensure_iterator(?DS_SHARD, IteratorID, Replay).

-spec del_subscription(topic(), iterator(), id()) ->
    ok.
del_subscription(TopicFilterBin, #{id := IteratorID}, DSSessionID) ->
    % N.B.: see comments in `?MODULE:add_subscription' for a discussion about the
    % order of operations here.
del_subscription(TopicFilterBin, DSSessionId) ->
    TopicFilter = emqx_topic:words(TopicFilterBin),
    Ctx = #{iterator_id => IteratorID},
    ?tp_span(
        persistent_session_ds_close_iterators,
        Ctx,
        ok = ensure_iterator_closed_on_all_shards(IteratorID)
        persistent_session_ds_subscription_delete,
        #{session_id => DSSessionId},
        ok = session_del_subscription(DSSessionId, TopicFilter)
    ),
    ?tp_span(
        persistent_session_ds_iterator_delete,
        Ctx,
        session_del_iterator(DSSessionID, TopicFilter)
    ),
    ok = emqx_persistent_session_ds_router:do_delete_route(TopicFilterBin, DSSessionID).

-spec ensure_iterator_closed_on_all_shards(emqx_ds:iterator_id()) -> ok.
ensure_iterator_closed_on_all_shards(IteratorID) ->
    %% Note: currently, shards map 1:1 to nodes, but this will change in the future.
    Nodes = emqx:running_nodes(),
    Results = emqx_persistent_session_ds_proto_v1:close_iterator(Nodes, IteratorID),
    %% TODO: handle errors
    true = lists:all(fun(Res) -> Res =:= {ok, ok} end, Results),
    ok.

%% RPC target.
-spec do_ensure_iterator_closed(emqx_ds:iterator_id()) -> ok.
do_ensure_iterator_closed(IteratorID) ->
    ok = emqx_ds_storage_layer:discard_iterator(?DS_SHARD, IteratorID),
    ok.

-spec ensure_all_iterators_closed(id()) -> ok.
ensure_all_iterators_closed(DSSessionID) ->
    %% Note: currently, shards map 1:1 to nodes, but this will change in the future.
    Nodes = emqx:running_nodes(),
    Results = emqx_persistent_session_ds_proto_v1:close_all_iterators(Nodes, DSSessionID),
    %% TODO: handle errors
    true = lists:all(fun(Res) -> Res =:= {ok, ok} end, Results),
    ok.

%% RPC target.
-spec do_ensure_all_iterators_closed(id()) -> ok.
do_ensure_all_iterators_closed(DSSessionID) ->
    ok = emqx_ds_storage_layer:discard_iterator_prefix(?DS_SHARD, DSSessionID),
    ok.
        persistent_session_ds_subscription_route_delete,
        #{session_id => DSSessionId},
        ok = emqx_persistent_session_ds_router:do_delete_route(TopicFilterBin, DSSessionId)
    ).

%%--------------------------------------------------------------------
%% Session tables operations
%%--------------------------------------------------------------------

-define(SESSION_TAB, emqx_ds_session).
-define(ITERATOR_REF_TAB, emqx_ds_iterator_ref).
-define(DS_MRIA_SHARD, emqx_ds_shard).

-record(session, {
    %% same as clientid
    id :: id(),
    %% creation time
    created_at :: _Millisecond :: non_neg_integer(),
    expires_at = never :: _Millisecond :: non_neg_integer() | never,
    %% for future usage
    props = #{} :: map()
}).

-record(iterator_ref, {
    ref_id :: {id(), emqx_ds:topic_filter()},
    it_id :: emqx_ds:iterator_id(),
    start_time :: emqx_ds:time(),
    props = #{} :: map()
}).

create_tables() ->
    ok = emqx_ds:open_db(?PERSISTENT_MESSAGE_DB, #{
        backend => builtin,
        storage => {emqx_ds_storage_bitfield_lts, #{}}
    }),
    ok = mria:create_table(
        ?SESSION_TAB,
        [
@@ -500,15 +444,38 @@ create_tables() ->
        ]
    ),
    ok = mria:create_table(
        ?ITERATOR_REF_TAB,
        ?SESSION_SUBSCRIPTIONS_TAB,
        [
            {rlog_shard, ?DS_MRIA_SHARD},
            {type, ordered_set},
            {storage, storage()},
            {record_name, iterator_ref},
            {attributes, record_info(fields, iterator_ref)}
            {record_name, ds_sub},
            {attributes, record_info(fields, ds_sub)}
        ]
    ),
    ok = mria:create_table(
        ?SESSION_STREAM_TAB,
        [
            {rlog_shard, ?DS_MRIA_SHARD},
            {type, bag},
            {storage, storage()},
            {record_name, ds_stream},
            {attributes, record_info(fields, ds_stream)}
        ]
    ),
    ok = mria:create_table(
        ?SESSION_ITER_TAB,
        [
            {rlog_shard, ?DS_MRIA_SHARD},
            {type, set},
            {storage, storage()},
            {record_name, ds_iter},
            {attributes, record_info(fields, ds_iter)}
        ]
    ),
    ok = mria:wait_for_tables([
        ?SESSION_TAB, ?SESSION_SUBSCRIPTIONS_TAB, ?SESSION_STREAM_TAB, ?SESSION_ITER_TAB
    ]),
    ok.

-dialyzer({nowarn_function, storage/0}).
@@ -529,26 +496,26 @@ storage() ->
%% Note: session API doesn't handle session takeovers, it's the job of
%% the broker.
-spec session_open(id()) ->
    {ok, session(), iterators()} | false.
    {ok, session(), #{topic() => subscription()}} | false.
session_open(SessionId) ->
    transaction(fun() ->
        case mnesia:read(?SESSION_TAB, SessionId, write) of
            [Record = #session{}] ->
                Session = export_record(Record),
                IteratorRefs = session_read_iterators(SessionId),
                Iterators = export_iterators(IteratorRefs),
                {ok, Session, Iterators};
                Session = export_session(Record),
                DSSubs = session_read_subscriptions(SessionId),
                Subscriptions = export_subscriptions(DSSubs),
                {ok, Session, Subscriptions};
            [] ->
                false
        end
    end).

-spec session_ensure_new(id(), _Props :: map()) ->
    {ok, session(), iterators()}.
    {ok, session(), #{topic() => subscription()}}.
session_ensure_new(SessionId, Props) ->
    transaction(fun() ->
        ok = session_drop_iterators(SessionId),
        Session = export_record(session_create(SessionId, Props)),
        ok = session_drop_subscriptions(SessionId),
        Session = export_session(session_create(SessionId, Props)),
        {ok, Session, #{}}
    end).
@@ -557,7 +524,8 @@ session_create(SessionId, Props) ->
        id = SessionId,
        created_at = erlang:system_time(millisecond),
        expires_at = never,
        props = Props
        props = Props,
        inflight = emqx_persistent_message_ds_replayer:new()
    },
    ok = mnesia:write(?SESSION_TAB, Session, write),
    Session.
@@ -568,80 +536,143 @@ session_create(SessionId, Props) ->
session_drop(DSSessionId) ->
    transaction(fun() ->
        %% TODO: ensure all iterators from this clientid are closed?
        ok = session_drop_iterators(DSSessionId),
        ok = session_drop_subscriptions(DSSessionId),
        ok = mnesia:delete(?SESSION_TAB, DSSessionId, write)
    end).

session_drop_iterators(DSSessionId) ->
    IteratorRefs = session_read_iterators(DSSessionId),
    ok = lists:foreach(fun session_del_iterator/1, IteratorRefs).
session_drop_subscriptions(DSSessionId) ->
    IteratorRefs = session_read_subscriptions(DSSessionId),
    ok = lists:foreach(fun session_del_subscription/1, IteratorRefs).

%% @doc Called when a client subscribes to a topic. Idempotent.
-spec session_add_iterator(id(), topic_filter(), _Props :: map()) ->
    {ok, iterator(), _IsNew :: boolean()}.
session_add_iterator(DSSessionId, TopicFilter, Props) ->
    IteratorRefId = {DSSessionId, TopicFilter},
-spec session_add_subscription(id(), topic_filter(), _Props :: map()) ->
    {ok, subscription(), _IsNew :: boolean()}.
session_add_subscription(DSSessionId, TopicFilter, Props) ->
    DSSubId = {DSSessionId, TopicFilter},
    transaction(fun() ->
        case mnesia:read(?ITERATOR_REF_TAB, IteratorRefId, write) of
        case mnesia:read(?SESSION_SUBSCRIPTIONS_TAB, DSSubId, write) of
            [] ->
                IteratorRef = session_insert_iterator(DSSessionId, TopicFilter, Props),
                Iterator = export_record(IteratorRef),
                DSSub = session_insert_subscription(DSSessionId, TopicFilter, Props),
                DSSubExt = export_subscription(DSSub),
                ?tp(
                    ds_session_subscription_added,
                    #{iterator => Iterator, session_id => DSSessionId}
                    #{sub => DSSubExt, session_id => DSSessionId}
                ),
                {ok, Iterator, _IsNew = true};
            [#iterator_ref{} = IteratorRef] ->
                NIteratorRef = session_update_iterator(IteratorRef, Props),
                NIterator = export_record(NIteratorRef),
                {ok, DSSubExt, _IsNew = true};
            [#ds_sub{} = DSSub] ->
                NDSSub = session_update_subscription(DSSub, Props),
                NDSSubExt = export_subscription(NDSSub),
                ?tp(
                    ds_session_subscription_present,
                    #{iterator => NIterator, session_id => DSSessionId}
                    #{sub => NDSSubExt, session_id => DSSessionId}
                ),
                {ok, NIterator, _IsNew = false}
                {ok, NDSSubExt, _IsNew = false}
        end
    end).

session_insert_iterator(DSSessionId, TopicFilter, Props) ->
    {IteratorId, StartMS} = new_iterator_id(DSSessionId),
    IteratorRef = #iterator_ref{
        ref_id = {DSSessionId, TopicFilter},
        it_id = IteratorId,
-spec session_insert_subscription(id(), topic_filter(), map()) -> ds_sub().
session_insert_subscription(DSSessionId, TopicFilter, Props) ->
    {DSSubId, StartMS} = new_subscription_id(DSSessionId, TopicFilter),
    DSSub = #ds_sub{
        id = DSSubId,
        start_time = StartMS,
        props = Props
        props = Props,
        extra = #{}
    },
    ok = mnesia:write(?ITERATOR_REF_TAB, IteratorRef, write),
    IteratorRef.
    ok = mnesia:write(?SESSION_SUBSCRIPTIONS_TAB, DSSub, write),
    DSSub.

session_update_iterator(IteratorRef, Props) ->
    NIteratorRef = IteratorRef#iterator_ref{props = Props},
    ok = mnesia:write(?ITERATOR_REF_TAB, NIteratorRef, write),
    NIteratorRef.
-spec session_update_subscription(ds_sub(), map()) -> ds_sub().
session_update_subscription(DSSub, Props) ->
    NDSSub = DSSub#ds_sub{props = Props},
    ok = mnesia:write(?SESSION_SUBSCRIPTIONS_TAB, NDSSub, write),
    NDSSub.

%% @doc Called when a client unsubscribes from a topic.
-spec session_del_iterator(id(), topic_filter()) -> ok.
session_del_iterator(DSSessionId, TopicFilter) ->
    IteratorRefId = {DSSessionId, TopicFilter},
session_del_subscription(DSSessionId, TopicFilter) ->
    DSSubId = {DSSessionId, TopicFilter},
    transaction(fun() ->
        mnesia:delete(?ITERATOR_REF_TAB, IteratorRefId, write)
        mnesia:delete(?SESSION_SUBSCRIPTIONS_TAB, DSSubId, write)
    end).

session_del_iterator(#iterator_ref{ref_id = IteratorRefId}) ->
    mnesia:delete(?ITERATOR_REF_TAB, IteratorRefId, write).
session_del_subscription(#ds_sub{id = DSSubId}) ->
    mnesia:delete(?SESSION_SUBSCRIPTIONS_TAB, DSSubId, write).

session_read_iterators(DSSessionId) ->
    % NOTE: somewhat convoluted way to trick dialyzer
    Pat = erlang:make_tuple(record_info(size, iterator_ref), '_', [
        {1, iterator_ref},
        {#iterator_ref.ref_id, {DSSessionId, '_'}}
    ]),
    mnesia:match_object(?ITERATOR_REF_TAB, Pat, read).
session_read_subscriptions(DSSessionId) ->
    MS = ets:fun2ms(
        fun(Sub = #ds_sub{id = {Sess, _}}) when Sess =:= DSSessionId ->
            Sub
        end
    ),
    mnesia:select(?SESSION_SUBSCRIPTIONS_TAB, MS, read).

-spec new_iterator_id(id()) -> {iterator_id(), emqx_ds:time()}.
new_iterator_id(DSSessionId) ->
    NowMS = erlang:system_time(microsecond),
    IteratorId = <<DSSessionId/binary, (emqx_guid:gen())/binary>>,
    {IteratorId, NowMS}.
-spec new_subscription_id(id(), topic_filter()) -> {subscription_id(), integer()}.
new_subscription_id(DSSessionId, TopicFilter) ->
    %% Note: here we use _milliseconds_ to match with the timestamp
    %% field of `#message' record.
    NowMS = erlang:system_time(millisecond),
    DSSubId = {DSSessionId, TopicFilter},
    {DSSubId, NowMS}.

%%--------------------------------------------------------------------
%% RPC targets (v1)
%%--------------------------------------------------------------------

%% RPC target.
-spec do_open_iterator(emqx_types:words(), emqx_ds:time(), emqx_ds:iterator_id()) ->
    {ok, emqx_ds_storage_layer:iterator()} | {error, _Reason}.
do_open_iterator(_TopicFilter, _StartMS, _IteratorID) ->
    {error, not_implemented}.

%% RPC target.
-spec do_ensure_iterator_closed(emqx_ds:iterator_id()) -> ok.
do_ensure_iterator_closed(_IteratorID) ->
    ok.

%% RPC target.
-spec do_ensure_all_iterators_closed(id()) -> ok.
do_ensure_all_iterators_closed(_DSSessionID) ->
    ok.

%%--------------------------------------------------------------------
%% Reading batches
%%--------------------------------------------------------------------

renew_streams(Id) ->
    Subscriptions = ro_transaction(fun() -> session_read_subscriptions(Id) end),
    ExistingStreams = ro_transaction(fun() -> mnesia:read(?SESSION_STREAM_TAB, Id) end),
    lists:foreach(
        fun(#ds_sub{id = {_, TopicFilter}, start_time = StartTime}) ->
            renew_streams(Id, ExistingStreams, TopicFilter, StartTime)
        end,
        Subscriptions
    ).

renew_streams(Id, ExistingStreams, TopicFilter, StartTime) ->
    AllStreams = emqx_ds:get_streams(?PERSISTENT_MESSAGE_DB, TopicFilter, StartTime),
    transaction(
        fun() ->
            lists:foreach(
                fun({Rank, Stream}) ->
                    Rec = #ds_stream{
                        session = Id,
                        topic_filter = TopicFilter,
                        stream = Stream,
                        rank = Rank
                    },
                    case lists:member(Rec, ExistingStreams) of
                        true ->
                            ok;
                        false ->
                            mnesia:write(?SESSION_STREAM_TAB, Rec, write),
                            {ok, Iterator} = emqx_ds:make_iterator(Stream, TopicFilter, StartTime),
                            IterRec = #ds_iter{id = {Id, Stream}, iter = Iterator},
                            mnesia:write(?SESSION_ITER_TAB, IterRec, write)
                    end
                end,
                AllStreams
            )
        end
    ).
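%% Editor's note: as the code above shows, `emqx_ds:get_streams/3'
%% returns `{Rank, Stream}' pairs; every stream not yet known to the
%% session gets a #ds_stream{} record plus a fresh #ds_iter{} iterator
%% positioned at the subscription's start time, so the replayer can
%% later pull from every stream matching the topic filter.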

%%--------------------------------------------------------------------------------
@@ -649,23 +680,39 @@ transaction(Fun) ->
    {atomic, Res} = mria:transaction(?DS_MRIA_SHARD, Fun),
    Res.

ro_transaction(Fun) ->
    {atomic, Res} = mria:ro_transaction(?DS_MRIA_SHARD, Fun),
    Res.

%%--------------------------------------------------------------------------------

export_iterators(IteratorRefs) ->
export_subscriptions(DSSubs) ->
    lists:foldl(
        fun(IteratorRef = #iterator_ref{ref_id = {_DSSessionId, TopicFilter}}, Acc) ->
            Acc#{TopicFilter => export_record(IteratorRef)}
        fun(DSSub = #ds_sub{id = {_DSSessionId, TopicFilter}}, Acc) ->
            Acc#{TopicFilter => export_subscription(DSSub)}
        end,
        #{},
        IteratorRefs
        DSSubs
    ).

export_record(#session{} = Record) ->
    export_record(Record, #session.id, [id, created_at, expires_at, props], #{});
export_record(#iterator_ref{} = Record) ->
    export_record(Record, #iterator_ref.it_id, [id, start_time, props], #{}).
export_session(#session{} = Record) ->
    export_record(Record, #session.id, [id, created_at, expires_at, inflight, props], #{}).

export_subscription(#ds_sub{} = Record) ->
    export_record(Record, #ds_sub.start_time, [start_time, props, extra], #{}).

export_record(Record, I, [Field | Rest], Acc) ->
    export_record(Record, I + 1, Rest, Acc#{Field => element(I, Record)});
export_record(_, _, [], Acc) ->
    Acc.
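%% Editor's note: export_record/4 above is a generic record-to-map fold
%% over consecutive record fields. For instance (illustrative values),
%%   export_record(#ds_sub{start_time = 1, props = #{}, extra = #{}},
%%                 #ds_sub.start_time, [start_time, props, extra], #{})
%% yields #{start_time => 1, props => #{}, extra => #{}}.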

%% TODO: find a more reliable way to perform actions that have side
%% effects. Add `CBM:init' callback to the session behavior?
ensure_timers() ->
    ensure_timer(pull),
    ensure_timer(get_streams).

-spec ensure_timer(pull | get_streams) -> ok.
ensure_timer(Type) ->
    _ = emqx_utils:start_timer(100, {emqx_session, Type}),
    ok.
@@ -0,0 +1,56 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-ifndef(EMQX_PERSISTENT_SESSION_DS_HRL_HRL).
-define(EMQX_PERSISTENT_SESSION_DS_HRL_HRL, true).

-define(SESSION_TAB, emqx_ds_session).
-define(SESSION_SUBSCRIPTIONS_TAB, emqx_ds_session_subscriptions).
-define(SESSION_STREAM_TAB, emqx_ds_stream_tab).
-define(SESSION_ITER_TAB, emqx_ds_iter_tab).
-define(DS_MRIA_SHARD, emqx_ds_session_shard).

-record(ds_sub, {
    id :: emqx_persistent_session_ds:subscription_id(),
    start_time :: emqx_ds:time(),
    props = #{} :: map(),
    extra = #{} :: map()
}).
-type ds_sub() :: #ds_sub{}.

-record(ds_stream, {
    session :: emqx_persistent_session_ds:id(),
    topic_filter :: emqx_ds:topic_filter(),
    stream :: emqx_ds:stream(),
    rank :: emqx_ds:stream_rank()
}).

-record(ds_iter, {
    id :: {emqx_persistent_session_ds:id(), emqx_ds:stream()},
    iter :: emqx_ds:iterator()
}).

-record(session, {
    %% same as clientid
    id :: emqx_persistent_session_ds:id(),
    %% creation time
    created_at :: _Millisecond :: non_neg_integer(),
    expires_at = never :: _Millisecond :: non_neg_integer() | never,
    inflight :: emqx_persistent_message_ds_replayer:inflight(),
    %% for future usage
    props = #{} :: map()
}).

-endif.
@@ -20,6 +20,7 @@

-export([
    introduced_in/0,
    deprecated_since/0,

    open_iterator/4,
    close_iterator/2,
@@ -31,9 +32,11 @@
-define(TIMEOUT, 30_000).

introduced_in() ->
    %% FIXME
    "5.3.0".

deprecated_since() ->
    "5.4.0".

-spec open_iterator(
    [node()],
    emqx_types:words(),
@@ -26,9 +26,7 @@

-import(emqx_common_test_helpers, [on_exit/1]).

-define(DEFAULT_KEYSPACE, default).
-define(DS_SHARD_ID, <<"local">>).
-define(DS_SHARD, {?DEFAULT_KEYSPACE, ?DS_SHARD_ID}).
-define(PERSISTENT_MESSAGE_DB, emqx_persistent_message).

all() ->
    emqx_common_test_helpers:all(?MODULE).
@@ -48,6 +46,7 @@ init_per_testcase(t_session_subscription_iterators = TestCase, Config) ->
    Nodes = emqx_cth_cluster:start(Cluster, #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)}),
    [{nodes, Nodes} | Config];
init_per_testcase(TestCase, Config) ->
    ok = emqx_ds:drop_db(?PERSISTENT_MESSAGE_DB),
    Apps = emqx_cth_suite:start(
        app_specs(),
        #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)}
@@ -58,10 +57,11 @@ end_per_testcase(t_session_subscription_iterators, Config) ->
    Nodes = ?config(nodes, Config),
    emqx_common_test_helpers:call_janitor(60_000),
    ok = emqx_cth_cluster:stop(Nodes),
    ok;
    end_per_testcase(common, Config);
end_per_testcase(_TestCase, Config) ->
    Apps = ?config(apps, Config),
    Apps = proplists:get_value(apps, Config, []),
    emqx_common_test_helpers:call_janitor(60_000),
    clear_db(),
    emqx_cth_suite:stop(Apps),
    ok.
@@ -95,14 +95,15 @@ t_messages_persisted(_Config) ->
    Results = [emqtt:publish(CP, Topic, Payload, 1) || {Topic, Payload} <- Messages],

    ct:pal("Results = ~p", [Results]),
    timer:sleep(2000),

    Persisted = consume(?DS_SHARD, {['#'], 0}),
    Persisted = consume(['#'], 0),

    ct:pal("Persisted = ~p", [Persisted]),

    ?assertEqual(
        [M1, M2, M5, M7, M9, M10],
        [{emqx_message:topic(M), emqx_message:payload(M)} || M <- Persisted]
        lists:sort([M1, M2, M5, M7, M9, M10]),
        lists:sort([{emqx_message:topic(M), emqx_message:payload(M)} || M <- Persisted])
    ),

    ok.
@ -139,23 +140,25 @@ t_messages_persisted_2(_Config) ->
|
|||
{ok, #{reason_code := ?RC_NO_MATCHING_SUBSCRIBERS}} =
|
||||
emqtt:publish(CP, T(<<"client/2/topic">>), <<"8">>, 1),
|
||||
|
||||
Persisted = consume(?DS_SHARD, {['#'], 0}),
|
||||
timer:sleep(2000),
|
||||
|
||||
Persisted = consume(['#'], 0),
|
||||
|
||||
ct:pal("Persisted = ~p", [Persisted]),
|
||||
|
||||
?assertEqual(
|
||||
[
|
||||
lists:sort([
|
||||
{T(<<"client/1/topic">>), <<"4">>},
|
||||
{T(<<"client/2/topic">>), <<"5">>}
|
||||
],
|
||||
[{emqx_message:topic(M), emqx_message:payload(M)} || M <- Persisted]
|
||||
]),
|
||||
lists:sort([{emqx_message:topic(M), emqx_message:payload(M)} || M <- Persisted])
|
||||
),
|
||||
|
||||
ok.
|
||||
|
||||
%% TODO: test quic and ws too
|
||||
t_session_subscription_iterators(Config) ->
|
||||
[Node1, Node2] = ?config(nodes, Config),
|
||||
[Node1, _Node2] = ?config(nodes, Config),
|
||||
Port = get_mqtt_port(Node1, tcp),
|
||||
Topic = <<"t/topic">>,
|
||||
SubTopicFilter = <<"t/+">>,
|
||||
|
@ -202,11 +205,8 @@ t_session_subscription_iterators(Config) ->
|
|||
messages => [Message1, Message2, Message3, Message4]
|
||||
}
|
||||
end,
|
||||
fun(Results, Trace) ->
|
||||
fun(Trace) ->
|
||||
ct:pal("trace:\n ~p", [Trace]),
|
||||
#{
|
||||
messages := [_Message1, Message2, Message3 | _]
|
||||
} = Results,
|
||||
case ?of_kind(ds_session_subscription_added, Trace) of
|
||||
[] ->
|
||||
%% Since `emqx_durable_storage' is a dependency of `emqx', it gets
|
||||
|
@ -228,17 +228,6 @@ t_session_subscription_iterators(Config) ->
|
|||
),
|
||||
ok
|
||||
end,
|
||||
?assertMatch({ok, [_]}, get_all_iterator_ids(Node1)),
|
||||
{ok, [IteratorId]} = get_all_iterator_ids(Node1),
|
||||
?assertMatch({ok, [IteratorId]}, get_all_iterator_ids(Node2)),
|
||||
ReplayMessages1 = erpc:call(Node1, fun() -> consume(?DS_SHARD, IteratorId) end),
|
||||
ExpectedMessages = [Message2, Message3],
|
||||
%% Note: it is expected that this will break after replayers are in place.
|
||||
%% They might have consumed all the messages by this time.
|
||||
?assertEqual(ExpectedMessages, ReplayMessages1),
|
||||
%% Different DS shard
|
||||
ReplayMessages2 = erpc:call(Node2, fun() -> consume(?DS_SHARD, IteratorId) end),
|
||||
?assertEqual([], ReplayMessages2),
|
||||
ok
|
||||
end
|
||||
),
|
||||
|
@ -263,33 +252,26 @@ connect(Opts0 = #{}) ->
|
|||
{ok, _} = emqtt:connect(Client),
|
||||
Client.
|
||||
|
||||
consume(Shard, Replay = {_TopicFiler, _StartMS}) ->
|
||||
{ok, It} = emqx_ds_storage_layer:make_iterator(Shard, Replay),
|
||||
consume(It);
|
||||
consume(Shard, IteratorId) when is_binary(IteratorId) ->
|
||||
{ok, It} = emqx_ds_storage_layer:restore_iterator(Shard, IteratorId),
|
||||
consume(It).
|
||||
consume(TopicFilter, StartMS) ->
|
||||
Streams = emqx_ds:get_streams(?PERSISTENT_MESSAGE_DB, TopicFilter, StartMS),
|
||||
lists:flatmap(
|
||||
fun({_Rank, Stream}) ->
|
||||
{ok, It} = emqx_ds:make_iterator(Stream, TopicFilter, StartMS),
|
||||
consume(It)
|
||||
end,
|
||||
Streams
|
||||
).
|
||||
|
||||
consume(It) ->
|
||||
case emqx_ds_storage_layer:next(It) of
|
||||
{value, Msg, NIt} ->
|
||||
[emqx_persistent_message:deserialize(Msg) | consume(NIt)];
|
||||
none ->
|
||||
case emqx_ds:next(It, 100) of
|
||||
{ok, _NIt, _Msgs = []} ->
|
||||
[];
|
||||
{ok, NIt, Msgs} ->
|
||||
Msgs ++ consume(NIt);
|
||||
{ok, end_of_stream} ->
|
||||
[]
|
||||
end.
|
||||
|
||||
delete_all_messages() ->
|
||||
Persisted = consume(?DS_SHARD, {['#'], 0}),
|
||||
lists:foreach(
|
||||
fun(Msg) ->
|
||||
GUID = emqx_message:id(Msg),
|
||||
Topic = emqx_topic:words(emqx_message:topic(Msg)),
|
||||
Timestamp = emqx_guid:timestamp(GUID),
|
||||
ok = emqx_ds_storage_layer:delete(?DS_SHARD, GUID, Timestamp, Topic)
|
||||
end,
|
||||
Persisted
|
||||
).
|
||||
|
||||
receive_messages(Count) ->
|
||||
receive_messages(Count, []).
|
||||
|
||||
|
@ -306,13 +288,6 @@ receive_messages(Count, Msgs) ->
|
|||
publish(Node, Message) ->
|
||||
erpc:call(Node, emqx, publish, [Message]).
|
||||
|
||||
get_iterator_ids(Node, ClientId) ->
|
||||
Channel = erpc:call(Node, fun() ->
|
||||
[ConnPid] = emqx_cm:lookup_channels(ClientId),
|
||||
sys:get_state(ConnPid)
|
||||
end),
|
||||
emqx_connection:info({channel, {session, iterators}}, Channel).
|
||||
|
||||
app_specs() ->
|
||||
[
|
||||
emqx_durable_storage,
|
||||
|
@ -330,5 +305,6 @@ get_mqtt_port(Node, Type) ->
|
|||
{_IP, Port} = erpc:call(Node, emqx_config, get, [[listeners, Type, default, bind]]),
|
||||
Port.
|
||||
|
||||
get_all_iterator_ids(Node) ->
|
||||
erpc:call(Node, emqx_ds_storage_layer, list_iterator_prefix, [?DS_SHARD, <<>>]).
|
||||
clear_db() ->
|
||||
ok = emqx_ds:drop_db(?PERSISTENT_MESSAGE_DB),
|
||||
ok.
|
||||
|
|
|
@@ -24,6 +24,8 @@
-compile(export_all).
-compile(nowarn_export_all).

-define(PERSISTENT_MESSAGE_DB, emqx_persistent_message).

%%--------------------------------------------------------------------
%% SUITE boilerplate
%%--------------------------------------------------------------------

@@ -131,6 +133,7 @@ get_listener_port(Type, Name) ->
end_per_group(Group, Config) when Group == tcp; Group == ws; Group == quic ->
    ok = emqx_cth_suite:stop(?config(group_apps, Config));
end_per_group(_, _Config) ->
    ok = emqx_ds:drop_db(?PERSISTENT_MESSAGE_DB),
    ok.

init_per_testcase(TestCase, Config) ->

@@ -188,7 +191,7 @@ receive_messages(Count, Msgs) ->
            receive_messages(Count - 1, [Msg | Msgs]);
        _Other ->
            receive_messages(Count, Msgs)
    after 5000 ->
    after 15000 ->
        Msgs
    end.

@@ -227,11 +230,11 @@ wait_for_cm_unregister(ClientId, N) ->
    end.

publish(Topic, Payloads) ->
    publish(Topic, Payloads, false).
    publish(Topic, Payloads, false, 2).

publish(Topic, Payloads, WaitForUnregister) ->
publish(Topic, Payloads, WaitForUnregister, QoS) ->
    Fun = fun(Client, Payload) ->
        {ok, _} = emqtt:publish(Client, Topic, Payload, 2)
        {ok, _} = emqtt:publish(Client, Topic, Payload, QoS)
    end,
    do_publish(Payloads, Fun, WaitForUnregister).

@@ -510,6 +513,48 @@ t_process_dies_session_expires(Config) ->

    emqtt:disconnect(Client2).

t_publish_while_client_is_gone_qos1(Config) ->
    %% A persistent session should receive messages in its
    %% subscription even if the process owning the session dies.
    ConnFun = ?config(conn_fun, Config),
    Topic = ?config(topic, Config),
    STopic = ?config(stopic, Config),
    Payload1 = <<"hello1">>,
    Payload2 = <<"hello2">>,
    ClientId = ?config(client_id, Config),
    {ok, Client1} = emqtt:start_link([
        {proto_ver, v5},
        {clientid, ClientId},
        {properties, #{'Session-Expiry-Interval' => 30}},
        {clean_start, true}
        | Config
    ]),
    {ok, _} = emqtt:ConnFun(Client1),
    {ok, _, [1]} = emqtt:subscribe(Client1, STopic, qos1),

    ok = emqtt:disconnect(Client1),
    maybe_kill_connection_process(ClientId, Config),

    ok = publish(Topic, [Payload1, Payload2], false, 1),

    {ok, Client2} = emqtt:start_link([
        {proto_ver, v5},
        {clientid, ClientId},
        {properties, #{'Session-Expiry-Interval' => 30}},
        {clean_start, false}
        | Config
    ]),
    {ok, _} = emqtt:ConnFun(Client2),
    Msgs = receive_messages(2),
    ?assertMatch([_, _], Msgs),
    [Msg2, Msg1] = Msgs,
    ?assertEqual({ok, iolist_to_binary(Payload1)}, maps:find(payload, Msg1)),
    ?assertEqual({ok, 1}, maps:find(qos, Msg1)),
    ?assertEqual({ok, iolist_to_binary(Payload2)}, maps:find(payload, Msg2)),
    ?assertEqual({ok, 1}, maps:find(qos, Msg2)),

    ok = emqtt:disconnect(Client2).

t_publish_while_client_is_gone(init, Config) -> skip_ds_tc(Config);
t_publish_while_client_is_gone('end', _Config) -> ok.
t_publish_while_client_is_gone(Config) ->
@@ -31,48 +31,6 @@ Read pattern: pseudoserial

Number of records: O(total write throughput * retention time)

## Session storage

Data there is updated when:

- A new client connects with clean session = false
- Client subscribes to a topic
- Client unsubscribes from a topic
- Garbage collection is performed

Write throughput: low

Data is read when a client connects and replay agents are started

Read throughput: low

Data format:

`#session{clientId = "foobar", iterators = [ItKey1, ItKey2, ItKey3, ...]}`

Number of records: O(N clients)

Size of record: O(N subscriptions per client)

## Iterator storage

Data is written every time a client acks a message.

Data is read when a client reconnects and we restart replay agents.

`#iterator{key = IterKey, data = Blob}`

Number of records: O(N clients * N subscriptions per client)

Size of record: O(1)

Write throughput: high, lots of small updates

Write pattern: mostly key overwrite

Read throughput: low

Read pattern: random

# Push vs. Pull model
@@ -1,9 +1,10 @@
# EMQX Replay

`emqx_ds` is a durable storage for MQTT messages within EMQX.
It implements the following scenarios:
- Persisting messages published by clients
-
`emqx_ds` is a generic durable storage for MQTT messages within EMQX.

Concepts:


> 0. App overview introduction
> 1. let people know what your project can do specifically. Is it a base
@@ -0,0 +1,19 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-ifndef(EMQX_DS_HRL_HRL).
-define(EMQX_DS_HRL_HRL, true).

-endif.
@@ -0,0 +1,3 @@
%% -*- mode:erlang -*-
{deps,
    [{emqx_utils, {path, "../emqx_utils"}}]}.
@@ -13,50 +13,48 @@
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc Main interface module for `emqx_durable_storage' application.
%%
%% It takes care of forwarding calls to the underlying DBMS. Currently
%% only the embedded `emqx_ds_replication_layer' storage is supported,
%% so all the calls are simply passed through.
-module(emqx_ds).

-include_lib("stdlib/include/ms_transform.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
%% Management API:
-export([open_db/2, drop_db/1]).

%% API:
-export([ensure_shard/2]).
%% Messages:
-export([message_store/2, message_store/1, message_stats/0]).
%% Iterator:
-export([iterator_update/2, iterator_next/1, iterator_stats/0]).
%% Message storage API:
-export([store_batch/2, store_batch/3]).

%% internal exports:
%% Message replay API:
-export([get_streams/3, make_iterator/3, next/2]).

%% Misc. API:
-export([]).

-export_type([
    keyspace/0,
    message_id/0,
    message_stats/0,
    message_store_opts/0,
    replay/0,
    replay_id/0,
    iterator_id/0,
    iterator/0,
    shard/0,
    shard_id/0,
    topic/0,
    create_db_opts/0,
    builtin_db_opts/0,
    db/0,
    time/0,
    topic_filter/0,
    time/0
    topic/0,
    stream/0,
    stream_rank/0,
    iterator/0,
    message_id/0,
    next_result/1, next_result/0,
    store_batch_result/0,
    make_iterator_result/1, make_iterator_result/0,
    get_iterator_result/1
]).

%%================================================================================
%% Type declarations
%%================================================================================

-type iterator() :: term().

-type iterator_id() :: binary().

-type message_store_opts() :: #{}.

-type message_stats() :: #{}.

-type message_id() :: binary().
-type db() :: atom().

%% Parsed topic.
-type topic() :: list(binary()).

@@ -64,9 +62,22 @@
%% Parsed topic filter.
-type topic_filter() :: list(binary() | '+' | '#' | '').

-type keyspace() :: atom().
-type shard_id() :: binary().
-type shard() :: {keyspace(), shard_id()}.
-type stream_rank() :: {term(), integer()}.

-opaque stream() :: emqx_ds_replication_layer:stream().

-opaque iterator() :: emqx_ds_replication_layer:iterator().

-type store_batch_result() :: ok | {error, _}.

-type make_iterator_result(Iterator) :: {ok, Iterator} | {error, _}.

-type make_iterator_result() :: make_iterator_result(iterator()).

-type next_result(Iterator) ::
    {ok, Iterator, [emqx_types:message()]} | {ok, end_of_stream} | {error, _}.

-type next_result() :: next_result(iterator()).

%% Timestamp
%% Earliest possible timestamp is 0.

@@ -74,70 +85,102 @@
%% use in emqx_guid. Otherwise, the iterators won't match the message timestamps.
-type time() :: non_neg_integer().

-type replay_id() :: binary().
-type message_store_opts() :: #{}.

-type replay() :: {
    _TopicFilter :: topic_filter(),
    _StartTime :: time()
}.
-type builtin_db_opts() ::
    #{
        backend := builtin,
        storage := emqx_ds_storage_layer:prototype()
    }.

-type create_db_opts() ::
    builtin_db_opts().

-type message_id() :: emqx_ds_replication_layer:message_id().

-type get_iterator_result(Iterator) :: {ok, Iterator} | undefined.

%%================================================================================
%% API functions
%%================================================================================

-spec ensure_shard(shard(), emqx_ds_storage_layer:options()) ->
    ok | {error, _Reason}.
ensure_shard(Shard, Options) ->
    case emqx_ds_storage_layer_sup:start_shard(Shard, Options) of
        {ok, _Pid} ->
            ok;
        {error, {already_started, _Pid}} ->
            ok;
        {error, Reason} ->
            {error, Reason}
    end.
%% @doc Different DBs are completely independent from each other. They
%% could represent something like different tenants.
-spec open_db(db(), create_db_opts()) -> ok.
open_db(DB, Opts = #{backend := builtin}) ->
    emqx_ds_replication_layer:open_db(DB, Opts).

%%--------------------------------------------------------------------------------
%% Message
%%--------------------------------------------------------------------------------
-spec message_store([emqx_types:message()], message_store_opts()) ->
    {ok, [message_id()]} | {error, _}.
message_store(_Msg, _Opts) ->
    %% TODO
    {error, not_implemented}.
%% @doc TODO: currently if one or a few shards are down, they won't be

-spec message_store([emqx_types:message()]) -> {ok, [message_id()]} | {error, _}.
message_store(Msg) ->
    %% TODO
    message_store(Msg, #{}).
%% deleted.
-spec drop_db(db()) -> ok.
drop_db(DB) ->
    emqx_ds_replication_layer:drop_db(DB).

-spec message_stats() -> message_stats().
message_stats() ->
    #{}.
-spec store_batch(db(), [emqx_types:message()], message_store_opts()) -> store_batch_result().
store_batch(DB, Msgs, Opts) ->
    emqx_ds_replication_layer:store_batch(DB, Msgs, Opts).

%%--------------------------------------------------------------------------------
%% Session
%%--------------------------------------------------------------------------------
-spec store_batch(db(), [emqx_types:message()]) -> store_batch_result().
store_batch(DB, Msgs) ->
    store_batch(DB, Msgs, #{}).

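%% A minimal usage sketch (not part of this commit). It assumes that
%% `emqx_ds_storage_reference' is a valid `emqx_ds_storage_layer:prototype()'
%% and uses `emqx_message:make/3' to build a message:
%%
%%   example_store() ->
%%       ok = emqx_ds:open_db(my_db, #{
%%           backend => builtin,
%%           storage => {emqx_ds_storage_reference, #{}}
%%       }),
%%       Msg = emqx_message:make(<<"client1">>, <<"t/1">>, <<"hello">>),
%%       ok = emqx_ds:store_batch(my_db, [Msg]).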
%%--------------------------------------------------------------------------------
%% Iterator (pull API)
%%--------------------------------------------------------------------------------
%% @doc Get a list of streams needed for replaying a topic filter.
%%
%% Motivation: under the hood, EMQX may store different topics at
%% different locations or even in different databases. A wildcard
%% topic filter may require pulling data from any number of locations.
%%
%% Stream is an abstraction exposed by `emqx_ds' that, on one hand,
%% reflects the notion that different topics can be stored
%% differently, but hides the implementation details.
%%
%% While having to work with multiple iterators to replay a topic
%% filter may be cumbersome, it opens up some possibilities:
%%
%% 1. It's possible to parallelize replays
%%
%% 2. Streams can be shared between different clients to implement
%% shared subscriptions
%%
%% IMPORTANT RULES:
%%
%% 0. There is no 1-to-1 mapping between MQTT topics and streams. One
%% stream can contain any number of MQTT topics.
%%
%% 1. New streams matching the topic filter and start time can appear
%% without notice, so the replayer must periodically call this
%% function to get the updated list of streams.
%%
%% 2. Streams may depend on one another. Therefore, care should be
%% taken while replaying them in parallel to avoid out-of-order
%% replay. This function returns the stream together with its
%% "coordinate": `stream_rank()'.
%%
%% Stream rank is a tuple of two integers, let's call them X and Y. If
%% the X coordinates of two streams are different, they are independent
%% and can be replayed in parallel. If they are the same, then the
%% stream with the smaller Y coordinate should be replayed first. If
%% the Y coordinates are equal, then the streams are independent.
%%
%% A stream is fully consumed when the `next/2' function returns
%% `end_of_stream'. Then and only then can the client proceed to
%% replaying streams that depend on the given one.
-spec get_streams(db(), topic_filter(), time()) -> [{stream_rank(), stream()}].
get_streams(DB, TopicFilter, StartTime) ->
    emqx_ds_replication_layer:get_streams(DB, TopicFilter, StartTime).

%% @doc Called when a client acks a message
-spec iterator_update(iterator_id(), iterator()) -> ok.
iterator_update(_IterId, _Iter) ->
    %% TODO
    ok.
-spec make_iterator(stream(), topic_filter(), time()) -> make_iterator_result().
make_iterator(Stream, TopicFilter, StartTime) ->
    emqx_ds_replication_layer:make_iterator(Stream, TopicFilter, StartTime).

%% @doc Called when a client acks a message
-spec iterator_next(iterator()) -> {value, emqx_types:message(), iterator()} | none | {error, _}.
iterator_next(_Iter) ->
    %% TODO
    none.
-spec next(iterator(), pos_integer()) -> next_result().
next(Iter, BatchSize) ->
    emqx_ds_replication_layer:next(Iter, BatchSize).

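%% A sketch of a complete replay loop, mirroring `consume/2' from the test
%% suite earlier in this commit: list the streams matching a topic filter,
%% open an iterator per stream, and pull batches until `end_of_stream' (the
%% batch size of 100 is arbitrary). A production replayer would additionally
%% group streams by the X component of `stream_rank()' and fully consume
%% lower-Y streams first, as the doc comment above requires:
%%
%%   replay(DB, TopicFilter, StartTime) ->
%%       Streams = emqx_ds:get_streams(DB, TopicFilter, StartTime),
%%       lists:flatmap(
%%           fun({_Rank, Stream}) ->
%%               {ok, It} = emqx_ds:make_iterator(Stream, TopicFilter, StartTime),
%%               drain(It)
%%           end,
%%           Streams
%%       ).
%%
%%   drain(It0) ->
%%       case emqx_ds:next(It0, 100) of
%%           {ok, _It, []} -> [];
%%           {ok, It, Msgs} -> Msgs ++ drain(It);
%%           {ok, end_of_stream} -> []
%%       end.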
-spec iterator_stats() -> #{}.
iterator_stats() ->
    #{}.
%%================================================================================
%% Internal exports
%%================================================================================

%%================================================================================
%% Internal functions
@@ -0,0 +1,36 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-ifndef(EMQX_DS_BITMASK_HRL).
-define(EMQX_DS_BITMASK_HRL, true).

-record(filter_scan_action, {
    offset :: emqx_ds_bitmask_keymapper:offset(),
    size :: emqx_ds_bitmask_keymapper:bitsize(),
    min :: non_neg_integer(),
    max :: non_neg_integer()
}).

-record(filter, {
    size :: non_neg_integer(),
    bitfilter :: non_neg_integer(),
    bitmask :: non_neg_integer(),
    %% Ranges (in _bitsource_ basis):
    bitsource_ranges :: array:array(#filter_scan_action{}),
    range_min :: non_neg_integer(),
    range_max :: non_neg_integer()
}).

-endif.
@@ -0,0 +1,824 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------
-module(emqx_ds_bitmask_keymapper).

%%================================================================================
%% @doc This module is used to map N-dimensional coordinates to a
%% 1-dimensional space.
%%
%% Example:
%%
%% Let us assume that `T' is a topic and `t' is time. These are the two
%% dimensions used to index messages. They can be viewed as
%% "coordinates" of an MQTT message in a 2D space.
%%
%% Oftentimes, when wildcard subscription is used, keys must be
%% scanned in both dimensions simultaneously.
%%
%% RocksDB allows iterating over sorted keys very quickly. This means we
%% need to map our two-dimensional keys to a single index that is
%% sorted in a way that helps to iterate over both time and topic
%% without having to do a lot of random seeks.
%%
%% == Mapping of 2D keys to rocksdb keys ==
%%
%% We use a "zigzag" pattern to store messages, where the rocksdb key is
%% composed like this:
%%
%%        |ttttt|TTTTTTTTT|tttt|
%%         ^     ^         ^
%%         |     |         |
%%  +------+     |         +----------+
%%  |            |                    |
%%  most significant   topic hash   least significant
%%  bits of timestamp               bits of timestamp
%%  (a.k.a epoch)                   (a.k.a time offset)
%%
%% Topic hash is level-aware: each topic level is hashed separately
%% and the resulting hashes are bitwise-concatenated. This allows us
%% to map topics to fixed-length bitstrings while keeping some degree
%% of information about the hierarchy.
%%
%% The next important concept is what we call "epoch". The duration of
%% the epoch is determined by the maximum time offset. The epoch is
%% calculated by shifting bits of the timestamp right.
%%
%% The resulting index is a space-filling curve that looks like
%% this in the topic-time 2D space:
%%
%%  T ^ ---->------  |---->------  |---->------
%%    |         --/  /        --/  /        --/
%%    |      -<-/    |     -<-/    |     -<-/
%%    |   -/         |  -/         |  -/
%%    | ---->------  | ---->------ | ---->------
%%    |         --/  /        --/  /        --/
%%    |      ---/    |     ---/    |     ---/
%%    |   -/         ^  -/         ^  -/
%%    | ---->------  | ---->------ | ---->------
%%    |         --/  /        --/  /        --/
%%    |      -<-/    |     -<-/    |     -<-/
%%    |   -/         |  -/         |  -/
%%    | ---->------  | ---->------ | ---------->
%%    |
%%   -+------------+-----------------------------> t
%%        epoch
%%
%% This structure allows quickly seeking to the first message that
%% was recorded in a certain epoch in a certain topic or a
%% group of topics matching a filter like `foo/bar/#`.
%%
%% Due to its structure, for each pair of rocksdb keys K1 and K2, such
%% that K1 > K2 and topic(K1) = topic(K2), timestamp(K1) >
%% timestamp(K2).
%% That is, replay doesn't reorder messages published in each
%% individual topic.
%%
%% This property doesn't hold between different topics, but it's not deemed
%% a problem right now.
%%
%%================================================================================

%% API:
-export([
    make_keymapper/1,
    vector_to_key/2,
    bin_vector_to_key/2,
    key_to_vector/2,
    bin_key_to_vector/2,
    key_to_bitstring/2,
    bitstring_to_key/2,
    make_filter/2,
    ratchet/2,
    bin_increment/2,
    bin_checkmask/2,
    bitsize/1
]).

-export_type([vector/0, key/0, dimension/0, offset/0, bitsize/0, bitsource/0, keymapper/0]).

-compile(
    {inline, [
        ones/1,
        extract/2,
        extract_inv/2
    ]}
).

-elvis([{elvis_style, no_if_expression, disable}]).

-ifdef(TEST).
-include_lib("proper/include/proper.hrl").
-include_lib("eunit/include/eunit.hrl").
-endif.

%%================================================================================
%% Type declarations
%%================================================================================

-type scalar() :: integer().

-type vector() :: [scalar()].

%% N-th coordinate of a vector:
-type dimension() :: pos_integer().

-type offset() :: non_neg_integer().

-type bitsize() :: pos_integer().

%% The resulting 1D key:
-type key() :: non_neg_integer().

-type bitsource() ::
    %% Consume `_Size' bits starting at the `_Offset'-th
    %% bit of the N-th element of the input vector:
    {dimension(), offset(), bitsize()}.

-record(scan_action, {
    src_bitmask :: integer(),
    src_offset :: offset(),
    dst_offset :: offset()
}).

-type scan_action() :: #scan_action{}.

-type scanner() :: [[scan_action()]].

-record(keymapper, {
    schema :: [bitsource()],
    scanner :: scanner(),
    size :: non_neg_integer(),
    dim_sizeof :: [non_neg_integer()]
}).

-opaque keymapper() :: #keymapper{}.

-type scalar_range() ::
    any | {'=', scalar() | infinity} | {'>=', scalar()} | {scalar(), '..', scalar()}.

-include("emqx_ds_bitmask.hrl").

-type filter() :: #filter{}.

%%================================================================================
%% API functions
%%================================================================================

%% @doc Create a keymapper object that stores the "schema" of the
%% transformation from a list of bitsources.
%%
%% Note: Dimension is 1-based.
%%
%% Note: the order of bitsources is important. The first element of the
%% list is mapped to the _least_ significant bits of the key, and the
%% last element becomes the most significant bits.
-spec make_keymapper([bitsource()]) -> keymapper().
make_keymapper(Bitsources) ->
    Arr0 = array:new([{fixed, false}, {default, {0, []}}]),
    {Size, Arr} = fold_bitsources(
        fun(DestOffset, {Dim0, Offset, Size}, Acc) ->
            Dim = Dim0 - 1,
            Action = #scan_action{
                src_bitmask = ones(Size), src_offset = Offset, dst_offset = DestOffset
            },
            {DimSizeof, Actions} = array:get(Dim, Acc),
            array:set(Dim, {DimSizeof + Size, [Action | Actions]}, Acc)
        end,
        Arr0,
        Bitsources
    ),
    {DimSizeof, Scanner} = lists:unzip(array:to_list(Arr)),
    #keymapper{
        schema = Bitsources,
        scanner = Scanner,
        size = Size,
        dim_sizeof = DimSizeof
    }.

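%% Worked example (cf. `make_keymapper1_test/0' below): the bitsources
%% `[{1, 0, 3}, {2, 0, 5}]' produce an 8-bit key where the low 3 bits of
%% dimension 1 occupy the least significant bits and the low 5 bits of
%% dimension 2 sit above them:
%%
%%   Keymapper = make_keymapper([{1, 0, 3}, {2, 0, 5}]),
%%   2#10101011 = vector_to_key(Keymapper, [2#011, 2#10101]).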
-spec bitsize(keymapper()) -> pos_integer().
bitsize(#keymapper{size = Size}) ->
    Size.

%% @doc Map N-dimensional vector to a scalar key.
%%
%% Note: this function is not injective.
-spec vector_to_key(keymapper(), vector()) -> key().
vector_to_key(#keymapper{scanner = []}, []) ->
    0;
vector_to_key(#keymapper{scanner = [Actions | Scanner]}, [Coord | Vector]) ->
    do_vector_to_key(Actions, Scanner, Coord, Vector, 0).

%% @doc Same as `vector_to_key', but it works with binaries, and outputs a binary.
-spec bin_vector_to_key(keymapper(), [binary()]) -> binary().
bin_vector_to_key(Keymapper = #keymapper{dim_sizeof = DimSizeof, size = Size}, Binaries) ->
    Vec = lists:zipwith(
        fun(Bin, SizeOf) ->
            <<Int:SizeOf>> = Bin,
            Int
        end,
        Binaries,
        DimSizeof
    ),
    Key = vector_to_key(Keymapper, Vec),
    <<Key:Size>>.

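%% Example (cf. `vector_to_key_bin_test/0' below): with three byte-aligned
%% dimensions the binaries concatenate directly. The first bitsource lands
%% in the least significant bits, so reversing the schema puts dimension 1
%% into the leftmost bytes of the resulting binary:
%%
%%   <<"wellhelloworld">> =
%%       bin_vector_to_key(Keymapper, [<<"well">>, <<"hello">>, <<"world">>])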
%% @doc Map a key to a vector.
%%
%% Note: `vector_to_key(key_to_vector(K)) = K' but
%% `key_to_vector(vector_to_key(V)) = V' is not guaranteed.
-spec key_to_vector(keymapper(), key()) -> vector().
key_to_vector(#keymapper{scanner = Scanner}, Key) ->
    lists:map(
        fun(Actions) ->
            lists:foldl(
                fun(Action, Acc) ->
                    Acc bor extract_inv(Key, Action)
                end,
                0,
                Actions
            )
        end,
        Scanner
    ).

%% @doc Same as `key_to_vector', but it works with binaries.
-spec bin_key_to_vector(keymapper(), binary()) -> [binary()].
bin_key_to_vector(Keymapper = #keymapper{dim_sizeof = DimSizeof, size = Size}, BinKey) ->
    <<Key:Size>> = BinKey,
    Vector = key_to_vector(Keymapper, Key),
    lists:zipwith(
        fun(Elem, SizeOf) ->
            <<Elem:SizeOf>>
        end,
        Vector,
        DimSizeof
    ).

%% @doc Transform a bitstring to a key
-spec bitstring_to_key(keymapper(), bitstring()) -> key().
bitstring_to_key(#keymapper{size = Size}, Bin) ->
    case Bin of
        <<Key:Size>> ->
            Key;
        _ ->
            error({invalid_key, Bin, Size})
    end.

%% @doc Transform a key to a fixed-size bitstring
-spec key_to_bitstring(keymapper(), key()) -> bitstring().
key_to_bitstring(#keymapper{size = Size}, Key) ->
    <<Key:Size>>.

%% @doc Create a filter object that facilitates range scans.
-spec make_filter(keymapper(), [scalar_range()]) -> filter().
make_filter(
    KeyMapper = #keymapper{schema = Schema, dim_sizeof = DimSizeof, size = TotalSize}, Filter0
) ->
    NDim = length(DimSizeof),
    %% Transform "symbolic" constraints to ranges:
    Filter1 = constraints_to_ranges(KeyMapper, Filter0),
    {Bitmask, Bitfilter} = make_bitfilter(KeyMapper, Filter1),
    %% Calculate maximum source offset as per bitsource specification:
    MaxOffset = lists:foldl(
        fun({Dim, Offset, _Size}, Acc) ->
            maps:update_with(
                Dim, fun(OldVal) -> max(OldVal, Offset) end, maps:merge(#{Dim => 0}, Acc)
            )
        end,
        #{},
        Schema
    ),
    %% Adjust minimum and maximum values for each interval like this:
    %%
    %% Min: 110100|101011 -> 110100|00000
    %% Max: 110101|001011 -> 110101|11111
    %%             ^
    %%             |
    %%        max offset
    %%
    %% This is needed so when we increment the vector, we always scan
    %% the full range of least significant bits.
    Filter2 = lists:zipwith(
        fun
            ({Val, Val}, _Dim) ->
                {Val, Val};
            ({Min0, Max0}, Dim) ->
                Offset = maps:get(Dim, MaxOffset, 0),
                %% Set least significant bits of Min to 0:
                Min = (Min0 bsr Offset) bsl Offset,
                %% Set least significant bits of Max to 1:
                Max = Max0 bor ones(Offset),
                {Min, Max}
        end,
        Filter1,
        lists:seq(1, NDim)
    ),
    %% Project the vector into "bitsource coordinate system":
    {_, Filter} = fold_bitsources(
        fun(DstOffset, {Dim, SrcOffset, Size}, Acc) ->
            {Min0, Max0} = lists:nth(Dim, Filter2),
            Min = (Min0 bsr SrcOffset) band ones(Size),
            Max = (Max0 bsr SrcOffset) band ones(Size),
            Action = #filter_scan_action{
                offset = DstOffset,
                size = Size,
                min = Min,
                max = Max
            },
            [Action | Acc]
        end,
        [],
        Schema
    ),
    Ranges = array:from_list(lists:reverse(Filter)),
    %% Compute estimated upper and lower bounds of a _continuous_
    %% interval where all keys lie:
    case Filter of
        [] ->
            RangeMin = 0,
            RangeMax = 0;
        [#filter_scan_action{offset = MSBOffset, min = MSBMin, max = MSBMax} | _] ->
            RangeMin = MSBMin bsl MSBOffset,
            RangeMax = MSBMax bsl MSBOffset bor ones(MSBOffset)
    end,
    %% Final value
    #filter{
        size = TotalSize,
        bitmask = Bitmask,
        bitfilter = Bitfilter,
        bitsource_ranges = Ranges,
        range_min = RangeMin,
        range_max = RangeMax
    }.

%% @doc Given a filter `F' and key `K0', return the smallest key `K'
%% that satisfies the following conditions:
%%
%% 1. `K >= K0'
%%
%% 2. `K' satisfies filter `F'.
%%
%% If these conditions cannot be satisfied, return `overflow'.
%%
%% Corollary: `K' may be equal to `K0'.
-spec ratchet(filter(), key()) -> key() | overflow.
ratchet(#filter{bitsource_ranges = Ranges, range_max = Max}, Key) when Key =< Max ->
    %% This function works in two steps: first, it finds the position
    %% of the bitsource ("pivot point") corresponding to the part of the
    %% key that should be incremented (or set to the _minimum_ value
    %% of the range, in case the respective part of the original key
    %% is less than the minimum). It also returns the "increment": a
    %% value that should be added to the part of the key at the pivot
    %% point. The increment can be 0 or 1.
    %%
    %% Then it transforms the key using the following operation:
    %%
    %% 1. Parts of the key that are less significant than the pivot
    %%    point are reset to their minimum values.
    %%
    %% 2. `Increment' is added to the part of the key at the pivot
    %%    point.
    %%
    %% 3. The rest of the key stays the same.
    NDim = array:size(Ranges),
    case ratchet_scan(Ranges, NDim, Key, 0, {_Pivot0 = -1, _Increment0 = 0}, _Carry = 0) of
        overflow ->
            overflow;
        {Pivot, Increment} ->
            ratchet_do(Ranges, Key, NDim - 1, Pivot, Increment)
    end;
ratchet(_, _) ->
    overflow.

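%% Worked example (cf. `ratchet2_test/0' below): with a key layout of
%% |static|epoch|topic hash|time offset| and a filter that fixes the
%% static index to 16#aa and the topic hash to 16#cc, exhausting the keys
%% of one epoch makes `ratchet' increment the epoch and reset the less
%% significant parts to their minima:
%%
%%   16#aa01cc00 = ratchet(F1, 16#aa00cd00)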
%% @doc Given a binary representing a key and a filter, return the
%% next key matching the filter, or `overflow' if such a key doesn't
%% exist.
-spec bin_increment(filter(), binary()) -> binary() | overflow.
bin_increment(Filter = #filter{size = Size}, <<>>) ->
    Key = ratchet(Filter, 0),
    <<Key:Size>>;
bin_increment(
    Filter = #filter{size = Size, bitmask = Bitmask, bitfilter = Bitfilter, range_max = RangeMax},
    KeyBin
) ->
    %% The key may contain a random suffix, skip it:
    <<Key0:Size, _/binary>> = KeyBin,
    Key1 = Key0 + 1,
    if
        Key1 band Bitmask =:= Bitfilter, Key1 =< RangeMax ->
            <<Key1:Size>>;
        true ->
            case ratchet(Filter, Key1) of
                overflow ->
                    overflow;
                Key ->
                    <<Key:Size>>
            end
    end.

%% @doc Given a filter and a binary representation of a key, return
%% `false' if the key _doesn't_ match the filter. This function
%% returning `true' is a necessary, but not sufficient, condition that
%% the key satisfies the filter.
-spec bin_checkmask(filter(), binary()) -> boolean().
bin_checkmask(#filter{size = Size, bitmask = Bitmask, bitfilter = Bitfilter}, Key) ->
    case Key of
        <<Int:Size>> ->
            Int band Bitmask =:= Bitfilter;
        _ ->
            false
    end.

%%================================================================================
%% Internal functions
%%================================================================================

%% Note: this function operates in bitsource basis, scanning it from 0
%% to NDim (i.e. from the least significant bits to the most
%% significant bits)
ratchet_scan(_Ranges, NDim, _Key, NDim, Pivot, 0) ->
    %% We've reached the end:
    Pivot;
ratchet_scan(_Ranges, NDim, _Key, NDim, _Pivot, 1) ->
    %% We've reached the end, but the key is still not large enough:
    overflow;
ratchet_scan(Ranges, NDim, Key, I, Pivot0, Carry) ->
    #filter_scan_action{offset = Offset, size = Size, min = Min, max = Max} = array:get(I, Ranges),
    %% Extract the I-th element of the vector from the original key:
    Elem = ((Key bsr Offset) band ones(Size)) + Carry,
    if
        Elem < Min ->
            %% The I-th coordinate is less than the specified minimum.
            %%
            %% We reset this coordinate to the minimum value. It means
            %% we incremented this bitposition, so the less significant
            %% bits have to be reset to their respective minimum
            %% values:
            Pivot = {I + 1, 0},
            ratchet_scan(Ranges, NDim, Key, I + 1, Pivot, 0);
        Elem > Max ->
            %% The I-th coordinate is larger than the specified
            %% maximum. We can only fix this problem by incrementing
            %% the next coordinate (i.e. more significant bits).
            %%
            %% We reset this coordinate to the minimum value, and
            %% increment the next coordinate (by setting `Carry' to
            %% 1).
            Pivot = {I + 1, 1},
            ratchet_scan(Ranges, NDim, Key, I + 1, Pivot, 1);
        true ->
            %% Coordinate is within range:
            ratchet_scan(Ranges, NDim, Key, I + 1, Pivot0, 0)
    end.

%% Note: this function operates in bitsource basis, scanning it from
%% NDim to 0. It applies the transformation specified by
%% `ratchet_scan'.
ratchet_do(_Ranges, _Key, I, _Pivot, _Increment) when I < 0 ->
    0;
ratchet_do(Ranges, Key, I, Pivot, Increment) ->
    #filter_scan_action{offset = Offset, size = Size, min = Min} = array:get(I, Ranges),
    Mask = ones(Offset + Size) bxor ones(Offset),
    Elem =
        if
            I > Pivot ->
                Mask band Key;
            I =:= Pivot ->
                (Mask band Key) + (Increment bsl Offset);
            true ->
                Min bsl Offset
        end,
    %% erlang:display(
    %%     {ratchet_do, I, integer_to_list(Key, 16), integer_to_list(Mask, 2),
    %%         integer_to_list(Elem, 16)}
    %% ),
    Elem bor ratchet_do(Ranges, Key, I - 1, Pivot, Increment).

-spec make_bitfilter(keymapper(), [{non_neg_integer(), non_neg_integer()}]) ->
    {non_neg_integer(), non_neg_integer()}.
make_bitfilter(Keymapper = #keymapper{dim_sizeof = DimSizeof}, Ranges) ->
    L = lists:zipwith(
        fun
            ({N, N}, Bits) ->
                %% For strict equality we can employ bitmask:
                {ones(Bits), N};
            (_, _) ->
                {0, 0}
        end,
        Ranges,
        DimSizeof
    ),
    {Bitmask, Bitfilter} = lists:unzip(L),
    {vector_to_key(Keymapper, Bitmask), vector_to_key(Keymapper, Bitfilter)}.

%% Transform constraints into a list of closed intervals that the
%% vector elements should lie in.
constraints_to_ranges(#keymapper{dim_sizeof = DimSizeof}, Filter) ->
    lists:zipwith(
        fun(Constraint, Bitsize) ->
            Max = ones(Bitsize),
            case Constraint of
                any ->
                    {0, Max};
                {'=', infinity} ->
                    {Max, Max};
                {'=', Val} when Val =< Max ->
                    {Val, Val};
                {'>=', Val} when Val =< Max ->
                    {Val, Max};
                {A, '..', B} when A =< Max, B =< Max ->
                    {A, B}
            end
        end,
        Filter,
        DimSizeof
    ).

-spec fold_bitsources(fun((_DstOffset :: non_neg_integer(), bitsource(), Acc) -> Acc), Acc, [
    bitsource()
]) -> {bitsize(), Acc}.
fold_bitsources(Fun, InitAcc, Bitsources) ->
    lists:foldl(
        fun(Bitsource = {_Dim, _Offset, Size}, {DstOffset, Acc0}) ->
            Acc = Fun(DstOffset, Bitsource, Acc0),
            {DstOffset + Size, Acc}
        end,
        {0, InitAcc},
        Bitsources
    ).

do_vector_to_key([], [], _Coord, [], Acc) ->
    Acc;
do_vector_to_key([], [NewActions | Scanner], _Coord, [NewCoord | Vector], Acc) ->
    do_vector_to_key(NewActions, Scanner, NewCoord, Vector, Acc);
do_vector_to_key([Action | Actions], Scanner, Coord, Vector, Acc0) ->
    Acc = Acc0 bor extract(Coord, Action),
    do_vector_to_key(Actions, Scanner, Coord, Vector, Acc).

-spec extract(_Source :: scalar(), scan_action()) -> integer().
extract(Src, #scan_action{src_bitmask = SrcBitmask, src_offset = SrcOffset, dst_offset = DstOffset}) ->
    ((Src bsr SrcOffset) band SrcBitmask) bsl DstOffset.

%% extract^-1
-spec extract_inv(_Dest :: scalar(), scan_action()) -> integer().
extract_inv(Dest, #scan_action{
    src_bitmask = SrcBitmask, src_offset = SrcOffset, dst_offset = DestOffset
}) ->
    ((Dest bsr DestOffset) band SrcBitmask) bsl SrcOffset.

ones(Bits) ->
    1 bsl Bits - 1.

%%================================================================================
%% Unit tests
%%================================================================================

-ifdef(TEST).

make_keymapper0_test() ->
    Schema = [],
    ?assertEqual(
        #keymapper{
            schema = Schema,
            scanner = [],
            size = 0,
            dim_sizeof = []
        },
        make_keymapper(Schema)
    ).

make_keymapper1_test() ->
    Schema = [{1, 0, 3}, {2, 0, 5}],
    ?assertEqual(
        #keymapper{
            schema = Schema,
            scanner = [
                [#scan_action{src_bitmask = 2#111, src_offset = 0, dst_offset = 0}],
                [#scan_action{src_bitmask = 2#11111, src_offset = 0, dst_offset = 3}]
            ],
            size = 8,
            dim_sizeof = [3, 5]
        },
        make_keymapper(Schema)
    ).

make_keymapper2_test() ->
    Schema = [{1, 0, 3}, {2, 0, 5}, {1, 3, 5}],
    ?assertEqual(
        #keymapper{
            schema = Schema,
            scanner = [
                [
                    #scan_action{src_bitmask = 2#11111, src_offset = 3, dst_offset = 8},
                    #scan_action{src_bitmask = 2#111, src_offset = 0, dst_offset = 0}
                ],
                [#scan_action{src_bitmask = 2#11111, src_offset = 0, dst_offset = 3}]
            ],
            size = 13,
            dim_sizeof = [8, 5]
        },
        make_keymapper(Schema)
    ).

vector_to_key0_test() ->
    Schema = [],
    Vector = [],
    ?assertEqual(0, vec2key(Schema, Vector)).

vector_to_key1_test() ->
    Schema = [{1, 0, 8}],
    ?assertEqual(16#ff, vec2key(Schema, [16#ff])),
    ?assertEqual(16#1a, vec2key(Schema, [16#1a])),
    ?assertEqual(16#ff, vec2key(Schema, [16#aaff])).

%% Test handling of source offset:
vector_to_key2_test() ->
    Schema = [{1, 8, 8}],
    ?assertEqual(0, vec2key(Schema, [16#ff])),
    ?assertEqual(16#1a, vec2key(Schema, [16#1aff])),
    ?assertEqual(16#aa, vec2key(Schema, [16#11aaff])).

%% Basic test of a 2D vector:
vector_to_key3_test() ->
    Schema = [{1, 0, 8}, {2, 0, 8}],
    ?assertEqual(16#aaff, vec2key(Schema, [16#ff, 16#aa])),
    ?assertEqual(16#2211, vec2key(Schema, [16#aa11, 16#bb22])).

%% Advanced test with a 2D vector:
vector_to_key4_test() ->
    Schema = [{1, 0, 8}, {2, 0, 8}, {1, 8, 8}, {2, 16, 8}],
    ?assertEqual(16#bb112211, vec2key(Schema, [16#aa1111, 16#bb2222])).

%% Test with binaries:
vector_to_key_bin_test() ->
    Schema = [{1, 0, 8 * 4}, {2, 0, 8 * 5}, {3, 0, 8 * 5}],
    Keymapper = make_keymapper(lists:reverse(Schema)),
    ?assertMatch(
        <<"wellhelloworld">>, bin_vector_to_key(Keymapper, [<<"well">>, <<"hello">>, <<"world">>])
    ).

key_to_vector0_test() ->
    Schema = [],
    key2vec(Schema, []).

key_to_vector1_test() ->
    Schema = [{1, 0, 8}, {2, 0, 8}],
    key2vec(Schema, [1, 1]),
    key2vec(Schema, [255, 255]),
    key2vec(Schema, [255, 1]),
    key2vec(Schema, [0, 1]),
    key2vec(Schema, [255, 0]).

key_to_vector2_test() ->
    Schema = [{1, 0, 3}, {2, 0, 8}, {1, 3, 5}],
    key2vec(Schema, [1, 1]),
    key2vec(Schema, [255, 255]),
    key2vec(Schema, [255, 1]),
    key2vec(Schema, [0, 1]),
    key2vec(Schema, [255, 0]).

make_bitmask0_test() ->
    Keymapper = make_keymapper([]),
    ?assertMatch({0, 0}, mkbmask(Keymapper, [])).

make_bitmask1_test() ->
    Keymapper = make_keymapper([{1, 0, 8}]),
    ?assertEqual({0, 0}, mkbmask(Keymapper, [any])),
    ?assertEqual({16#ff, 1}, mkbmask(Keymapper, [{'=', 1}])),
    ?assertEqual({16#ff, 255}, mkbmask(Keymapper, [{'=', 255}])),
    ?assertEqual({0, 0}, mkbmask(Keymapper, [{'>=', 0}])),
    ?assertEqual({0, 0}, mkbmask(Keymapper, [{'>=', 1}])),
    ?assertEqual({0, 0}, mkbmask(Keymapper, [{'>=', 16#f}])).

make_bitmask2_test() ->
    Keymapper = make_keymapper([{1, 0, 3}, {2, 0, 4}, {3, 0, 2}]),
    ?assertEqual({2#00_0000_000, 2#00_0000_000}, mkbmask(Keymapper, [any, any, any])),
    ?assertEqual({2#11_0000_000, 2#00_0000_000}, mkbmask(Keymapper, [any, any, {'=', 0}])),
    ?assertEqual({2#00_1111_000, 2#00_0000_000}, mkbmask(Keymapper, [any, {'=', 0}, any])),
    ?assertEqual({2#00_0000_111, 2#00_0000_000}, mkbmask(Keymapper, [{'=', 0}, any, any])).

make_bitmask3_test() ->
    %% Key format of type |TimeOffset|Topic|Epoch|:
    Keymapper = make_keymapper([{1, 0, 8}, {2, 0, 8}, {1, 8, 8}]),
    ?assertEqual({2#00000000_00000000_00000000, 16#00_00_00}, mkbmask(Keymapper, [any, any])),
    ?assertEqual(
        {2#11111111_11111111_11111111, 16#aa_cc_bb},
        mkbmask(Keymapper, [{'=', 16#aabb}, {'=', 16#cc}])
    ),
    ?assertEqual(
        {2#00000000_11111111_00000000, 16#00_bb_00}, mkbmask(Keymapper, [{'>=', 255}, {'=', 16#bb}])
    ).

make_filter_test() ->
    KeyMapper = make_keymapper([]),
    Filter = [],
    ?assertMatch(#filter{size = 0, bitmask = 0, bitfilter = 0}, make_filter(KeyMapper, Filter)).

ratchet1_test() ->
    Bitsources = [{1, 0, 8}],
    M = make_keymapper(Bitsources),
    F = make_filter(M, [any]),
    #filter{bitsource_ranges = Rarr} = F,
    ?assertMatch(
        [
            #filter_scan_action{
                offset = 0,
                size = 8,
                min = 0,
                max = 16#ff
            }
        ],
        array:to_list(Rarr)
    ),
    ?assertEqual(0, ratchet(F, 0)),
    ?assertEqual(16#fa, ratchet(F, 16#fa)),
    ?assertEqual(16#ff, ratchet(F, 16#ff)),
    ?assertEqual(overflow, ratchet(F, 16#100)).

%% erlfmt-ignore
ratchet2_test() ->
    Bitsources = [{1, 0, 8},  %% Static topic index
                  {2, 8, 8},  %% Epoch
                  {3, 0, 8},  %% Varying topic hash
                  {2, 0, 8}], %% Timestamp offset
    M = make_keymapper(lists:reverse(Bitsources)),
    F1 = make_filter(M, [{'=', 16#aa}, any, {'=', 16#cc}]),
    ?assertEqual(16#aa00cc00, ratchet(F1, 0)),
    ?assertEqual(16#aa01cc00, ratchet(F1, 16#aa00cd00)),
    ?assertEqual(16#aa01cc11, ratchet(F1, 16#aa01cc11)),
    ?assertEqual(16#aa11cc00, ratchet(F1, 16#aa10cd00)),
    ?assertEqual(16#aa11cc00, ratchet(F1, 16#aa10dc11)),
    ?assertEqual(overflow, ratchet(F1, 16#ab000000)),
    F2 = make_filter(M, [{'=', 16#aa}, {'>=', 16#dddd}, {'=', 16#cc}]),
    %% TODO: note that it's `16#aaddcc00' instead of
    %% `16#aaddccdd'. That is because currently the ratchet function
    %% doesn't take LSBs of a '>=' interval if it has a hole in the
    %% middle (see `make_filter/2'). This only adds extra keys to the
    %% very first interval, so it's not deemed a huge problem.
    ?assertEqual(16#aaddcc00, ratchet(F2, 0)),
    ?assertEqual(16#aa_de_cc_00, ratchet(F2, 16#aa_dd_cd_11)).

%% erlfmt-ignore
ratchet3_test_() ->
    EpochBits = 4,
    Bitsources = [{1, 0, 2},          %% Static topic index
                  {2, EpochBits, 4},  %% Epoch
                  {3, 0, 2},          %% Varying topic hash
                  {2, 0, EpochBits}], %% Timestamp offset
    Keymapper = make_keymapper(lists:reverse(Bitsources)),
    Filter1 = make_filter(Keymapper, [{'=', 2#10}, any, {'=', 2#01}]),
    Filter2 = make_filter(Keymapper, [{'=', 2#01}, any, any]),
    Filter3 = make_filter(Keymapper, [{'=', 2#01}, {'>=', 16#aa}, any]),
    {timeout, 15,
        [?_assert(test_iterate(Filter1, 0)),
         ?_assert(test_iterate(Filter2, 0)),
         %% Not starting from 0 here for simplicity, since the beginning
         %% of a >= interval can't be properly checked with a bitmask:
         ?_assert(test_iterate(Filter3, ratchet(Filter3, 1)))
        ]}.

%% Note: this function iterates through the full range of keys, so its
%% complexity grows _exponentially_ with the total size of the
%% keymapper.
test_iterate(_Filter, overflow) ->
    true;
test_iterate(Filter, Key0) ->
    Key = ratchet(Filter, Key0 + 1),
    ?assert(ratchet_prop(Filter, Key0, Key)),
    test_iterate(Filter, Key).

ratchet_prop(#filter{bitfilter = Bitfilter, bitmask = Bitmask, size = Size}, Key0, Key) ->
    %% Validate basic properties of the generated key. It must be
    %% greater than the old key, and match the bitmask:
    ?assert(Key =:= overflow orelse (Key band Bitmask =:= Bitfilter)),
    ?assert(Key > Key0, {Key, '>=', Key0}),
    IMax = ones(Size),
    %% Iterate through all keys between `Key0 + 1' and `Key' and
    %% validate that none of them match the bitmask. Ultimately, it
    %% means that the `ratchet' function doesn't skip over any valid keys:
    CheckGaps = fun
        F(I) when I >= Key; I > IMax ->
            true;
        F(I) ->
            ?assertNot(
                I band Bitmask =:= Bitfilter,
                {found_gap, Key0, I, Key}
            ),
            F(I + 1)
    end,
    CheckGaps(Key0 + 1).

mkbmask(Keymapper, Filter0) ->
    Filter = constraints_to_ranges(Keymapper, Filter0),
    make_bitfilter(Keymapper, Filter).

key2vec(Schema, Vector) ->
    Keymapper = make_keymapper(Schema),
    Key = vector_to_key(Keymapper, Vector),
    ?assertEqual(Vector, key_to_vector(Keymapper, Key)).

vec2key(Schema, Vector) ->
    vector_to_key(make_keymapper(Schema), Vector).

-endif.
@ -0,0 +1,619 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(emqx_ds_lts).

%% API:
-export([
    trie_create/1, trie_create/0, trie_restore/2, topic_key/3, match_topics/2, lookup_topic_key/2
]).

%% Debug:
-export([trie_next/3, trie_insert/3, dump_to_dot/2]).

-export_type([options/0, static_key/0, trie/0]).

-include_lib("stdlib/include/ms_transform.hrl").

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

-elvis([{elvis_style, variable_naming_convention, disable}]).

%%================================================================================
%% Type declarations
%%================================================================================

%% End Of Topic
-define(EOT, []).
-define(PLUS, '+').

-type edge() :: binary() | ?EOT | ?PLUS.

%% Fixed size binary
-type static_key() :: non_neg_integer().

-define(PREFIX, prefix).
-type state() :: static_key() | ?PREFIX.

-type varying() :: [binary() | ?PLUS].

-type msg_storage_key() :: {static_key(), varying()}.

-type threshold_fun() :: fun((non_neg_integer()) -> non_neg_integer()).

-type persist_callback() :: fun((_Key, _Val) -> ok).

-type options() ::
    #{
        persist_callback => persist_callback(),
        static_key_size => pos_integer()
    }.

-record(trie, {
    persist :: persist_callback(),
    static_key_size :: pos_integer(),
    trie :: ets:tid(),
    stats :: ets:tid()
}).

-opaque trie() :: #trie{}.

-record(trans, {
    key :: {state(), edge()},
    next :: state()
}).

%%================================================================================
%% API functions
%%================================================================================

%% @doc Create an empty trie
-spec trie_create(options()) -> trie().
trie_create(UserOpts) ->
    Defaults = #{
        persist_callback => fun(_, _) -> ok end,
        static_key_size => 8
    },
    #{
        persist_callback := Persist,
        static_key_size := StaticKeySize
    } = maps:merge(Defaults, UserOpts),
    Trie = ets:new(trie, [{keypos, #trans.key}, set, public]),
    Stats = ets:new(stats, [{keypos, 1}, set, public]),
    #trie{
        persist = Persist,
        static_key_size = StaticKeySize,
        trie = Trie,
        stats = Stats
    }.

-spec trie_create() -> trie().
trie_create() ->
    trie_create(#{}).

%% @doc Restore a trie from a dump
-spec trie_restore(options(), [{_Key, _Val}]) -> trie().
trie_restore(Options, Dump) ->
    Trie = trie_create(Options),
    lists:foreach(
        fun({{StateFrom, Token}, StateTo}) ->
            trie_insert(Trie, StateFrom, Token, StateTo)
        end,
        Dump
    ),
    Trie.

%% @doc Look up the topic key. Create a new one, if not found.
-spec topic_key(trie(), threshold_fun(), [binary()]) -> msg_storage_key().
topic_key(Trie, ThresholdFun, Tokens) ->
    do_topic_key(Trie, ThresholdFun, 0, ?PREFIX, Tokens, []).

%% @doc Return an existing topic key, if it exists.
-spec lookup_topic_key(trie(), [binary()]) -> {ok, msg_storage_key()} | undefined.
lookup_topic_key(Trie, Tokens) ->
    do_lookup_topic_key(Trie, ?PREFIX, Tokens, []).

%% @doc Return the list of keys of topics that match a given topic filter
-spec match_topics(trie(), [binary() | '+' | '#']) ->
    [msg_storage_key()].
match_topics(Trie, TopicFilter) ->
    do_match_topics(Trie, ?PREFIX, [], TopicFilter).
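%% An illustrative sketch (not part of the original module) of how the API
%% above fits together. The threshold function here is an arbitrary
%% assumption: keep the root level concrete, and learn a wildcard once a
%% deeper node accumulates 10 children.
lts_usage_example() ->
    Trie = trie_create(),
    ThresholdFun = fun
        (0) -> 1000;
        (_) -> 10
    end,
    %% Deriving a key inserts the topic into the trie as a side effect:
    Key = topic_key(Trie, ThresholdFun, [<<"foo">>, <<"bar">>]),
    %% Lookup is pure and returns the same key:
    {ok, Key} = lookup_topic_key(Trie, [<<"foo">>, <<"bar">>]),
    %% Topic filters resolve to the matching storage keys:
    [Key] = match_topics(Trie, [<<"foo">>, '+']),
    ok.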
%% @doc Dump trie to graphviz format for debugging
-spec dump_to_dot(trie(), file:filename()) -> ok.
dump_to_dot(#trie{trie = Trie, stats = Stats}, Filename) ->
    L = ets:tab2list(Trie),
    {Nodes0, Edges} =
        lists:foldl(
            fun(#trans{key = {From, Label}, next = To}, {AccN, AccEdge}) ->
                Edge = {From, To, Label},
                {[From, To] ++ AccN, [Edge | AccEdge]}
            end,
            {[], []},
            L
        ),
    Nodes =
        lists:map(
            fun(Node) ->
                case ets:lookup(Stats, Node) of
                    [{_, NChildren}] -> ok;
                    [] -> NChildren = 0
                end,
                {Node, NChildren}
            end,
            lists:usort(Nodes0)
        ),
    {ok, FD} = file:open(Filename, [write]),
    Print = fun
        (?PREFIX) -> "prefix";
        (NodeId) -> integer_to_binary(NodeId, 16)
    end,
    io:format(FD, "digraph {~n", []),
    lists:foreach(
        fun({Node, NChildren}) ->
            Id = Print(Node),
            io:format(FD, "  \"~s\" [label=\"~s : ~p\"];~n", [Id, Id, NChildren])
        end,
        Nodes
    ),
    lists:foreach(
        fun({From, To, Label}) ->
            io:format(FD, "  \"~s\" -> \"~s\" [label=\"~s\"];~n", [Print(From), Print(To), Label])
        end,
        Edges
    ),
    io:format(FD, "}~n", []),
    file:close(FD).
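%% Usage note (illustrative): after `ok = dump_to_dot(Trie, "trie.dot")', the
%% dump can be rendered with Graphviz, e.g. `dot -Tpng trie.dot -o trie.png'.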
%%================================================================================
%% Internal exports
%%================================================================================

-spec trie_next(trie(), state(), binary() | ?EOT) -> {Wildcard, state()} | undefined when
    Wildcard :: boolean().
trie_next(#trie{trie = Trie}, State, ?EOT) ->
    case ets:lookup(Trie, {State, ?EOT}) of
        [#trans{next = Next}] -> {false, Next};
        [] -> undefined
    end;
trie_next(#trie{trie = Trie}, State, Token) ->
    case ets:lookup(Trie, {State, Token}) of
        [#trans{next = Next}] ->
            {false, Next};
        [] ->
            case ets:lookup(Trie, {State, ?PLUS}) of
                [#trans{next = Next}] -> {true, Next};
                [] -> undefined
            end
    end.

-spec trie_insert(trie(), state(), edge()) -> {Updated, state()} when
    NChildren :: non_neg_integer(),
    Updated :: false | NChildren.
trie_insert(Trie, State, Token) ->
    trie_insert(Trie, State, Token, get_id_for_key(Trie, State, Token)).

%%================================================================================
%% Internal functions
%%================================================================================

-spec trie_insert(trie(), state(), edge(), state()) -> {Updated, state()} when
    NChildren :: non_neg_integer(),
    Updated :: false | NChildren.
trie_insert(#trie{trie = Trie, stats = Stats, persist = Persist}, State, Token, NewState) ->
    Key = {State, Token},
    Rec = #trans{
        key = Key,
        next = NewState
    },
    case ets:insert_new(Trie, Rec) of
        true ->
            ok = Persist(Key, NewState),
            Inc =
                case Token of
                    ?EOT -> 0;
                    ?PLUS -> 0;
                    _ -> 1
                end,
            NChildren = ets:update_counter(Stats, State, {2, Inc}, {State, 0}),
            {NChildren, NewState};
        false ->
            [#trans{next = NextState}] = ets:lookup(Trie, Key),
            {false, NextState}
    end.

-spec get_id_for_key(trie(), state(), edge()) -> static_key().
get_id_for_key(#trie{static_key_size = Size}, _State, _Token) ->
    %% Requirements for the return value:
    %%
    %% It should be globally unique for the `{State, Token}` pair. Other
    %% than that, there are no requirements. The return value doesn't even
    %% have to be deterministic, since the states are saved in the trie.
    %%
    %% The generated value becomes the ID of the topic in the durable
    %% storage. Its size should be relatively small to reduce the
    %% overhead of storing messages.
    %%
    %% If we want to impress the computer science crowd, sorry, I mean to
    %% minimize storage requirements, we can even employ Huffman coding
    %% based on the frequency of messages.
    <<Int:(Size * 8)>> = crypto:strong_rand_bytes(Size),
    Int.

%% erlfmt-ignore
-spec do_match_topics(trie(), state(), [binary() | '+'], [binary() | '+' | '#']) ->
    list().
do_match_topics(Trie, State, Varying, []) ->
    case trie_next(Trie, State, ?EOT) of
        {false, Static} -> [{Static, lists:reverse(Varying)}];
        undefined -> []
    end;
do_match_topics(Trie, State, Varying, ['#']) ->
    Emanating = emanating(Trie, State, ?PLUS),
    lists:flatmap(
        fun
            ({?EOT, Static}) ->
                [{Static, lists:reverse(Varying)}];
            ({?PLUS, NextState}) ->
                do_match_topics(Trie, NextState, [?PLUS | Varying], ['#']);
            ({_, NextState}) ->
                do_match_topics(Trie, NextState, Varying, ['#'])
        end,
        Emanating
    );
do_match_topics(Trie, State, Varying, [Level | Rest]) ->
    Emanating = emanating(Trie, State, Level),
    lists:flatmap(
        fun
            ({?EOT, _NextState}) ->
                [];
            ({?PLUS, NextState}) ->
                do_match_topics(Trie, NextState, [Level | Varying], Rest);
            ({_, NextState}) ->
                do_match_topics(Trie, NextState, Varying, Rest)
        end,
        Emanating
    ).

-spec do_lookup_topic_key(trie(), state(), [binary()], [binary()]) ->
    {ok, msg_storage_key()} | undefined.
do_lookup_topic_key(Trie, State, [], Varying) ->
    case trie_next(Trie, State, ?EOT) of
        {false, Static} ->
            {ok, {Static, lists:reverse(Varying)}};
        undefined ->
            undefined
    end;
do_lookup_topic_key(Trie, State, [Tok | Rest], Varying) ->
    case trie_next(Trie, State, Tok) of
        {true, NextState} ->
            do_lookup_topic_key(Trie, NextState, Rest, [Tok | Varying]);
        {false, NextState} ->
            do_lookup_topic_key(Trie, NextState, Rest, Varying);
        undefined ->
            undefined
    end.

do_topic_key(Trie, _, _, State, [], Varying) ->
    %% We reached the end of the topic. Assert: the trie node that
    %% corresponds to EOT cannot be a wildcard.
    {_, false, Static} = trie_next_(Trie, State, ?EOT),
    {Static, lists:reverse(Varying)};
do_topic_key(Trie, ThresholdFun, Depth, State, [Tok | Rest], Varying0) ->
    %% TODO: it's not necessary to call it every time.
    Threshold = ThresholdFun(Depth),
    Varying =
        case trie_next_(Trie, State, Tok) of
            {NChildren, _, NextState} when is_integer(NChildren), NChildren >= Threshold ->
                %% The number of children of the trie node reached the
                %% threshold, so we need to insert a wildcard here.
                {_, _WildcardState} = trie_insert(Trie, State, ?PLUS),
                Varying0;
            {_, false, NextState} ->
                Varying0;
            {_, true, NextState} ->
                %% This topic level is marked as a wildcard in the trie,
                %% so we need to add it to the varying part of the key:
                [Tok | Varying0]
        end,
    do_topic_key(Trie, ThresholdFun, Depth + 1, NextState, Rest, Varying).
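%% Note (illustrative): the threshold function decides, per topic level
%% depth, how many distinct children a node may accumulate before a `+'
%% wildcard is learned. A typical shape, mirroring the tests below, is
%%   ThresholdFun = fun(0) -> 1000; (_) -> 20 end,
%% i.e. effectively never generalize the root level, and generalize any
%% deeper level once it has seen 20 distinct tokens.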

%% @doc Has side effects! Inserts missing elements
-spec trie_next_(trie(), state(), binary() | ?EOT) -> {New, Wildcard, state()} when
    New :: false | non_neg_integer(),
    Wildcard :: boolean().
trie_next_(Trie, State, Token) ->
    case trie_next(Trie, State, Token) of
        {Wildcard, NextState} ->
            {false, Wildcard, NextState};
        undefined ->
            {Updated, NextState} = trie_insert(Trie, State, Token),
            {Updated, false, NextState}
    end.

%% @doc Return all edges emanating from a node:
%% erlfmt-ignore
-spec emanating(trie(), state(), edge()) -> [{edge(), state()}].
emanating(#trie{trie = Tab}, State, ?PLUS) ->
    ets:select(
        Tab,
        ets:fun2ms(
            fun(#trans{key = {S, Edge}, next = Next}) when S == State ->
                {Edge, Next}
            end
        )
    );
emanating(#trie{trie = Tab}, State, ?EOT) ->
    case ets:lookup(Tab, {State, ?EOT}) of
        [#trans{next = Next}] -> [{?EOT, Next}];
        [] -> []
    end;
emanating(#trie{trie = Tab}, State, Bin) when is_binary(Bin) ->
    [
        {Edge, Next}
     || #trans{key = {_, Edge}, next = Next} <-
            ets:lookup(Tab, {State, ?PLUS}) ++
                ets:lookup(Tab, {State, Bin})
    ].

%%================================================================================
%% Tests
%%================================================================================

-ifdef(TEST).

trie_basic_test() ->
    T = trie_create(),
    ?assertMatch(undefined, trie_next(T, ?PREFIX, <<"foo">>)),
    {1, S1} = trie_insert(T, ?PREFIX, <<"foo">>),
    ?assertMatch({false, S1}, trie_insert(T, ?PREFIX, <<"foo">>)),
    ?assertMatch({false, S1}, trie_next(T, ?PREFIX, <<"foo">>)),

    ?assertMatch(undefined, trie_next(T, ?PREFIX, <<"bar">>)),
    {2, S2} = trie_insert(T, ?PREFIX, <<"bar">>),
    ?assertMatch({false, S2}, trie_insert(T, ?PREFIX, <<"bar">>)),

    ?assertMatch(undefined, trie_next(T, S1, <<"foo">>)),
    ?assertMatch(undefined, trie_next(T, S1, <<"bar">>)),
    {1, S11} = trie_insert(T, S1, <<"foo">>),
    {2, S12} = trie_insert(T, S1, <<"bar">>),
    ?assertMatch({false, S11}, trie_next(T, S1, <<"foo">>)),
    ?assertMatch({false, S12}, trie_next(T, S1, <<"bar">>)),

    ?assertMatch(undefined, trie_next(T, S11, <<"bar">>)),
    {1, S111} = trie_insert(T, S11, <<"bar">>),
    ?assertMatch({false, S111}, trie_next(T, S11, <<"bar">>)).

lookup_key_test() ->
    T = trie_create(),
    {_, S1} = trie_insert(T, ?PREFIX, <<"foo">>),
    {_, S11} = trie_insert(T, S1, <<"foo">>),
    %% Topics don't match until we insert ?EOT:
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"foo">>])
    ),
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"foo">>, <<"foo">>])
    ),
    {_, S10} = trie_insert(T, S1, ?EOT),
    {_, S110} = trie_insert(T, S11, ?EOT),
    ?assertMatch(
        {ok, {S10, []}},
        lookup_topic_key(T, [<<"foo">>])
    ),
    ?assertMatch(
        {ok, {S110, []}},
        lookup_topic_key(T, [<<"foo">>, <<"foo">>])
    ),
    %% The rest of the keys still don't match:
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"bar">>])
    ),
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"bar">>, <<"foo">>])
    ).

wildcard_lookup_test() ->
    T = trie_create(),
    {1, S1} = trie_insert(T, ?PREFIX, <<"foo">>),
    %% Plus doesn't increase the number of children:
    {0, S11} = trie_insert(T, S1, ?PLUS),
    {1, S111} = trie_insert(T, S11, <<"foo">>),
    %% ?EOT doesn't increase the number of children:
    {0, S1110} = trie_insert(T, S111, ?EOT),
    ?assertMatch(
        {ok, {S1110, [<<"bar">>]}},
        lookup_topic_key(T, [<<"foo">>, <<"bar">>, <<"foo">>])
    ),
    ?assertMatch(
        {ok, {S1110, [<<"quux">>]}},
        lookup_topic_key(T, [<<"foo">>, <<"quux">>, <<"foo">>])
    ),
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"foo">>])
    ),
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"foo">>, <<"bar">>])
    ),
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"foo">>, <<"bar">>, <<"bar">>])
    ),
    ?assertMatch(
        undefined,
        lookup_topic_key(T, [<<"bar">>, <<"foo">>, <<"foo">>])
    ),
    {_, S10} = trie_insert(T, S1, ?EOT),
    ?assertMatch(
        {ok, {S10, []}},
        lookup_topic_key(T, [<<"foo">>])
    ).

%% erlfmt-ignore
topic_key_test() ->
    T = trie_create(),
    try
        Threshold = 4,
        ThresholdFun = fun(0) -> 1000;
                          (_) -> Threshold
                       end,
        %% Test that the bottom layer threshold is high:
        lists:foreach(
            fun(I) ->
                {_, []} = test_key(T, ThresholdFun, [I, 99999, 999999, 99999])
            end,
            lists:seq(1, 10)),
        %% Test adding children on the 2nd level:
        lists:foreach(
            fun(I) ->
                case test_key(T, ThresholdFun, [1, I, 1]) of
                    {_, []} ->
                        ?assert(I < Threshold, {I, '<', Threshold}),
                        ok;
                    {_, [Var]} ->
                        ?assert(I >= Threshold, {I, '>=', Threshold}),
                        ?assertEqual(Var, integer_to_binary(I))
                end
            end,
            lists:seq(1, 100)),
        %% This doesn't affect the 2nd level with a different prefix:
        ?assertMatch({_, []}, test_key(T, ThresholdFun, [2, 1, 1])),
        ?assertMatch({_, []}, test_key(T, ThresholdFun, [2, 10, 1])),
        %% This didn't retroactively change the indexes that were
        %% created prior to reaching the threshold:
        ?assertMatch({_, []}, test_key(T, ThresholdFun, [1, 1, 1])),
        ?assertMatch({_, []}, test_key(T, ThresholdFun, [1, 2, 1])),
        %% Now create another level of +:
        lists:foreach(
            fun(I) ->
                case test_key(T, ThresholdFun, [1, 42, 1, I, 42]) of
                    {_, [<<"42">>]} when I =< Threshold -> %% TODO: off-by-1 error
                        ok;
                    {_, [<<"42">>, Var]} ->
                        ?assertEqual(Var, integer_to_binary(I));
                    Ret ->
                        error({Ret, I})
                end
            end,
            lists:seq(1, 100))
    after
        dump_to_dot(T, filename:join("_build", atom_to_list(?FUNCTION_NAME) ++ ".dot"))
    end.

%% erlfmt-ignore
topic_match_test() ->
    T = trie_create(),
    try
        Threshold = 2,
        ThresholdFun = fun(0) -> 1000;
                          (_) -> Threshold
                       end,
        {S1, []} = test_key(T, ThresholdFun, [1]),
        {S11, []} = test_key(T, ThresholdFun, [1, 1]),
        {S12, []} = test_key(T, ThresholdFun, [1, 2]),
        {S111, []} = test_key(T, ThresholdFun, [1, 1, 1]),
        %% Match concrete topics:
        assert_match_topics(T, [1], [{S1, []}]),
        assert_match_topics(T, [1, 1], [{S11, []}]),
        assert_match_topics(T, [1, 1, 1], [{S111, []}]),
        %% Match topics with +:
        assert_match_topics(T, [1, '+'], [{S11, []}, {S12, []}]),
        assert_match_topics(T, [1, '+', 1], [{S111, []}]),
        %% Match topics with #:
        assert_match_topics(T, [1, '#'],
                            [{S1, []},
                             {S11, []}, {S12, []},
                             {S111, []}]),
        assert_match_topics(T, [1, 1, '#'],
                            [{S11, []},
                             {S111, []}]),
        %% Now add learned wildcards:
        {S21, []} = test_key(T, ThresholdFun, [2, 1]),
        {S22, []} = test_key(T, ThresholdFun, [2, 2]),
        {S2_, [<<"3">>]} = test_key(T, ThresholdFun, [2, 3]),
        {S2_11, [<<"3">>]} = test_key(T, ThresholdFun, [2, 3, 1, 1]),
        {S2_12, [<<"4">>]} = test_key(T, ThresholdFun, [2, 4, 1, 2]),
        {S2_1_, [<<"3">>, <<"3">>]} = test_key(T, ThresholdFun, [2, 3, 1, 3]),
        %% Check matching:
        assert_match_topics(T, [2, 2],
                            [{S22, []}, {S2_, [<<"2">>]}]),
        assert_match_topics(T, [2, '+'],
                            [{S22, []}, {S21, []}, {S2_, ['+']}]),
        assert_match_topics(T, [2, '#'],
                            [{S21, []}, {S22, []},
                             {S2_, ['+']},
                             {S2_11, ['+']}, {S2_12, ['+']}, {S2_1_, ['+', '+']}]),
        ok
    after
        dump_to_dot(T, filename:join("_build", atom_to_list(?FUNCTION_NAME) ++ ".dot"))
    end.

-define(keys_history, topic_key_history).

%% erlfmt-ignore
assert_match_topics(Trie, Filter0, Expected) ->
    Filter = lists:map(fun(I) when is_integer(I) -> integer_to_binary(I);
                          (I) -> I
                       end,
                       Filter0),
    Matched = match_topics(Trie, Filter),
    ?assertMatch( #{missing := [], unexpected := []}
                , #{ missing    => Expected -- Matched
                   , unexpected => Matched -- Expected
                   }
                , Filter
                ).

%% erlfmt-ignore
test_key(Trie, Threshold, Topic0) ->
    Topic = [integer_to_binary(I) || I <- Topic0],
    Ret = topic_key(Trie, Threshold, Topic),
    %% Test idempotency:
    Ret1 = topic_key(Trie, Threshold, Topic),
    ?assertEqual(Ret, Ret1, Topic),
    %% Add the new key to the history:
    case get(?keys_history) of
        undefined  -> OldHistory = #{};
        OldHistory -> ok
    end,
    %% Test that the generated keys are always unique for the topic:
    History = maps:update_with(
        Ret,
        fun(Old) ->
            case Old =:= Topic of
                true  -> Old;
                false -> error(#{ '$msg'     => "Duplicate key!"
                                , key       => Ret
                                , old_topic => Old
                                , new_topic => Topic
                                })
            end
        end,
        Topic,
        OldHistory),
    put(?keys_history, History),
    {ok, Ret} = lookup_topic_key(Trie, Topic),
    Ret.

-endif.

@ -1,742 +0,0 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%--------------------------------------------------------------------

-module(emqx_ds_message_storage_bitmask).

%%================================================================================
%% @doc Description of the schema
%%
%% Let us assume that `T' is a topic and `t' is time. These are the two
%% dimensions used to index messages. They can be viewed as
%% "coordinates" of an MQTT message in a 2D space.
%%
%% Oftentimes, when a wildcard subscription is used, keys must be
%% scanned in both dimensions simultaneously.
%%
%% RocksDB allows us to iterate over sorted keys very quickly. This means we
%% need to map our two-dimensional keys to a single index that is
%% sorted in a way that helps to iterate over both time and topic
%% without having to do a lot of random seeks.
%%
%% == Mapping of 2D keys to RocksDB keys ==
%%
%% We use a "zigzag" pattern to store messages, where the RocksDB key is
%% composed like this:
%%
%%          |ttttt|TTTTTTTTT|tttt|
%%             ^       ^       ^
%%             |       |       |
%%     +-------+       |       +---------+
%%     |               |                 |
%%  most significant  topic hash  least significant
%%  bits of timestamp             bits of timestamp
%%  (a.k.a epoch)                 (a.k.a time offset)
%%
%% The topic hash is level-aware: each topic level is hashed separately
%% and the resulting hashes are bitwise-concatenated. This allows us
%% to map topics to fixed-length bitstrings while keeping some degree
%% of information about the hierarchy.
%%
%% The next important concept is what we call the "epoch". The duration of
%% an epoch is determined by the maximum time offset. The epoch is calculated
%% by shifting the bits of the timestamp right.
%%
%% The resulting index is a space-filling curve that looks like
%% this in the topic-time 2D space:
%%
%% T ^ ---->------   |---->------   |---->------
%%   |       --/     /      --/     /      --/
%%   |   -<-/       |   -<-/       |   -<-/
%%   | -/           | -/           | -/
%%   | ---->------  | ---->------  | ---->------
%%   |       --/    /       --/    /       --/
%%   |   ---/      |    ---/      |    ---/
%%   | -/          ^  -/          ^  -/
%%   | ---->------ |  ---->------ |  ---->------
%%   |       --/   /        --/   /        --/
%%   |   -<-/     |     -<-/     |     -<-/
%%   | -/         |   -/         |   -/
%%   | ---->------|   ---->------|   ---------->
%%   |
%%  -+------------+-----------------------------> t
%%        epoch
%%
%% This structure allows us to quickly seek to the first message that
%% was recorded in a certain epoch in a certain topic or a
%% group of topics matching a filter like `foo/bar/#`.
%%
%% Due to its structure, for each pair of RocksDB keys K1 and K2, such
%% that K1 > K2 and topic(K1) = topic(K2), timestamp(K1) >
%% timestamp(K2).
%% That is, replay doesn't reorder messages published in each
%% individual topic.
%%
%% This property doesn't hold between different topics, but it's not deemed
%% a problem right now.
%%
%%================================================================================

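%% Illustrative note (not in the original file): with a 10-bit time offset,
%% epoch(T) = T bsr 10, so all messages published within the same ~1024 ms
%% window fall into one epoch and sort contiguously within each topic hash.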
-behaviour(emqx_ds_storage_layer).

%% API:
-export([create_new/3, open/5]).
-export([make_keymapper/1]).

-export([store/5]).
-export([delete/4]).
-export([make_iterator/2]).
-export([make_iterator/3]).
-export([next/1]).

-export([preserve_iterator/1]).
-export([restore_iterator/3]).
-export([refresh_iterator/1]).

%% Debug/troubleshooting:
%% Keymappers
-export([
    keymapper_info/1,
    compute_bitstring/3,
    compute_topic_bitmask/2,
    compute_time_bitmask/1,
    hash/2
]).

%% Keyspace filters
-export([
    make_keyspace_filter/2,
    compute_initial_seek/1,
    compute_next_seek/2,
    compute_time_seek/3,
    compute_topic_seek/4
]).

-export_type([db/0, iterator/0, schema/0]).

-export_type([options/0]).
-export_type([iteration_options/0]).

-compile(
    {inline, [
        bitwise_concat/3,
        ones/1,
        successor/1,
        topic_hash_matches/3,
        time_matches/3
    ]}
).

%%================================================================================
%% Type declarations
%%================================================================================

-type topic() :: emqx_ds:topic().
-type topic_filter() :: emqx_ds:topic_filter().
-type time() :: emqx_ds:time().

%% Number of bits
-type bits() :: non_neg_integer().

%% Key of a RocksDB record.
-type key() :: binary().

%% Distribution of entropy among topic levels.
%% Example: [4, 8, 16] means that level 1 gets 4 bits, level 2 gets 8 bits,
%% and the _rest of the levels_ (if any) get 16 bits.
-type bits_per_level() :: [bits(), ...].

-type options() :: #{
    %% Number of bits in a message timestamp.
    timestamp_bits := bits(),
    %% Number of bits in a key allocated to each level in a message topic.
    topic_bits_per_level := bits_per_level(),
    %% Maximum granularity of iteration over time.
    epoch := time(),

    iteration => iteration_options(),

    cf_options => emqx_ds_storage_layer:db_cf_options()
}.

-type iteration_options() :: #{
    %% Request periodic iterator refresh.
    %% This might be helpful during replays taking a lot of time (e.g. tens of seconds).
    %% Note that `{every, 1000}` means 1000 _operations_ with the iterator, which is not
    %% the same as 1000 replayed messages.
    iterator_refresh => {every, _NumOperations :: pos_integer()}
}.

%% Persistent configuration of the generation; it is used to create the db
%% record when the database is reopened
-record(schema, {keymapper :: keymapper()}).

-opaque schema() :: #schema{}.

-record(db, {
    shard :: emqx_ds:shard(),
    handle :: rocksdb:db_handle(),
    cf :: rocksdb:cf_handle(),
    keymapper :: keymapper(),
    write_options = [{sync, true}] :: emqx_ds_storage_layer:db_write_options(),
    read_options = [] :: emqx_ds_storage_layer:db_read_options()
}).

-record(it, {
    handle :: rocksdb:itr_handle(),
    filter :: keyspace_filter(),
    cursor :: binary() | undefined,
    next_action :: {seek, binary()} | next,
    refresh_counter :: {non_neg_integer(), pos_integer()} | undefined
}).

-record(filter, {
    keymapper :: keymapper(),
    topic_filter :: topic_filter(),
    start_time :: integer(),
    hash_bitfilter :: integer(),
    hash_bitmask :: integer(),
    time_bitfilter :: integer(),
    time_bitmask :: integer()
}).

% NOTE
% The keymapper decides how to map messages into the RocksDB column family keyspace.
-record(keymapper, {
    source :: [bitsource(), ...],
    bitsize :: bits(),
    epoch :: non_neg_integer()
}).

-type bitsource() ::
    %% Consume `_Size` bits from the timestamp starting at the `_Offset`th bit.
    %% TODO consistency
    {timestamp, _Offset :: bits(), _Size :: bits()}
    %% Consume the next topic level (either one or all of them) and compute a `_Size` bits-wide hash.
    | {hash, level | levels, _Size :: bits()}.

-opaque db() :: #db{}.
-opaque iterator() :: #it{}.
-type keymapper() :: #keymapper{}.
-type keyspace_filter() :: #filter{}.

%%================================================================================
%% API functions
%%================================================================================

%% Create a new column family for the generation and a serializable representation of the schema
-spec create_new(rocksdb:db_handle(), emqx_ds_storage_layer:gen_id(), options()) ->
    {schema(), emqx_ds_storage_layer:cf_refs()}.
create_new(DBHandle, GenId, Options) ->
    CFName = data_cf(GenId),
    CFOptions = maps:get(cf_options, Options, []),
    {ok, CFHandle} = rocksdb:create_column_family(DBHandle, CFName, CFOptions),
    Schema = #schema{keymapper = make_keymapper(Options)},
    {Schema, [{CFName, CFHandle}]}.

%% Reopen the database
-spec open(
    emqx_ds:shard(),
    rocksdb:db_handle(),
    emqx_ds_storage_layer:gen_id(),
    emqx_ds_storage_layer:cf_refs(),
    schema()
) ->
    db().
open(Shard, DBHandle, GenId, CFs, #schema{keymapper = Keymapper}) ->
    {value, {_, CFHandle}} = lists:keysearch(data_cf(GenId), 1, CFs),
    #db{
        shard = Shard,
        handle = DBHandle,
        cf = CFHandle,
        keymapper = Keymapper
    }.

-spec make_keymapper(options()) -> keymapper().
make_keymapper(#{
    timestamp_bits := TimestampBits,
    topic_bits_per_level := BitsPerLevel,
    epoch := MaxEpoch
}) ->
    TimestampLSBs = min(TimestampBits, floor(math:log2(MaxEpoch))),
    TimestampMSBs = TimestampBits - TimestampLSBs,
    NLevels = length(BitsPerLevel),
    {LevelBits, [TailLevelsBits]} = lists:split(NLevels - 1, BitsPerLevel),
    Source = lists:flatten([
        [{timestamp, TimestampLSBs, TimestampMSBs} || TimestampMSBs > 0],
        [{hash, level, Bits} || Bits <- LevelBits],
        {hash, levels, TailLevelsBits},
        [{timestamp, 0, TimestampLSBs} || TimestampLSBs > 0]
    ]),
    #keymapper{
        source = Source,
        bitsize = lists:sum([S || {_, _, S} <- Source]),
        epoch = 1 bsl TimestampLSBs
    }.
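%% An illustrative sketch (not part of the original file) of the resulting
%% keymapper geometry. With 64 timestamp bits, [8, 8, 16] topic bits and an
%% epoch of 1024 ms, the 10 least significant timestamp bits become the time
%% offset, and the full key is 54 + 8 + 8 + 16 + 10 = 96 bits wide:
keymapper_example() ->
    KM = make_keymapper(#{
        timestamp_bits => 64,
        topic_bits_per_level => [8, 8, 16],
        epoch => 1024
    }),
    #{bitsize := 96, epoch := 1024} = keymapper_info(KM),
    ok.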

-spec store(db(), emqx_guid:guid(), emqx_ds:time(), topic(), binary()) ->
    ok | {error, _TODO}.
store(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic, MessagePayload) ->
    Key = make_message_key(Topic, PublishedAt, MessageID, DB#db.keymapper),
    Value = make_message_value(Topic, MessagePayload),
    rocksdb:put(DBHandle, CFHandle, Key, Value, DB#db.write_options).

-spec delete(db(), emqx_guid:guid(), emqx_ds:time(), topic()) ->
    ok | {error, _TODO}.
delete(DB = #db{handle = DBHandle, cf = CFHandle}, MessageID, PublishedAt, Topic) ->
    Key = make_message_key(Topic, PublishedAt, MessageID, DB#db.keymapper),
    rocksdb:delete(DBHandle, CFHandle, Key, DB#db.write_options).

-spec make_iterator(db(), emqx_ds:replay()) ->
    {ok, iterator()} | {error, _TODO}.
make_iterator(DB, Replay) ->
    {Keyspace, _ShardId} = DB#db.shard,
    Options = emqx_ds_conf:iteration_options(Keyspace),
    make_iterator(DB, Replay, Options).

-spec make_iterator(db(), emqx_ds:replay(), iteration_options()) ->
    % {error, invalid_start_time}? might just start from the beginning of time
    % and call it a day: the client violated the contract anyway.
    {ok, iterator()} | {error, _TODO}.
make_iterator(DB = #db{handle = DBHandle, cf = CFHandle}, Replay, Options) ->
    case rocksdb:iterator(DBHandle, CFHandle, DB#db.read_options) of
        {ok, ITHandle} ->
            Filter = make_keyspace_filter(Replay, DB#db.keymapper),
            InitialSeek = combine(compute_initial_seek(Filter), <<>>, DB#db.keymapper),
            RefreshCounter = make_refresh_counter(maps:get(iterator_refresh, Options, undefined)),
            {ok, #it{
                handle = ITHandle,
                filter = Filter,
                next_action = {seek, InitialSeek},
                refresh_counter = RefreshCounter
            }};
        Err ->
            Err
    end.

-spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}.
next(It0 = #it{filter = #filter{keymapper = Keymapper}}) ->
    It = maybe_refresh_iterator(It0),
    case rocksdb:iterator_move(It#it.handle, It#it.next_action) of
        % the spec says `{ok, Key}` is also possible, but the implementation says it's not
        {ok, Key, Value} ->
            % Preserve the last seen key in the iterator so it can be restored / refreshed later.
            ItNext = It#it{cursor = Key},
            Bitstring = extract(Key, Keymapper),
            case match_next(Bitstring, Value, It#it.filter) of
                {_Topic, Payload} ->
                    {value, Payload, ItNext#it{next_action = next}};
                next ->
                    next(ItNext#it{next_action = next});
                NextBitstring when is_integer(NextBitstring) ->
                    NextSeek = combine(NextBitstring, <<>>, Keymapper),
                    next(ItNext#it{next_action = {seek, NextSeek}});
                none ->
                    stop_iteration(ItNext)
            end;
        {error, invalid_iterator} ->
            stop_iteration(It);
        {error, iterator_closed} ->
            {error, closed}
    end.

-spec preserve_iterator(iterator()) -> binary().
preserve_iterator(#it{cursor = Cursor}) ->
    State = #{
        v => 1,
        cursor => Cursor
    },
    term_to_binary(State).

-spec restore_iterator(db(), emqx_ds:replay(), binary()) ->
    {ok, iterator()} | {error, _TODO}.
restore_iterator(DB, Replay, Serial) when is_binary(Serial) ->
    State = binary_to_term(Serial),
    restore_iterator(DB, Replay, State);
restore_iterator(DB, Replay, #{
    v := 1,
    cursor := Cursor
}) ->
    case make_iterator(DB, Replay) of
        {ok, It} when Cursor == undefined ->
            % The iterator was preserved right after it had been made.
            {ok, It};
        {ok, It} ->
            % The iterator was preserved mid-replay; seek right past the last seen key.
            {ok, It#it{cursor = Cursor, next_action = {seek, successor(Cursor)}}};
        Err ->
            Err
    end.

-spec refresh_iterator(iterator()) -> iterator().
refresh_iterator(It = #it{handle = Handle, cursor = Cursor, next_action = Action}) ->
    case rocksdb:iterator_refresh(Handle) of
        ok when Action =:= next ->
            % Now the underlying iterator is invalid, need to seek instead.
            It#it{next_action = {seek, successor(Cursor)}};
        ok ->
            % Now the underlying iterator is invalid, but will seek soon anyway.
            It;
        {error, _} ->
            % The implementation could in theory return an {error, ...} tuple.
            % Supposedly our best bet is to ignore it.
            % TODO logging?
            It
    end.

%%================================================================================
%% Internal exports
%%================================================================================

-spec keymapper_info(keymapper()) ->
    #{source := [bitsource()], bitsize := bits(), epoch := time()}.
keymapper_info(#keymapper{source = Source, bitsize = Bitsize, epoch = Epoch}) ->
    #{source => Source, bitsize => Bitsize, epoch => Epoch}.

make_message_key(Topic, PublishedAt, MessageID, Keymapper) ->
    combine(compute_bitstring(Topic, PublishedAt, Keymapper), MessageID, Keymapper).

make_message_value(Topic, MessagePayload) ->
    term_to_binary({Topic, MessagePayload}).

unwrap_message_value(Binary) ->
    binary_to_term(Binary).

-spec combine(_Bitstring :: integer(), emqx_guid:guid() | <<>>, keymapper()) ->
    key().
combine(Bitstring, MessageID, #keymapper{bitsize = Size}) ->
    <<Bitstring:Size/integer, MessageID/binary>>.

-spec extract(key(), keymapper()) ->
    _Bitstring :: integer().
extract(Key, #keymapper{bitsize = Size}) ->
    <<Bitstring:Size/integer, _MessageID/binary>> = Key,
    Bitstring.

-spec compute_bitstring(topic_filter(), time(), keymapper()) -> integer().
compute_bitstring(TopicFilter, Timestamp, #keymapper{source = Source}) ->
    compute_bitstring(TopicFilter, Timestamp, Source, 0).

-spec compute_topic_bitmask(topic_filter(), keymapper()) -> integer().
compute_topic_bitmask(TopicFilter, #keymapper{source = Source}) ->
    compute_topic_bitmask(TopicFilter, Source, 0).

-spec compute_time_bitmask(keymapper()) -> integer().
compute_time_bitmask(#keymapper{source = Source}) ->
    compute_time_bitmask(Source, 0).

-spec hash(term(), bits()) -> integer().
hash(Input, Bits) ->
    % at most 32 bits
    erlang:phash2(Input, 1 bsl Bits).

-spec make_keyspace_filter(emqx_ds:replay(), keymapper()) -> keyspace_filter().
make_keyspace_filter({TopicFilter, StartTime}, Keymapper) ->
    Bitstring = compute_bitstring(TopicFilter, StartTime, Keymapper),
    HashBitmask = compute_topic_bitmask(TopicFilter, Keymapper),
    TimeBitmask = compute_time_bitmask(Keymapper),
    HashBitfilter = Bitstring band HashBitmask,
    TimeBitfilter = Bitstring band TimeBitmask,
    #filter{
        keymapper = Keymapper,
        topic_filter = TopicFilter,
        start_time = StartTime,
        hash_bitfilter = HashBitfilter,
        hash_bitmask = HashBitmask,
        time_bitfilter = TimeBitfilter,
        time_bitmask = TimeBitmask
    }.

-spec compute_initial_seek(keyspace_filter()) -> integer().
compute_initial_seek(#filter{hash_bitfilter = HashBitfilter, time_bitfilter = TimeBitfilter}) ->
    % Should be the same as `compute_initial_seek(0, Filter)`.
    HashBitfilter bor TimeBitfilter.

-spec compute_next_seek(integer(), keyspace_filter()) -> integer().
compute_next_seek(
    Bitstring,
    Filter = #filter{
        hash_bitfilter = HashBitfilter,
        hash_bitmask = HashBitmask,
        time_bitfilter = TimeBitfilter,
        time_bitmask = TimeBitmask
    }
) ->
    HashMatches = topic_hash_matches(Bitstring, HashBitfilter, HashBitmask),
    TimeMatches = time_matches(Bitstring, TimeBitfilter, TimeBitmask),
    compute_next_seek(HashMatches, TimeMatches, Bitstring, Filter).

%%================================================================================
%% Internal functions
%%================================================================================

compute_bitstring(Topic, Timestamp, [{timestamp, Offset, Size} | Rest], Acc) ->
    I = (Timestamp bsr Offset) band ones(Size),
    compute_bitstring(Topic, Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring([], Timestamp, [{hash, level, Size} | Rest], Acc) ->
    I = hash(<<"/">>, Size),
    compute_bitstring([], Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring([Level | Tail], Timestamp, [{hash, level, Size} | Rest], Acc) ->
    I = hash(Level, Size),
    compute_bitstring(Tail, Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring(Tail, Timestamp, [{hash, levels, Size} | Rest], Acc) ->
    I = hash(Tail, Size),
    compute_bitstring(Tail, Timestamp, Rest, bitwise_concat(Acc, I, Size));
compute_bitstring(_, _, [], Acc) ->
    Acc.

compute_topic_bitmask(Filter, [{timestamp, _, Size} | Rest], Acc) ->
    compute_topic_bitmask(Filter, Rest, bitwise_concat(Acc, 0, Size));
compute_topic_bitmask(['#'], [{hash, _, Size} | Rest], Acc) ->
    compute_topic_bitmask(['#'], Rest, bitwise_concat(Acc, 0, Size));
compute_topic_bitmask(['+' | Tail], [{hash, _, Size} | Rest], Acc) ->
    compute_topic_bitmask(Tail, Rest, bitwise_concat(Acc, 0, Size));
compute_topic_bitmask([], [{hash, level, Size} | Rest], Acc) ->
    compute_topic_bitmask([], Rest, bitwise_concat(Acc, ones(Size), Size));
compute_topic_bitmask([_ | Tail], [{hash, level, Size} | Rest], Acc) ->
    compute_topic_bitmask(Tail, Rest, bitwise_concat(Acc, ones(Size), Size));
compute_topic_bitmask(Tail, [{hash, levels, Size} | Rest], Acc) ->
    Mask =
        case lists:member('+', Tail) orelse lists:member('#', Tail) of
            true -> 0;
            false -> ones(Size)
        end,
    compute_topic_bitmask([], Rest, bitwise_concat(Acc, Mask, Size));
compute_topic_bitmask(_, [], Acc) ->
    Acc.

compute_time_bitmask([{timestamp, _, Size} | Rest], Acc) ->
    compute_time_bitmask(Rest, bitwise_concat(Acc, ones(Size), Size));
compute_time_bitmask([{hash, _, Size} | Rest], Acc) ->
    compute_time_bitmask(Rest, bitwise_concat(Acc, 0, Size));
compute_time_bitmask([], Acc) ->
    Acc.

bitwise_concat(Acc, Item, ItemSize) ->
    (Acc bsl ItemSize) bor Item.

ones(Bits) ->
    1 bsl Bits - 1.

-spec successor(key()) -> key().
successor(Key) ->
    <<Key/binary, 0:8>>.

%% Worked example for `match_next' (hex digits denote hash sub-bitstrings):
%%
%% |123|345|678|
%%  foo bar baz

%% |123|000|678| - |123|fff|678|

%% foo + baz

%% |fff|000|fff|

%% |123|000|678|

%% |123|056|678| & |fff|000|fff| = |123|000|678|.

match_next(
    Bitstring,
    Value,
    Filter = #filter{
        topic_filter = TopicFilter,
        hash_bitfilter = HashBitfilter,
        hash_bitmask = HashBitmask,
        time_bitfilter = TimeBitfilter,
        time_bitmask = TimeBitmask
    }
) ->
    HashMatches = topic_hash_matches(Bitstring, HashBitfilter, HashBitmask),
    TimeMatches = time_matches(Bitstring, TimeBitfilter, TimeBitmask),
    case HashMatches and TimeMatches of
        true ->
            Message = {Topic, _Payload} = unwrap_message_value(Value),
            case emqx_topic:match(Topic, TopicFilter) of
                true ->
                    Message;
                false ->
                    next
            end;
        false ->
            compute_next_seek(HashMatches, TimeMatches, Bitstring, Filter)
    end.

%% `Bitstring` is out of the hash space defined by `HashBitfilter`.
compute_next_seek(
    _HashMatches = false,
    _TimeMatches,
    Bitstring,
    Filter = #filter{
        keymapper = Keymapper,
        hash_bitfilter = HashBitfilter,
        hash_bitmask = HashBitmask,
        time_bitfilter = TimeBitfilter,
        time_bitmask = TimeBitmask
    }
) ->
    NextBitstring = compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper),
    case NextBitstring of
        none ->
            none;
        _ ->
            TimeMatches = time_matches(NextBitstring, TimeBitfilter, TimeBitmask),
            compute_next_seek(true, TimeMatches, NextBitstring, Filter)
    end;
%% `Bitstring` is out of the time range defined by `TimeBitfilter`.
compute_next_seek(
    _HashMatches = true,
    _TimeMatches = false,
    Bitstring,
    #filter{
        time_bitfilter = TimeBitfilter,
        time_bitmask = TimeBitmask
    }
) ->
    compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask);
compute_next_seek(true, true, Bitstring, _It) ->
    Bitstring.

topic_hash_matches(Bitstring, HashBitfilter, HashBitmask) ->
    (Bitstring band HashBitmask) == HashBitfilter.

time_matches(Bitstring, TimeBitfilter, TimeBitmask) ->
    (Bitstring band TimeBitmask) >= TimeBitfilter.

compute_time_seek(Bitstring, TimeBitfilter, TimeBitmask) ->
    % Replace the bits of the timestamp in `Bitstring` with bits from `TimeBitfilter`.
    (Bitstring band (bnot TimeBitmask)) bor TimeBitfilter.

%% Find the closest bitstring which is:
%% * greater than `Bitstring`,
%% * and falls into the hash space defined by `HashBitfilter`.
%% Note that the result can end up "back" in time and out of the time range.
compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Keymapper) ->
    Sources = Keymapper#keymapper.source,
    Size = Keymapper#keymapper.bitsize,
    compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size).

compute_topic_seek(Bitstring, HashBitfilter, HashBitmask, Sources, Size) ->
    % NOTE
    % We're iterating through `Bitstring` here, in lockstep with `HashBitfilter`
    % and `HashBitmask`, starting from the least significant bits. Each bitsource
    % in `Sources` has a bitsize `S` and, accordingly, gives us a sub-bitstring
    % `S` bits long which we interpret as a "digit". There are 2 flavors of those
    % "digits":
    %  * a regular digit with 2^S possible values,
    %  * a degenerate digit with exactly 1 possible value U (represented with 0).
    % Our goal here is to find a successor of `Bitstring` and perform a kind of
    % digit-by-digit addition operation with carry propagation.
    NextSeek = zipfoldr3(
        fun(Source, Substring, Filter, LBitmask, Offset, Acc) ->
            case Source of
                {hash, _, S} when LBitmask =:= 0 ->
                    % Regular case
                    bitwise_add_digit(Substring, Acc, S, Offset);
                {hash, _, _} when LBitmask =/= 0, Substring < Filter ->
                    % Degenerate case, I_digit < U, no overflow.
                    % The successor is `U bsl Offset`, which is equivalent to 0.
                    0;
                {hash, _, S} when LBitmask =/= 0, Substring > Filter ->
                    % Degenerate case, I_digit > U, overflow.
                    % The successor is `(1 bsl Size + U) bsl Offset`.
                    overflow_digit(S, Offset);
                {hash, _, S} when LBitmask =/= 0 ->
                    % Degenerate case, I_digit = U.
                    % Perform digit addition with I_digit = 0, assuming the "digit"
                    % has 0 bits of information (but is `S` bits long at the same
                    % time). This will overflow only if the result of the previous
                    % iteration was an overflow.
                    bitwise_add_digit(0, Acc, 0, S, Offset);
                {timestamp, _, S} ->
                    % Regular case
                    bitwise_add_digit(Substring, Acc, S, Offset)
            end
        end,
        0,
        Bitstring,
        HashBitfilter,
        HashBitmask,
        Size,
        Sources
    ),
    case NextSeek bsr Size of
        _Carry = 0 ->
            % Found the successor.
            % We need to recover the values of those degenerate digits which we
            % represented with 0 during the digit-by-digit iteration.
            NextSeek bor (HashBitfilter band HashBitmask);
        _Carry = 1 ->
            % We got "carried away" past the range, time to stop iteration.
            none
    end.

bitwise_add_digit(Digit, Number, Width, Offset) ->
    bitwise_add_digit(Digit, Number, Width, Width, Offset).

%% Add a "digit" (represented with the integer `Digit`) to `Number`, assuming
%% this digit starts at `Offset` bits in `Number` and is `Width` bits long.
%% Perform an overflow if the result of the addition would not fit into `Bits`
%% bits.
bitwise_add_digit(Digit, Number, Bits, Width, Offset) ->
    Sum = (Digit bsl Offset) + Number,
    case (Sum bsr Offset) < (1 bsl Bits) of
        true -> Sum;
        false -> overflow_digit(Width, Offset)
    end.

%% Construct a number which denotes an overflow of a digit that starts at
%% `Offset` bits and is `Width` bits long.
overflow_digit(Width, Offset) ->
    (1 bsl Width) bsl Offset.

%% Iterate through the sub-bitstrings of 3 integers in lockstep, starting from
%% the least significant bits.
%%
%% Each integer is assumed to be `Size` bits long. The lengths of the
%% sub-bitstrings are specified in the `Sources` list, in order from the most
%% significant bits to the least significant. Each iteration calls `FoldFun` with:
%% * the bitsource that was used to extract the sub-bitstrings,
%% * 3 sub-bitstrings in integer representation,
%% * the bit offset into the integers,
%% * the current accumulator.
-spec zipfoldr3(FoldFun, Acc, integer(), integer(), integer(), _Size :: bits(), [bitsource()]) ->
    Acc
when
    FoldFun :: fun((bitsource(), integer(), integer(), integer(), _Offset :: bits(), Acc) -> Acc).
zipfoldr3(_FoldFun, Acc, _, _, _, 0, []) ->
    Acc;
zipfoldr3(FoldFun, Acc, I1, I2, I3, Offset, [Source = {_, _, S} | Rest]) ->
    OffsetNext = Offset - S,
    AccNext = zipfoldr3(FoldFun, Acc, I1, I2, I3, OffsetNext, Rest),
    FoldFun(
        Source,
        substring(I1, OffsetNext, S),
        substring(I2, OffsetNext, S),
        substring(I3, OffsetNext, S),
        OffsetNext,
        AccNext
    ).

substring(I, Offset, Size) ->
    (I bsr Offset) band ones(Size).

%% @doc Generate a column family ID for the MQTT messages
-spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()].
data_cf(GenId) ->
    ?MODULE_STRING ++ integer_to_list(GenId).

make_refresh_counter({every, N}) when is_integer(N), N > 0 ->
    {0, N};
make_refresh_counter(undefined) ->
    undefined.

maybe_refresh_iterator(It = #it{refresh_counter = {N, N}}) ->
    refresh_iterator(It#it{refresh_counter = {0, N}});
maybe_refresh_iterator(It = #it{refresh_counter = {M, N}}) ->
    It#it{refresh_counter = {M + 1, N}};
maybe_refresh_iterator(It = #it{refresh_counter = undefined}) ->
    It.

stop_iteration(It) ->
    ok = rocksdb:iterator_close(It#it.handle),
    none.

@ -0,0 +1,217 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc Replication layer for DS backends that don't support
%% replication on their own.
-module(emqx_ds_replication_layer).

-export([
    list_shards/1,
    open_db/2,
    drop_db/1,
    store_batch/3,
    get_streams/3,
    make_iterator/3,
    next/2
]).

%% internal exports:
-export([
    do_open_shard_v1/2,
    do_drop_shard_v1/1,
    do_get_streams_v1/3,
    do_make_iterator_v1/4,
    do_next_v1/3
]).

-export_type([shard_id/0, stream/0, iterator/0, message_id/0]).

%%================================================================================
%% Type declarations
%%================================================================================

-type db() :: emqx_ds:db().

-type shard_id() :: {db(), atom()}.

%% This record encapsulates the stream entity from the replication
%% level.
%%
%% TODO: currently the stream is hardwired to only support the
%% internal rocksdb storage. In the future we want to add other
%% implementations for emqx_ds, so this type has to take this into
%% account.
-record(stream, {
    shard :: emqx_ds_replication_layer:shard_id(),
    enc :: emqx_ds_storage_layer:stream()
}).

-opaque stream() :: #stream{}.

-record(iterator, {
    shard :: emqx_ds_replication_layer:shard_id(),
    enc :: emqx_ds_storage_layer:iterator()
}).

-opaque iterator() :: #iterator{}.

-type message_id() :: emqx_ds_storage_layer:message_id().

%%================================================================================
%% API functions
%%================================================================================

-spec list_shards(db()) -> [shard_id()].
list_shards(DB) ->
    %% TODO: milestone 5
    lists:map(
        fun(Node) ->
            shard_id(DB, Node)
        end,
        list_nodes()
    ).

-spec open_db(db(), emqx_ds:create_db_opts()) -> ok | {error, _}.
open_db(DB, Opts) ->
    %% TODO: improve error reporting, don't just crash
    lists:foreach(
        fun(Node) ->
            Shard = shard_id(DB, Node),
            ok = emqx_ds_proto_v1:open_shard(Node, Shard, Opts)
        end,
        list_nodes()
    ).

-spec drop_db(db()) -> ok | {error, _}.
drop_db(DB) ->
    lists:foreach(
        fun(Node) ->
            Shard = shard_id(DB, Node),
            ok = emqx_ds_proto_v1:drop_shard(Node, Shard)
        end,
        list_nodes()
    ).

-spec store_batch(db(), [emqx_types:message()], emqx_ds:message_store_opts()) ->
    emqx_ds:store_batch_result().
store_batch(DB, Msg, Opts) ->
    %% TODO: Currently we store messages locally.
    Shard = shard_id(DB, node()),
    emqx_ds_storage_layer:store_batch(Shard, Msg, Opts).

-spec get_streams(db(), emqx_ds:topic_filter(), emqx_ds:time()) ->
    [{emqx_ds:stream_rank(), stream()}].
get_streams(DB, TopicFilter, StartTime) ->
    Shards = list_shards(DB),
    lists:flatmap(
        fun(Shard) ->
            Node = node_of_shard(Shard),
            Streams = emqx_ds_proto_v1:get_streams(Node, Shard, TopicFilter, StartTime),
            lists:map(
                fun({RankY, Stream}) ->
                    RankX = Shard,
                    Rank = {RankX, RankY},
                    {Rank, #stream{
                        shard = Shard,
                        enc = Stream
                    }}
                end,
                Streams
            )
        end,
        Shards
    ).

-spec make_iterator(stream(), emqx_ds:topic_filter(), emqx_ds:time()) ->
    emqx_ds:make_iterator_result(iterator()).
make_iterator(Stream, TopicFilter, StartTime) ->
    #stream{shard = Shard, enc = StorageStream} = Stream,
    Node = node_of_shard(Shard),
    case emqx_ds_proto_v1:make_iterator(Node, Shard, StorageStream, TopicFilter, StartTime) of
        {ok, Iter} ->
            {ok, #iterator{shard = Shard, enc = Iter}};
        Err = {error, _} ->
            Err
    end.

-spec next(iterator(), pos_integer()) -> emqx_ds:next_result(iterator()).
next(Iter0, BatchSize) ->
    #iterator{shard = Shard, enc = StorageIter0} = Iter0,
    Node = node_of_shard(Shard),
    %% TODO: the iterator can contain information that is useful for
    %% reconstructing messages sent over the network. For example,
    %% when we send messages with the learned topic index, we could
    %% send the static part of the topic once, and append it to the
    %% messages on the receiving node, hence saving some network.
    %%
    %% This kind of trickery should probably be done here in the
    %% replication layer. Or, perhaps, in the logic layer.
    case emqx_ds_proto_v1:next(Node, Shard, StorageIter0, BatchSize) of
        {ok, StorageIter, Batch} ->
            Iter = #iterator{shard = Shard, enc = StorageIter},
            {ok, Iter, Batch};
        Other ->
            Other
    end.
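%% An illustrative consumption sketch (not part of the original module),
%% assuming the happy path: list the streams for a topic filter, open an
%% iterator per stream, and fetch one batch of up to 100 messages from each.
consume_example(DB, TopicFilter, StartTime) ->
    lists:map(
        fun({_Rank, Stream}) ->
            {ok, It0} = make_iterator(Stream, TopicFilter, StartTime),
            {ok, _It, Batch} = next(It0, 100),
            Batch
        end,
        get_streams(DB, TopicFilter, StartTime)
    ).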

%%================================================================================
%% behavior callbacks
%%================================================================================

%%================================================================================
%% Internal exports (RPC targets)
%%================================================================================

-spec do_open_shard_v1(shard_id(), emqx_ds:create_db_opts()) -> ok.
do_open_shard_v1(Shard, Opts) ->
    emqx_ds_storage_layer:open_shard(Shard, Opts).

-spec do_drop_shard_v1(shard_id()) -> ok.
do_drop_shard_v1(Shard) ->
    emqx_ds_storage_layer:drop_shard(Shard).

-spec do_get_streams_v1(shard_id(), emqx_ds:topic_filter(), emqx_ds:time()) ->
    [{integer(), _Stream}].
do_get_streams_v1(Shard, TopicFilter, StartTime) ->
    emqx_ds_storage_layer:get_streams(Shard, TopicFilter, StartTime).

-spec do_make_iterator_v1(
    shard_id(), emqx_ds_storage_layer:stream(), emqx_ds:topic_filter(), emqx_ds:time()
) ->
    {ok, iterator()} | {error, _}.
do_make_iterator_v1(Shard, Stream, TopicFilter, StartTime) ->
    emqx_ds_storage_layer:make_iterator(Shard, Stream, TopicFilter, StartTime).

-spec do_next_v1(shard_id(), emqx_ds_storage_layer:iterator(), pos_integer()) ->
    emqx_ds:next_result(emqx_ds_storage_layer:iterator()).
do_next_v1(Shard, Iter, BatchSize) ->
    emqx_ds_storage_layer:next(Shard, Iter, BatchSize).

%%================================================================================
%% Internal functions
%%================================================================================

shard_id(DB, Node) ->
    %% TODO: don't bake the node name into the schema, don't repeat
    %% Mnesia's $1M mistake.
    {DB, Node}.

-spec node_of_shard(shard_id()) -> node().
node_of_shard({_DB, Node}) ->
    Node.

list_nodes() ->
    mria:running_nodes().

@ -0,0 +1,418 @@
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
%% @doc A storage layout based on learned topic structure and using
|
||||
%% bitfield mapping for the varying topic layers.
|
||||
-module(emqx_ds_storage_bitfield_lts).
|
||||
|
||||
-behaviour(emqx_ds_storage_layer).
|
||||
|
||||
%% API:
|
||||
-export([]).
|
||||
|
||||
%% behavior callbacks:
|
||||
-export([create/4, open/5, store_batch/4, get_streams/4, make_iterator/5, next/4]).
|
||||
|
||||
%% internal exports:
|
||||
-export([format_key/2]).
|
||||
|
||||
-export_type([options/0]).
|
||||
|
||||
-include_lib("emqx_utils/include/emqx_message.hrl").
|
||||
-include_lib("snabbkaffe/include/trace.hrl").
|
||||
|
||||
%%================================================================================
|
||||
%% Type declarations
|
||||
%%================================================================================
|
||||
|
||||
-type options() ::
|
||||
#{
|
||||
bits_per_wildcard_level => pos_integer(),
|
||||
topic_index_bytes => pos_integer(),
|
||||
epoch_bits => non_neg_integer()
|
||||
}.
|
||||
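
%% For example, a layout could be configured as follows (an
%% illustrative sketch; the values shown are the defaults that
%% create/4 below falls back to):
%%
%%   #{
%%       bits_per_wildcard_level => 64,
%%       topic_index_bytes => 4,
%%       epoch_bits => 10
%%   }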

%% Permanent state:
-type schema() ::
    #{
        bits_per_wildcard_level := pos_integer(),
        topic_index_bytes := pos_integer(),
        ts_bits := non_neg_integer(),
        ts_offset_bits := non_neg_integer()
    }.

%% Runtime state:
-record(s, {
    db :: rocksdb:db_handle(),
    data :: rocksdb:cf_handle(),
    trie :: emqx_ds_lts:trie(),
    keymappers :: array:array(emqx_ds_bitmask_keymapper:keymapper()),
    ts_offset :: non_neg_integer()
}).

-type s() :: #s{}.

-record(stream, {
    storage_key :: emqx_ds_lts:msg_storage_key()
}).

-record(it, {
    topic_filter :: emqx_ds:topic_filter(),
    start_time :: emqx_ds:time(),
    storage_key :: emqx_ds_lts:msg_storage_key(),
    last_seen_key = <<>> :: binary()
}).

-type iterator() :: #it{}.

-define(COUNTER, emqx_ds_storage_bitfield_lts_counter).

%% Limit on the number of wildcard levels in the learned topic trie:
-define(WILDCARD_LIMIT, 10).

-include("emqx_ds_bitmask.hrl").

%%================================================================================
%% API functions
%%================================================================================

%%================================================================================
%% behavior callbacks
%%================================================================================

-spec create(
    emqx_ds_replication_layer:shard_id(),
    rocksdb:db_handle(),
    emqx_ds_storage_layer:gen_id(),
    options()
) ->
    {schema(), emqx_ds_storage_layer:cf_refs()}.
create(_ShardId, DBHandle, GenId, Options) ->
    %% Get options:
    BitsPerTopicLevel = maps:get(bits_per_wildcard_level, Options, 64),
    TopicIndexBytes = maps:get(topic_index_bytes, Options, 4),
    %% 10 bits -> 1024 ms -> ~1 sec
    TSOffsetBits = maps:get(epoch_bits, Options, 10),
    %% Create column families:
    DataCFName = data_cf(GenId),
    TrieCFName = trie_cf(GenId),
    {ok, DataCFHandle} = rocksdb:create_column_family(DBHandle, DataCFName, []),
    {ok, TrieCFHandle} = rocksdb:create_column_family(DBHandle, TrieCFName, []),
    %% Create schema:
    Schema = #{
        bits_per_wildcard_level => BitsPerTopicLevel,
        topic_index_bytes => TopicIndexBytes,
        ts_bits => 64,
        ts_offset_bits => TSOffsetBits
    },
    {Schema, [{DataCFName, DataCFHandle}, {TrieCFName, TrieCFHandle}]}.

-spec open(
    emqx_ds_replication_layer:shard_id(),
    rocksdb:db_handle(),
    emqx_ds_storage_layer:gen_id(),
    emqx_ds_storage_layer:cf_refs(),
    schema()
) ->
    s().
open(_Shard, DBHandle, GenId, CFRefs, Schema) ->
    #{
        bits_per_wildcard_level := BitsPerTopicLevel,
        topic_index_bytes := TopicIndexBytes,
        ts_bits := TSBits,
        ts_offset_bits := TSOffsetBits
    } = Schema,
    {_, DataCF} = lists:keyfind(data_cf(GenId), 1, CFRefs),
    {_, TrieCF} = lists:keyfind(trie_cf(GenId), 1, CFRefs),
    Trie = restore_trie(TopicIndexBytes, DBHandle, TrieCF),
    %% If the user's topics have more than 10 learned wildcard levels
    %% (more than 2, really), then it's total carnage; the learned
    %% topic structure won't help.
    MaxWildcardLevels = ?WILDCARD_LIMIT,
    KeymapperCache = array:from_list(
        [
            make_keymapper(TopicIndexBytes, BitsPerTopicLevel, TSBits, TSOffsetBits, N)
         || N <- lists:seq(0, MaxWildcardLevels)
        ]
    ),
    #s{
        db = DBHandle,
        data = DataCF,
        trie = Trie,
        keymappers = KeymapperCache,
        ts_offset = TSOffsetBits
    }.

-spec store_batch(
    emqx_ds_replication_layer:shard_id(), s(), [emqx_types:message()], emqx_ds:message_store_opts()
) ->
    emqx_ds:store_batch_result().
store_batch(_ShardId, S = #s{db = DB, data = Data}, Messages, _Options) ->
    lists:foreach(
        fun(Msg) ->
            {Key, _} = make_key(S, Msg),
            Val = serialize(Msg),
            rocksdb:put(DB, Data, Key, Val, [])
        end,
        Messages
    ).

get_streams(_Shard, #s{trie = Trie}, TopicFilter, _StartTime) ->
    Indexes = emqx_ds_lts:match_topics(Trie, TopicFilter),
    [#stream{storage_key = I} || I <- Indexes].

make_iterator(_Shard, _Data, #stream{storage_key = StorageKey}, TopicFilter, StartTime) ->
    %% Note: it's a good idea to keep the iterator structure lean,
    %% since it can be stored on a remote node that could update its
    %% code independently from us.
    {ok, #it{
        topic_filter = TopicFilter,
        start_time = StartTime,
        storage_key = StorageKey
    }}.

next(_Shard, Schema = #s{ts_offset = TSOffset}, It, BatchSize) ->
    %% Compute safe cutoff time.
    %% It's the point in time where the last complete epoch ends, so we need to know
    %% the current time to compute it.
    Now = emqx_message:timestamp_now(),
    SafeCutoffTime = (Now bsr TSOffset) bsl TSOffset,
    next_until(Schema, It, SafeCutoffTime, BatchSize).

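%% A minimal sketch of the cutoff arithmetic above (illustrative
%% shell session, not part of this module): with the default 10
%% offset bits an epoch is 1024 ms wide, and the cutoff is the wall
%% clock rounded down to the nearest epoch boundary:
%%
%%   1> TSOffset = 10, Now = 1700000003456.
%%   2> (Now bsr TSOffset) bsl TSOffset.
%%   1700000003072    %% = Now - (Now rem 1024)
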
next_until(_Schema, It, SafeCutoffTime, _BatchSize) when It#it.start_time >= SafeCutoffTime ->
    %% We're in the middle of the current epoch, so we can't yet iterate over it.
    %% It would be unsafe otherwise: messages can be stored in the current epoch
    %% concurrently with iterating over it. They can end up earlier (in the iteration
    %% order) due to the nature of keymapping, potentially causing us to miss them.
    {ok, It, []};
next_until(#s{db = DB, data = CF, keymappers = Keymappers}, It, SafeCutoffTime, BatchSize) ->
    #it{
        start_time = StartTime,
        storage_key = {TopicIndex, Varying}
    } = It,
    %% Make filter:
    Inequations = [
        {'=', TopicIndex},
        {StartTime, '..', SafeCutoffTime - 1},
        %% Unique integer:
        any
        %% Varying topic levels:
        | lists:map(
            fun
                ('+') ->
                    any;
                (TopicLevel) when is_binary(TopicLevel) ->
                    {'=', hash_topic_level(TopicLevel)}
            end,
            Varying
        )
    ],
    %% Obtain a keymapper for the current number of varying levels.
    NVarying = length(Varying),
    %% Assert:
    NVarying =< ?WILDCARD_LIMIT orelse
        error({too_many_varying_topic_levels, NVarying}),
    Keymapper = array:get(NVarying, Keymappers),
    Filter =
        #filter{range_min = LowerBound, range_max = UpperBound} = emqx_ds_bitmask_keymapper:make_filter(
            Keymapper, Inequations
        ),
    {ok, ITHandle} = rocksdb:iterator(DB, CF, [
        {iterate_lower_bound, emqx_ds_bitmask_keymapper:key_to_bitstring(Keymapper, LowerBound)},
        {iterate_upper_bound, emqx_ds_bitmask_keymapper:key_to_bitstring(Keymapper, UpperBound + 1)}
    ]),
    try
        put(?COUNTER, 0),
        next_loop(ITHandle, Keymapper, Filter, SafeCutoffTime, It, [], BatchSize)
    after
        rocksdb:iterator_close(ITHandle),
        erase(?COUNTER)
    end.

%%================================================================================
%% Internal functions
%%================================================================================

next_loop(_ITHandle, _KeyMapper, _Filter, _Cutoff, It, Acc, 0) ->
    {ok, It, lists:reverse(Acc)};
next_loop(ITHandle, KeyMapper, Filter, Cutoff, It0, Acc0, N0) ->
    inc_counter(),
    #it{last_seen_key = Key0} = It0,
    case emqx_ds_bitmask_keymapper:bin_increment(Filter, Key0) of
        overflow ->
            {ok, It0, lists:reverse(Acc0)};
        Key1 ->
            %% assert
            true = Key1 > Key0,
            case rocksdb:iterator_move(ITHandle, {seek, Key1}) of
                {ok, Key, Val} ->
                    {N, It, Acc} =
                        traverse_interval(ITHandle, Filter, Cutoff, Key, Val, It0, Acc0, N0),
                    next_loop(ITHandle, KeyMapper, Filter, Cutoff, It, Acc, N);
                {error, invalid_iterator} ->
                    {ok, It0, lists:reverse(Acc0)}
            end
    end.

traverse_interval(ITHandle, Filter, Cutoff, Key, Val, It0, Acc0, N) ->
    It = It0#it{last_seen_key = Key},
    case emqx_ds_bitmask_keymapper:bin_checkmask(Filter, Key) of
        true ->
            Msg = deserialize(Val),
            case check_message(Cutoff, It, Msg) of
                true ->
                    Acc = [Msg | Acc0],
                    traverse_interval(ITHandle, Filter, Cutoff, It, Acc, N - 1);
                false ->
                    traverse_interval(ITHandle, Filter, Cutoff, It, Acc0, N);
                overflow ->
                    {0, It0, Acc0}
            end;
        false ->
            {N, It, Acc0}
    end.

traverse_interval(_ITHandle, _Filter, _Cutoff, It, Acc, 0) ->
    {0, It, Acc};
traverse_interval(ITHandle, Filter, Cutoff, It, Acc, N) ->
    inc_counter(),
    case rocksdb:iterator_move(ITHandle, next) of
        {ok, Key, Val} ->
            traverse_interval(ITHandle, Filter, Cutoff, Key, Val, It, Acc, N);
        {error, invalid_iterator} ->
            {0, It, Acc}
    end.

-spec check_message(emqx_ds:time(), iterator(), emqx_types:message()) ->
    true | false | overflow.
check_message(
    Cutoff,
    _It,
    #message{timestamp = Timestamp}
) when Timestamp >= Cutoff ->
    %% We hit the current epoch; we can't continue iterating over it yet.
    %% It would be unsafe otherwise: messages can be stored in the current epoch
    %% concurrently with iterating over it. They can end up earlier (in the iteration
    %% order) due to the nature of keymapping, potentially causing us to miss them.
    overflow;
check_message(
    _Cutoff,
    #it{start_time = StartTime, topic_filter = TopicFilter},
    #message{timestamp = Timestamp, topic = Topic}
) when Timestamp >= StartTime ->
    emqx_topic:match(emqx_topic:words(Topic), TopicFilter);
check_message(_Cutoff, _It, _Msg) ->
    false.

format_key(KeyMapper, Key) ->
    Vec = [integer_to_list(I, 16) || I <- emqx_ds_bitmask_keymapper:key_to_vector(KeyMapper, Key)],
    lists:flatten(io_lib:format("~.16B (~s)", [Key, string:join(Vec, ",")])).

-spec make_key(s(), emqx_types:message()) -> {binary(), [binary()]}.
make_key(#s{keymappers = KeyMappers, trie = Trie}, #message{timestamp = Timestamp, topic = TopicBin}) ->
    Tokens = emqx_topic:tokens(TopicBin),
    {TopicIndex, Varying} = emqx_ds_lts:topic_key(Trie, fun threshold_fun/1, Tokens),
    VaryingHashes = [hash_topic_level(I) || I <- Varying],
    KeyMapper = array:get(length(Varying), KeyMappers),
    KeyBin = make_key(KeyMapper, TopicIndex, Timestamp, VaryingHashes),
    {KeyBin, Varying}.

-spec make_key(emqx_ds_bitmask_keymapper:keymapper(), emqx_ds_lts:static_key(), emqx_ds:time(), [
    non_neg_integer()
]) ->
    binary().
make_key(KeyMapper, TopicIndex, Timestamp, Varying) ->
    UniqueInteger = erlang:unique_integer([monotonic, positive]),
    emqx_ds_bitmask_keymapper:key_to_bitstring(
        KeyMapper,
        emqx_ds_bitmask_keymapper:vector_to_key(KeyMapper, [
            TopicIndex, Timestamp, UniqueInteger | Varying
        ])
    ).

%% TODO: don't hardcode the thresholds
threshold_fun(0) ->
    100;
threshold_fun(_) ->
    20.

hash_topic_level(TopicLevel) ->
    <<Int:64, _/binary>> = erlang:md5(TopicLevel),
    Int.

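%% A quick sanity sketch for the hashing above (illustrative shell
%% session): equal topic levels always collapse to the same 64-bit
%% integer, which is what lets the filter in next_until/4 match
%% varying levels by integer equality:
%%
%%   1> hash_topic_level(<<"bedroom">>) =:= hash_topic_level(<<"bedroom">>).
%%   true
%%   2> is_integer(hash_topic_level(<<"bedroom">>)).
%%   true
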
serialize(Msg) ->
    term_to_binary(Msg).

deserialize(Blob) ->
    binary_to_term(Blob).

-define(BYTE_SIZE, 8).

%% erlfmt-ignore
make_keymapper(TopicIndexBytes, BitsPerTopicLevel, TSBits, TSOffsetBits, N) ->
    Bitsources =
    %%  Dimension  Offset        Bitsize
        [{1,       0,            TopicIndexBytes * ?BYTE_SIZE},  %% Topic index
         {2,       TSOffsetBits, TSBits - TSOffsetBits       }] ++ %% Timestamp epoch
        [{3 + I,   0,            BitsPerTopicLevel           }  %% Varying topic levels
         || I <- lists:seq(1, N)] ++
        [{2,       0,            TSOffsetBits                },  %% Timestamp offset
         {3,       0,            64                          }], %% Unique integer
    Keymapper = emqx_ds_bitmask_keymapper:make_keymapper(lists:reverse(Bitsources)),
    %% Assert:
    case emqx_ds_bitmask_keymapper:bitsize(Keymapper) rem 8 of
        0 ->
            ok;
        _ ->
            error(#{'$msg' => "Non-even key size", bitsources => Bitsources})
    end,
    Keymapper.

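%% A worked size check for the layout above (illustrative, assuming
%% the defaults from create/4: TopicIndexBytes = 4, TSBits = 64,
%% TSOffsetBits = 10): with N = 0 varying levels the key occupies
%% 32 + 54 + 10 + 64 = 160 bits, an even 20 bytes, so the assertion
%% holds; every extra varying level appends another 64-bit segment.
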
-spec restore_trie(pos_integer(), rocksdb:db_handle(), rocksdb:cf_handle()) -> emqx_ds_lts:trie().
restore_trie(TopicIndexBytes, DB, CF) ->
    PersistCallback = fun(Key, Val) ->
        rocksdb:put(DB, CF, term_to_binary(Key), term_to_binary(Val), [])
    end,
    {ok, IT} = rocksdb:iterator(DB, CF, []),
    try
        Dump = read_persisted_trie(IT, rocksdb:iterator_move(IT, first)),
        TrieOpts = #{persist_callback => PersistCallback, static_key_size => TopicIndexBytes},
        emqx_ds_lts:trie_restore(TrieOpts, Dump)
    after
        rocksdb:iterator_close(IT)
    end.

read_persisted_trie(IT, {ok, KeyB, ValB}) ->
    [
        {binary_to_term(KeyB), binary_to_term(ValB)}
        | read_persisted_trie(IT, rocksdb:iterator_move(IT, next))
    ];
read_persisted_trie(_IT, {error, invalid_iterator}) ->
    [].

inc_counter() ->
    N = get(?COUNTER),
    put(?COUNTER, N + 1).

%% @doc Generate a column family ID for the MQTT messages
-spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()].
data_cf(GenId) ->
    "emqx_ds_storage_bitfield_lts_data" ++ integer_to_list(GenId).

%% @doc Generate a column family ID for the trie
-spec trie_cf(emqx_ds_storage_layer:gen_id()) -> [char()].
trie_cf(GenId) ->
    "emqx_ds_storage_bitfield_lts_trie" ++ integer_to_list(GenId).
@ -1,277 +1,255 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_ds_storage_layer).

-behaviour(gen_server).

%% API:
-export([start_link/2]).
-export([create_generation/3]).
%% Replication layer API:
-export([open_shard/2, drop_shard/1, store_batch/3, get_streams/3, make_iterator/4, next/3]).

-export([store/5]).
-export([delete/4]).
%% gen_server
-export([start_link/2, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).

-export([make_iterator/2, next/1]).
%% internal exports:
-export([db_dir/1]).

-export([
    preserve_iterator/2,
    restore_iterator/2,
    discard_iterator/2,
    ensure_iterator/3,
    discard_iterator_prefix/2,
    list_iterator_prefix/2,
    foldl_iterator_prefix/4
]).
-export_type([gen_id/0, generation/0, cf_refs/0, stream/0, iterator/0]).

%% behaviour callbacks:
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).

-export_type([cf_refs/0, gen_id/0, options/0, state/0, iterator/0]).
-export_type([db_options/0, db_write_options/0, db_read_options/0]).

-compile({inline, [meta_lookup/2]}).
-include_lib("snabbkaffe/include/snabbkaffe.hrl").

%%================================================================================
%% Type declarations
%%================================================================================

-type options() :: #{
    dir => file:filename()
}.
-type prototype() ::
    {emqx_ds_storage_reference, emqx_ds_storage_reference:options()}
    | {emqx_ds_storage_bitfield_lts, emqx_ds_storage_bitfield_lts:options()}.

%% see rocksdb:db_options()
-type db_options() :: proplists:proplist().
%% see rocksdb:write_options()
-type db_write_options() :: proplists:proplist().
%% see rocksdb:read_options()
-type db_read_options() :: proplists:proplist().
-type shard_id() :: emqx_ds_replication_layer:shard_id().

-type cf_refs() :: [{string(), rocksdb:cf_handle()}].

%% Message storage generation
%% Keep in mind that instances of this type are persisted in long-term storage.
-type generation() :: #{
    %% Module that handles data for the generation
-type gen_id() :: 0..16#ffff.

%% Note: this record might be stored permanently on a remote node.
-record(stream, {
    generation :: gen_id(),
    enc :: _EncapsulatedData,
    misc = #{} :: map()
}).

-opaque stream() :: #stream{}.

%% Note: this record might be stored permanently on a remote node.
-record(it, {
    generation :: gen_id(),
    enc :: _EncapsulatedData,
    misc = #{} :: map()
}).

-opaque iterator() :: #it{}.

%%%% Generation:

-type generation(Data) :: #{
    %% Module that handles data for the generation:
    module := module(),
    %% Module-specific data defined at generation creation time
    data := term(),
    %% Module-specific data defined at generation creation time:
    data := Data,
    %% When should this generation become active?
    %% This generation should only contain messages timestamped no earlier than that.
    %% The very first generation will have `since` equal 0.
    since := emqx_ds:time()
    since := emqx_ds:time(),
    until := emqx_ds:time() | undefined
}.

%% Schema for a generation. Persistent term.
-type generation_schema() :: generation(term()).

%% Runtime view of generation:
-type generation() :: generation(term()).

%%%% Shard:

-type shard(GenData) :: #{
    %% ID of the current generation (where the new data is written):
    current_generation := gen_id(),
    %% This data is used to create new generation:
    prototype := prototype(),
    %% Generations:
    {generation, gen_id()} => GenData
}.

%% Shard schema (persistent):
-type shard_schema() :: shard(generation_schema()).

%% Shard (runtime):
-type shard() :: shard(generation()).

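%% An illustrative shard schema instance (all values assumed, not
%% taken from a real deployment): a shard that has rotated its
%% generation once might look like
%%
%%   #{
%%       current_generation => 2,
%%       prototype => {emqx_ds_storage_bitfield_lts, #{}},
%%       {generation, 1} =>
%%           #{module => emqx_ds_storage_bitfield_lts, data => Data1,
%%             since => 0, until => 1000},
%%       {generation, 2} =>
%%           #{module => emqx_ds_storage_bitfield_lts, data => Data2,
%%             since => 1000, until => undefined}
%%   }
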
%%================================================================================
%% Generation callbacks
%%================================================================================

%% Create the new schema given generation id and the options.
%% Create rocksdb column families.
-callback create(shard_id(), rocksdb:db_handle(), gen_id(), _Options) ->
    {_Schema, cf_refs()}.

%% Open the existing schema
-callback open(shard_id(), rocksdb:db_handle(), gen_id(), cf_refs(), _Schema) ->
    _Data.

-callback store_batch(shard_id(), _Data, [emqx_types:message()], emqx_ds:message_store_opts()) ->
    emqx_ds:store_batch_result().

-callback get_streams(shard_id(), _Data, emqx_ds:topic_filter(), emqx_ds:time()) ->
    [_Stream].

-callback make_iterator(shard_id(), _Data, _Stream, emqx_ds:topic_filter(), emqx_ds:time()) ->
    emqx_ds:make_iterator_result(_Iterator).

-callback next(shard_id(), _Data, Iter, pos_integer()) ->
    {ok, Iter, [emqx_types:message()]} | {error, _}.

%%================================================================================
%% API for the replication layer
%%================================================================================

-spec open_shard(shard_id(), emqx_ds:builtin_db_opts()) -> ok.
open_shard(Shard, Options) ->
    emqx_ds_storage_layer_sup:ensure_shard(Shard, Options).

-spec drop_shard(shard_id()) -> ok.
drop_shard(Shard) ->
    catch emqx_ds_storage_layer_sup:stop_shard(Shard),
    ok = rocksdb:destroy(db_dir(Shard), []).

-spec store_batch(shard_id(), [emqx_types:message()], emqx_ds:message_store_opts()) ->
    emqx_ds:store_batch_result().
store_batch(Shard, Messages, Options) ->
    %% We always store messages in the current generation:
    GenId = generation_current(Shard),
    #{module := Mod, data := GenData} = generation_get(Shard, GenId),
    Mod:store_batch(Shard, GenData, Messages, Options).

-spec get_streams(shard_id(), emqx_ds:topic_filter(), emqx_ds:time()) ->
    [{integer(), stream()}].
get_streams(Shard, TopicFilter, StartTime) ->
    Gens = generations_since(Shard, StartTime),
    lists:flatmap(
        fun(GenId) ->
            #{module := Mod, data := GenData} = generation_get(Shard, GenId),
            Streams = Mod:get_streams(Shard, GenData, TopicFilter, StartTime),
            [
                {GenId, #stream{
                    generation = GenId,
                    enc = Stream
                }}
             || Stream <- Streams
            ]
        end,
        Gens
    ).

-spec make_iterator(shard_id(), stream(), emqx_ds:topic_filter(), emqx_ds:time()) ->
    emqx_ds:make_iterator_result(iterator()).
make_iterator(Shard, #stream{generation = GenId, enc = Stream}, TopicFilter, StartTime) ->
    #{module := Mod, data := GenData} = generation_get(Shard, GenId),
    case Mod:make_iterator(Shard, GenData, Stream, TopicFilter, StartTime) of
        {ok, Iter} ->
            {ok, #it{
                generation = GenId,
                enc = Iter
            }};
        {error, _} = Err ->
            Err
    end.

-spec next(shard_id(), iterator(), pos_integer()) ->
    emqx_ds:next_result(iterator()).
next(Shard, Iter = #it{generation = GenId, enc = GenIter0}, BatchSize) ->
    #{module := Mod, data := GenData} = generation_get(Shard, GenId),
    Current = generation_current(Shard),
    case Mod:next(Shard, GenData, GenIter0, BatchSize) of
        {ok, _GenIter, []} when GenId < Current ->
            %% This is a past generation. Storage layer won't write
            %% any more messages here. The iterator reached the end:
            %% the stream has been fully replayed.
            {ok, end_of_stream};
        {ok, GenIter, Batch} ->
            {ok, Iter#it{enc = GenIter}, Batch};
        Error = {error, _} ->
            Error
    end.

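%% A consumer loop sketch (illustrative only; replay/3 and handle/1
%% are hypothetical helpers, not part of this module) showing how
%% {ok, end_of_stream} terminates the replay of a fully-read past
%% generation:
%%
%%   replay(Shard, It0, BatchSize) ->
%%       case emqx_ds_storage_layer:next(Shard, It0, BatchSize) of
%%           {ok, end_of_stream} ->
%%               done;
%%           {ok, It, Batch} ->
%%               ok = handle(Batch),
%%               replay(Shard, It, BatchSize);
%%           {error, _} = Err ->
%%               Err
%%       end.
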
%%================================================================================
%% gen_server for the shard
%%================================================================================

-define(REF(ShardId), {via, gproc, {n, l, {?MODULE, ShardId}}}).

-spec start_link(shard_id(), emqx_ds:builtin_db_opts()) ->
    {ok, pid()}.
start_link(Shard, Options) ->
    gen_server:start_link(?REF(Shard), ?MODULE, {Shard, Options}, []).

-record(s, {
    shard :: emqx_ds:shard(),
    shard_id :: emqx_ds:shard_id(),
    db :: rocksdb:db_handle(),
    cf_iterator :: rocksdb:cf_handle(),
    cf_generations :: cf_refs()
    cf_refs :: cf_refs(),
    schema :: shard_schema(),
    shard :: shard()
}).

-record(it, {
    shard :: emqx_ds:shard(),
    gen :: gen_id(),
    replay :: emqx_ds:replay(),
    module :: module(),
    data :: term()
}).
%% Note: we specify gen_server requests as records to make use of Dialyzer:
-record(call_create_generation, {since :: emqx_ds:time()}).

-type gen_id() :: 0..16#ffff.

-opaque state() :: #s{}.
-opaque iterator() :: #it{}.

%% Contents of the default column family:
%%
%% [{<<"genNN">>, #generation{}}, ...,
%%  {<<"current">>, GenID}]
-type server_state() :: #s{}.

-define(DEFAULT_CF, "default").
-define(DEFAULT_CF_OPTS, []).

-define(ITERATOR_CF, "$iterators").

%% TODO
%% 1. CuckooTable might be of use here / `OptimizeForPointLookup(...)`.
%% 2. Supposedly might be compressed _very_ effectively.
%% 3. `inplace_update_support`?
-define(ITERATOR_CF_OPTS, []).

-define(REF(Keyspace, ShardId), {via, gproc, {n, l, {?MODULE, Keyspace, ShardId}}}).

%%================================================================================
%% Callbacks
%%================================================================================

-callback create_new(rocksdb:db_handle(), gen_id(), _Options :: term()) ->
    {_Schema, cf_refs()}.

-callback open(
    emqx_ds:shard(),
    rocksdb:db_handle(),
    gen_id(),
    cf_refs(),
    _Schema
) ->
    term().

-callback store(
    _Schema,
    _MessageID :: binary(),
    emqx_ds:time(),
    emqx_ds:topic(),
    _Payload :: binary()
) ->
    ok | {error, _}.

-callback delete(_Schema, _MessageID :: binary(), emqx_ds:time(), emqx_ds:topic()) ->
    ok | {error, _}.

-callback make_iterator(_Schema, emqx_ds:replay()) ->
    {ok, _It} | {error, _}.

-callback restore_iterator(_Schema, emqx_ds:replay(), binary()) -> {ok, _It} | {error, _}.

-callback preserve_iterator(_It) -> term().

-callback next(It) -> {value, binary(), It} | none | {error, closed}.

%%================================================================================
%% API functions
%%================================================================================

-spec start_link(emqx_ds:shard(), emqx_ds_storage_layer:options()) ->
    {ok, pid()}.
start_link(Shard = {Keyspace, ShardId}, Options) ->
    gen_server:start_link(?REF(Keyspace, ShardId), ?MODULE, {Shard, Options}, []).

-spec create_generation(
    emqx_ds:shard(), emqx_ds:time(), emqx_ds_conf:backend_config()
) ->
    {ok, gen_id()} | {error, nonmonotonic}.
create_generation({Keyspace, ShardId}, Since, Config = {_Module, _Options}) ->
    gen_server:call(?REF(Keyspace, ShardId), {create_generation, Since, Config}).

-spec store(emqx_ds:shard(), emqx_guid:guid(), emqx_ds:time(), emqx_ds:topic(), binary()) ->
    ok | {error, _}.
store(Shard, GUID, Time, Topic, Msg) ->
    {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Shard, Time),
    Mod:store(Data, GUID, Time, Topic, Msg).

-spec delete(emqx_ds:shard(), emqx_guid:guid(), emqx_ds:time(), emqx_ds:topic()) ->
    ok | {error, _}.
delete(Shard, GUID, Time, Topic) ->
    {_GenId, #{module := Mod, data := Data}} = meta_lookup_gen(Shard, Time),
    Mod:delete(Data, GUID, Time, Topic).

-spec make_iterator(emqx_ds:shard(), emqx_ds:replay()) ->
    {ok, iterator()} | {error, _TODO}.
make_iterator(Shard, Replay = {_, StartTime}) ->
    {GenId, Gen} = meta_lookup_gen(Shard, StartTime),
    open_iterator(Gen, #it{
        shard = Shard,
        gen = GenId,
        replay = Replay
    }).

-spec next(iterator()) -> {value, binary(), iterator()} | none | {error, closed}.
next(It = #it{module = Mod, data = ItData}) ->
    case Mod:next(ItData) of
        {value, Val, ItDataNext} ->
            {value, Val, It#it{data = ItDataNext}};
        {error, _} = Error ->
            Error;
        none ->
            case open_next_iterator(It) of
                {ok, ItNext} ->
                    next(ItNext);
                {error, _} = Error ->
                    Error;
                none ->
                    none
            end
    end.

-spec preserve_iterator(iterator(), emqx_ds:iterator_id()) ->
    ok | {error, _TODO}.
preserve_iterator(It = #it{}, IteratorID) ->
    iterator_put_state(IteratorID, It).

-spec restore_iterator(emqx_ds:shard(), emqx_ds:replay_id()) ->
    {ok, iterator()} | {error, _TODO}.
restore_iterator(Shard, ReplayID) ->
    case iterator_get_state(Shard, ReplayID) of
        {ok, Serial} ->
            restore_iterator_state(Shard, Serial);
        not_found ->
            {error, not_found};
        {error, _Reason} = Error ->
            Error
    end.

-spec ensure_iterator(emqx_ds:shard(), emqx_ds:iterator_id(), emqx_ds:replay()) ->
    {ok, iterator()} | {error, _TODO}.
ensure_iterator(Shard, IteratorID, Replay = {_TopicFilter, _StartMS}) ->
    case restore_iterator(Shard, IteratorID) of
        {ok, It} ->
            {ok, It};
        {error, not_found} ->
            {ok, It} = make_iterator(Shard, Replay),
            ok = emqx_ds_storage_layer:preserve_iterator(It, IteratorID),
            {ok, It};
        Error ->
            Error
    end.

-spec discard_iterator(emqx_ds:shard(), emqx_ds:replay_id()) ->
    ok | {error, _TODO}.
discard_iterator(Shard, ReplayID) ->
    iterator_delete(Shard, ReplayID).

-spec discard_iterator_prefix(emqx_ds:shard(), binary()) ->
    ok | {error, _TODO}.
discard_iterator_prefix(Shard, KeyPrefix) ->
    case do_discard_iterator_prefix(Shard, KeyPrefix) of
        {ok, _} -> ok;
        Error -> Error
    end.

-spec list_iterator_prefix(
    emqx_ds:shard(),
    binary()
) -> {ok, [emqx_ds:iterator_id()]} | {error, _TODO}.
list_iterator_prefix(Shard, KeyPrefix) ->
    do_list_iterator_prefix(Shard, KeyPrefix).

-spec foldl_iterator_prefix(
    emqx_ds:shard(),
    binary(),
    fun((_Key :: binary(), _Value :: binary(), Acc) -> Acc),
    Acc
) -> {ok, Acc} | {error, _TODO} when
    Acc :: term().
foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc) ->
    do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc).

%%================================================================================
%% behaviour callbacks
%%================================================================================

init({Shard, Options}) ->
init({ShardId, Options}) ->
    process_flag(trap_exit, true),
    {ok, S0} = open_db(Shard, Options),
    S = ensure_current_generation(S0),
    ok = populate_metadata(S),
    logger:set_process_metadata(#{shard_id => ShardId, domain => [ds, storage_layer, shard]}),
    erase_schema_runtime(ShardId),
    {ok, DB, CFRefs0} = rocksdb_open(ShardId, Options),
    {Schema, CFRefs} =
        case get_schema_persistent(DB) of
            not_found ->
                Prototype = maps:get(storage, Options),
                create_new_shard_schema(ShardId, DB, CFRefs0, Prototype);
            Scm ->
                {Scm, CFRefs0}
        end,
    Shard = open_shard(ShardId, DB, CFRefs, Schema),
    S = #s{
        shard_id = ShardId,
        db = DB,
        cf_refs = CFRefs,
        schema = Schema,
        shard = Shard
    },
    commit_metadata(S),
    {ok, S}.

handle_call({create_generation, Since, Config}, _From, S) ->
    case create_new_gen(Since, Config, S) of
        {ok, GenId, NS} ->
            {reply, {ok, GenId}, NS};
        {error, _} = Error ->
            {reply, Error, S}
    end;
handle_call(#call_create_generation{since = Since}, _From, S0) ->
    S = add_generation(S0, Since),
    commit_metadata(S),
    {reply, ok, S};
handle_call(_Call, _From, S) ->
    {reply, {error, unknown_call}, S}.

@ -281,346 +259,182 @@ handle_cast(_Cast, S) ->

handle_info(_Info, S) ->
    {noreply, S}.

terminate(_Reason, #s{db = DB, shard = Shard}) ->
    meta_erase(Shard),
terminate(_Reason, #s{db = DB, shard_id = ShardId}) ->
    erase_schema_runtime(ShardId),
    ok = rocksdb:close(DB).

%%================================================================================
%% Internal exports
%%================================================================================

%%================================================================================
%% Internal functions
%%================================================================================

-record(db, {handle :: rocksdb:db_handle(), cf_iterator :: rocksdb:cf_handle()}).
-spec open_shard(shard_id(), rocksdb:db_handle(), cf_refs(), shard_schema()) ->
    shard().
open_shard(ShardId, DB, CFRefs, ShardSchema) ->
    %% Transform generation schemas to generation runtime data:
    maps:map(
        fun
            ({generation, GenId}, GenSchema) ->
                open_generation(ShardId, DB, CFRefs, GenId, GenSchema);
            (_K, Val) ->
                Val
        end,
        ShardSchema
    ).

-spec populate_metadata(state()) -> ok.
populate_metadata(S = #s{shard = Shard, db = DBHandle, cf_iterator = CFIterator}) ->
    ok = meta_put(Shard, db, #db{handle = DBHandle, cf_iterator = CFIterator}),
    Current = schema_get_current(DBHandle),
    lists:foreach(fun(GenId) -> populate_metadata(GenId, S) end, lists:seq(0, Current)).
-spec add_generation(server_state(), emqx_ds:time()) -> server_state().
add_generation(S0, Since) ->
    #s{shard_id = ShardId, db = DB, schema = Schema0, shard = Shard0, cf_refs = CFRefs0} = S0,
    {GenId, Schema, NewCFRefs} = new_generation(ShardId, DB, Schema0, Since),
    CFRefs = NewCFRefs ++ CFRefs0,
    Key = {generation, GenId},
    Generation = open_generation(ShardId, DB, CFRefs, GenId, maps:get(Key, Schema)),
    Shard = Shard0#{Key => Generation},
    S0#s{
        cf_refs = CFRefs,
        schema = Schema,
        shard = Shard
    }.

-spec populate_metadata(gen_id(), state()) -> ok.
populate_metadata(GenId, S = #s{shard = Shard, db = DBHandle}) ->
    Gen = open_gen(GenId, schema_get_gen(DBHandle, GenId), S),
    meta_register_gen(Shard, GenId, Gen).
-spec open_generation(shard_id(), rocksdb:db_handle(), cf_refs(), gen_id(), generation_schema()) ->
    generation().
open_generation(ShardId, DB, CFRefs, GenId, GenSchema) ->
    ?tp(debug, ds_open_generation, #{gen_id => GenId, schema => GenSchema}),
    #{module := Mod, data := Schema} = GenSchema,
    RuntimeData = Mod:open(ShardId, DB, GenId, CFRefs, Schema),
    GenSchema#{data => RuntimeData}.

-spec ensure_current_generation(state()) -> state().
ensure_current_generation(S = #s{shard = {Keyspace, _ShardId}, db = DBHandle}) ->
    case schema_get_current(DBHandle) of
        undefined ->
            Config = emqx_ds_conf:keyspace_config(Keyspace),
            {ok, _, NS} = create_new_gen(0, Config, S),
            NS;
        _GenId ->
            S
    end.

-spec create_new_gen(emqx_ds:time(), emqx_ds_conf:backend_config(), state()) ->
    {ok, gen_id(), state()} | {error, nonmonotonic}.
create_new_gen(Since, Config, S = #s{shard = Shard, db = DBHandle}) ->
    GenId = get_next_id(meta_get_current(Shard)),
    GenId = get_next_id(schema_get_current(DBHandle)),
    case is_gen_valid(Shard, GenId, Since) of
        ok ->
            {ok, Gen, NS} = create_gen(GenId, Since, Config, S),
            %% TODO: Transaction? Column family creation can't be transactional, anyway.
            ok = schema_put_gen(DBHandle, GenId, Gen),
            ok = schema_put_current(DBHandle, GenId),
            ok = meta_register_gen(Shard, GenId, open_gen(GenId, Gen, NS)),
            {ok, GenId, NS};
        {error, _} = Error ->
            Error
    end.

-spec create_gen(gen_id(), emqx_ds:time(), emqx_ds_conf:backend_config(), state()) ->
    {ok, generation(), state()}.
create_gen(GenId, Since, {Module, Options}, S = #s{db = DBHandle, cf_generations = CFs}) ->
    % TODO: Backend implementation should ensure idempotency.
    {Schema, NewCFs} = Module:create_new(DBHandle, GenId, Options),
    Gen = #{
        module => Module,
        data => Schema,
        since => Since
-spec create_new_shard_schema(shard_id(), rocksdb:db_handle(), cf_refs(), prototype()) ->
    {shard_schema(), cf_refs()}.
create_new_shard_schema(ShardId, DB, CFRefs, Prototype) ->
    ?tp(notice, ds_create_new_shard_schema, #{shard => ShardId, prototype => Prototype}),
    %% TODO: read prototype from options/config
    Schema0 = #{
        current_generation => 0,
        prototype => Prototype
    },
    {ok, Gen, S#s{cf_generations = NewCFs ++ CFs}}.
    {_NewGenId, Schema, NewCFRefs} = new_generation(ShardId, DB, Schema0, _Since = 0),
    {Schema, NewCFRefs ++ CFRefs}.

-spec open_db(emqx_ds:shard(), options()) -> {ok, state()} | {error, _TODO}.
open_db(Shard = {Keyspace, ShardId}, Options) ->
    DefaultDir = filename:join([atom_to_binary(Keyspace), ShardId]),
    DBDir = unicode:characters_to_list(maps:get(dir, Options, DefaultDir)),
-spec new_generation(shard_id(), rocksdb:db_handle(), shard_schema(), emqx_ds:time()) ->
    {gen_id(), shard_schema(), cf_refs()}.
new_generation(ShardId, DB, Schema0, Since) ->
    #{current_generation := PrevGenId, prototype := {Mod, ModConf}} = Schema0,
    GenId = PrevGenId + 1,
    {GenData, NewCFRefs} = Mod:create(ShardId, DB, GenId, ModConf),
    GenSchema = #{module => Mod, data => GenData, since => Since, until => undefined},
    Schema = Schema0#{
        current_generation => GenId,
        {generation, GenId} => GenSchema
    },
    {GenId, Schema, NewCFRefs}.

%% @doc Commit current state of the server to both rocksdb and the persistent term
-spec commit_metadata(server_state()) -> ok.
commit_metadata(#s{shard_id = ShardId, schema = Schema, shard = Runtime, db = DB}) ->
    ok = put_schema_persistent(DB, Schema),
    put_schema_runtime(ShardId, Runtime).

-spec rocksdb_open(shard_id(), emqx_ds:builtin_db_opts()) ->
    {ok, rocksdb:db_handle(), cf_refs()} | {error, _TODO}.
rocksdb_open(Shard, Options) ->
    DBOptions = [
        {create_if_missing, true},
        {create_missing_column_families, true}
        | emqx_ds_conf:db_options(Keyspace)
        | maps:get(db_options, Options, [])
    ],
    DBDir = db_dir(Shard),
    _ = filelib:ensure_dir(DBDir),
    ExistingCFs =
        case rocksdb:list_column_families(DBDir, DBOptions) of
            {ok, CFs} ->
                [{Name, []} || Name <- CFs, Name /= ?DEFAULT_CF, Name /= ?ITERATOR_CF];
                [{Name, []} || Name <- CFs, Name /= ?DEFAULT_CF];
            % DB is not present. First start
            {error, {db_open, _}} ->
                []
        end,
    ColumnFamilies = [
        {?DEFAULT_CF, ?DEFAULT_CF_OPTS},
        {?ITERATOR_CF, ?ITERATOR_CF_OPTS}
        {?DEFAULT_CF, ?DEFAULT_CF_OPTS}
        | ExistingCFs
    ],
    case rocksdb:open(DBDir, DBOptions, ColumnFamilies) of
        {ok, DBHandle, [_CFDefault, CFIterator | CFRefs]} ->
        {ok, DBHandle, [_CFDefault | CFRefs]} ->
            {CFNames, _} = lists:unzip(ExistingCFs),
            {ok, #s{
                shard = Shard,
                db = DBHandle,
                cf_iterator = CFIterator,
                cf_generations = lists:zip(CFNames, CFRefs)
            }};
            {ok, DBHandle, lists:zip(CFNames, CFRefs)};
        Error ->
            Error
    end.

-spec open_gen(gen_id(), generation(), state()) -> generation().
open_gen(
    GenId,
    Gen = #{module := Mod, data := Data},
    #s{shard = Shard, db = DBHandle, cf_generations = CFs}
) ->
    DB = Mod:open(Shard, DBHandle, GenId, CFs, Data),
    Gen#{data := DB}.
-spec db_dir(shard_id()) -> file:filename().
db_dir({DB, ShardId}) ->
    filename:join([emqx:data_dir(), atom_to_list(DB), atom_to_list(ShardId)]).

-spec open_next_iterator(iterator()) -> {ok, iterator()} | {error, _Reason} | none.
open_next_iterator(It = #it{shard = Shard, gen = GenId}) ->
    open_next_iterator(meta_get_gen(Shard, GenId + 1), It#it{gen = GenId + 1}).
%%--------------------------------------------------------------------------------
%% Schema access
%%--------------------------------------------------------------------------------

open_next_iterator(undefined, _It) ->
    none;
open_next_iterator(Gen = #{}, It) ->
    open_iterator(Gen, It).
-spec generation_current(shard_id()) -> gen_id().
generation_current(Shard) ->
    #{current_generation := Current} = get_schema_runtime(Shard),
    Current.

-spec open_iterator(generation(), iterator()) -> {ok, iterator()} | {error, _Reason}.
open_iterator(#{module := Mod, data := Data}, It = #it{}) ->
    case Mod:make_iterator(Data, It#it.replay) of
        {ok, ItData} ->
            {ok, It#it{module = Mod, data = ItData}};
        Err ->
            Err
    end.
-spec generation_get(shard_id(), gen_id()) -> generation().
generation_get(Shard, GenId) ->
    #{{generation, GenId} := GenData} = get_schema_runtime(Shard),
    GenData.

-spec open_restore_iterator(generation(), iterator(), binary()) ->
    {ok, iterator()} | {error, _Reason}.
open_restore_iterator(#{module := Mod, data := Data}, It = #it{replay = Replay}, Serial) ->
    case Mod:restore_iterator(Data, Replay, Serial) of
        {ok, ItData} ->
            {ok, It#it{module = Mod, data = ItData}};
        Err ->
            Err
    end.

%%

-define(KEY_REPLAY_STATE(IteratorId), <<(IteratorId)/binary, "rs">>).
-define(KEY_REPLAY_STATE_PAT(KeyReplayState), begin
    <<IteratorId:(size(KeyReplayState) - 2)/binary, "rs">> = (KeyReplayState),
    IteratorId
end).

-define(ITERATION_WRITE_OPTS, []).
-define(ITERATION_READ_OPTS, []).

iterator_get_state(Shard, ReplayID) ->
    #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
    rocksdb:get(Handle, CF, ?KEY_REPLAY_STATE(ReplayID), ?ITERATION_READ_OPTS).

iterator_put_state(ID, It = #it{shard = Shard}) ->
    #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
    Serial = preserve_iterator_state(It),
    rocksdb:put(Handle, CF, ?KEY_REPLAY_STATE(ID), Serial, ?ITERATION_WRITE_OPTS).

iterator_delete(Shard, ID) ->
    #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
    rocksdb:delete(Handle, CF, ?KEY_REPLAY_STATE(ID), ?ITERATION_WRITE_OPTS).

preserve_iterator_state(#it{
    gen = Gen,
    replay = {TopicFilter, StartTime},
    module = Mod,
    data = ItData
}) ->
    term_to_binary(#{
        v => 1,
        gen => Gen,
        filter => TopicFilter,
        start => StartTime,
        st => Mod:preserve_iterator(ItData)
    }).

restore_iterator_state(Shard, Serial) when is_binary(Serial) ->
    restore_iterator_state(Shard, binary_to_term(Serial));
restore_iterator_state(
    Shard,
    #{
        v := 1,
        gen := Gen,
        filter := TopicFilter,
        start := StartTime,
        st := State
    }
) ->
    It = #it{shard = Shard, gen = Gen, replay = {TopicFilter, StartTime}},
    open_restore_iterator(meta_get_gen(Shard, Gen), It, State).

do_list_iterator_prefix(Shard, KeyPrefix) ->
    Fn = fun(K0, _V, Acc) ->
        K = ?KEY_REPLAY_STATE_PAT(K0),
        [K | Acc]
    end,
    do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, []).

do_discard_iterator_prefix(Shard, KeyPrefix) ->
    #db{handle = DBHandle, cf_iterator = CF} = meta_lookup(Shard, db),
    Fn = fun(K, _V, _Acc) -> ok = rocksdb:delete(DBHandle, CF, K, ?ITERATION_WRITE_OPTS) end,
    do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, ok).

do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc) ->
    #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
    case rocksdb:iterator(Handle, CF, ?ITERATION_READ_OPTS) of
        {ok, It} ->
            NextAction = {seek, KeyPrefix},
            do_foldl_iterator_prefix(Handle, CF, It, KeyPrefix, NextAction, Fn, Acc);
        Error ->
            Error
    end.

do_foldl_iterator_prefix(DBHandle, CF, It, KeyPrefix, NextAction, Fn, Acc) ->
    case rocksdb:iterator_move(It, NextAction) of
        {ok, K = <<KeyPrefix:(size(KeyPrefix))/binary, _/binary>>, V} ->
            NewAcc = Fn(K, V, Acc),
            do_foldl_iterator_prefix(DBHandle, CF, It, KeyPrefix, next, Fn, NewAcc);
        {ok, _K, _V} ->
            ok = rocksdb:iterator_close(It),
            {ok, Acc};
        {error, invalid_iterator} ->
            ok = rocksdb:iterator_close(It),
            {ok, Acc};
        Error ->
            ok = rocksdb:iterator_close(It),
            Error
    end.

%% Functions for dealing with the metadata stored persistently in rocksdb

-define(CURRENT_GEN, <<"current">>).
-define(SCHEMA_WRITE_OPTS, []).
-define(SCHEMA_READ_OPTS, []).

-spec schema_get_gen(rocksdb:db_handle(), gen_id()) -> generation().
schema_get_gen(DBHandle, GenId) ->
    {ok, Bin} = rocksdb:get(DBHandle, schema_gen_key(GenId), ?SCHEMA_READ_OPTS),
    binary_to_term(Bin).

-spec schema_put_gen(rocksdb:db_handle(), gen_id(), generation()) -> ok | {error, _}.
schema_put_gen(DBHandle, GenId, Gen) ->
    rocksdb:put(DBHandle, schema_gen_key(GenId), term_to_binary(Gen), ?SCHEMA_WRITE_OPTS).

-spec schema_get_current(rocksdb:db_handle()) -> gen_id() | undefined.
schema_get_current(DBHandle) ->
    case rocksdb:get(DBHandle, ?CURRENT_GEN, ?SCHEMA_READ_OPTS) of
        {ok, Bin} ->
            binary_to_integer(Bin);
        not_found ->
            undefined
    end.

-spec schema_put_current(rocksdb:db_handle(), gen_id()) -> ok | {error, _}.
schema_put_current(DBHandle, GenId) ->
    rocksdb:put(DBHandle, ?CURRENT_GEN, integer_to_binary(GenId), ?SCHEMA_WRITE_OPTS).

-spec schema_gen_key(integer()) -> binary().
schema_gen_key(N) ->
    <<"gen", N:32>>.
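
%% For instance (illustrative shell session): the generation number is
%% encoded big-endian after a fixed prefix, so keys sort by generation:
%%
%%   1> schema_gen_key(1).
%%   <<103,101,110,0,0,0,1>>    %% i.e. <<"gen", 1:32>>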

-undef(CURRENT_GEN).
-undef(SCHEMA_WRITE_OPTS).
-undef(SCHEMA_READ_OPTS).

%% Functions for dealing with the runtime shard metadata:

-define(PERSISTENT_TERM(SHARD, GEN), {?MODULE, SHARD, GEN}).

-spec meta_register_gen(emqx_ds:shard(), gen_id(), generation()) -> ok.
meta_register_gen(Shard, GenId, Gen) ->
    Gs =
        case GenId > 0 of
            true -> meta_lookup(Shard, GenId - 1);
            false -> []
-spec generations_since(shard_id(), emqx_ds:time()) -> [gen_id()].
generations_since(Shard, Since) ->
    Schema = get_schema_runtime(Shard),
    maps:fold(
        fun
            ({generation, GenId}, #{until := Until}, Acc) when Until >= Since ->
                [GenId | Acc];
            (_K, _V, Acc) ->
                Acc
        end,
    ok = meta_put(Shard, GenId, [Gen | Gs]),
    ok = meta_put(Shard, current, GenId).
        [],
        Schema
    ).

-spec meta_lookup_gen(emqx_ds:shard(), emqx_ds:time()) -> {gen_id(), generation()}.
meta_lookup_gen(Shard, Time) ->
    % TODO
    % Is cheaper persistent term GC on update here worth extra lookup? I'm leaning
    % towards a "no".
    Current = meta_lookup(Shard, current),
    Gens = meta_lookup(Shard, Current),
    find_gen(Time, Current, Gens).
-define(PERSISTENT_TERM(SHARD), {emqx_ds_storage_layer, SHARD}).

find_gen(Time, GenId, [Gen = #{since := Since} | _]) when Time >= Since ->
    {GenId, Gen};
find_gen(Time, GenId, [_Gen | Rest]) ->
    find_gen(Time, GenId - 1, Rest).
-spec get_schema_runtime(shard_id()) -> shard().
get_schema_runtime(Shard) ->
    persistent_term:get(?PERSISTENT_TERM(Shard)).

-spec meta_get_gen(emqx_ds:shard(), gen_id()) -> generation() | undefined.
meta_get_gen(Shard, GenId) ->
    case meta_lookup(Shard, GenId, []) of
        [Gen | _Older] -> Gen;
        [] -> undefined
    end.
-spec put_schema_runtime(shard_id(), shard()) -> ok.
put_schema_runtime(Shard, RuntimeSchema) ->
    persistent_term:put(?PERSISTENT_TERM(Shard), RuntimeSchema),
    ok.

-spec meta_get_current(emqx_ds:shard()) -> gen_id() | undefined.
meta_get_current(Shard) ->
    meta_lookup(Shard, current, undefined).

-spec meta_lookup(emqx_ds:shard(), _K) -> _V.
meta_lookup(Shard, K) ->
    persistent_term:get(?PERSISTENT_TERM(Shard, K)).

-spec meta_lookup(emqx_ds:shard(), _K, Default) -> _V | Default.
meta_lookup(Shard, K, Default) ->
    persistent_term:get(?PERSISTENT_TERM(Shard, K), Default).

-spec meta_put(emqx_ds:shard(), _K, _V) -> ok.
meta_put(Shard, K, V) ->
    persistent_term:put(?PERSISTENT_TERM(Shard, K), V).

-spec meta_erase(emqx_ds:shard()) -> ok.
meta_erase(Shard) ->
    [
        persistent_term:erase(K)
     || {K = ?PERSISTENT_TERM(Z, _), _} <- persistent_term:get(), Z =:= Shard
    ],
-spec erase_schema_runtime(shard_id()) -> ok.
erase_schema_runtime(Shard) ->
    persistent_term:erase(?PERSISTENT_TERM(Shard)),
    ok.

-undef(PERSISTENT_TERM).

get_next_id(undefined) -> 0;
get_next_id(GenId) -> GenId + 1.
-define(ROCKSDB_SCHEMA_KEY, <<"schema_v1">>).

is_gen_valid(Shard, GenId, Since) when GenId > 0 ->
    [GenPrev | _] = meta_lookup(Shard, GenId - 1),
    case GenPrev of
        #{since := SincePrev} when Since > SincePrev ->
            ok;
        #{} ->
            {error, nonmonotonic}
    end;
is_gen_valid(_Shard, 0, 0) ->
    ok.
-spec get_schema_persistent(rocksdb:db_handle()) -> shard_schema() | not_found.
get_schema_persistent(DB) ->
    case rocksdb:get(DB, ?ROCKSDB_SCHEMA_KEY, []) of
        {ok, Blob} ->
            Schema = binary_to_term(Blob),
            %% Sanity check:
            #{current_generation := _, prototype := _} = Schema,
            Schema;
        not_found ->
            not_found
    end.

%% -spec store_cfs(rocksdb:db_handle(), [{string(), rocksdb:cf_handle()}]) -> ok.
%% store_cfs(DBHandle, CFRefs) ->
%%     lists:foreach(
%%         fun({CFName, CFRef}) ->
%%             persistent_term:put({self(), CFName}, {DBHandle, CFRef})
%%         end,
%%         CFRefs).
-spec put_schema_persistent(rocksdb:db_handle(), shard_schema()) -> ok.
put_schema_persistent(DB, Schema) ->
    Blob = term_to_binary(Schema),
    rocksdb:put(DB, ?ROCKSDB_SCHEMA_KEY, Blob, []).
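
%% Round-trip sketch (illustrative): the schema survives restarts via
%% plain term serialization under this single well-known key:
%%
%%   ok = put_schema_persistent(DB, Schema),
%%   Schema = get_schema_persistent(DB).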

-undef(ROCKSDB_SCHEMA_KEY).
@ -6,7 +6,7 @@
-behaviour(supervisor).

%% API:
-export([start_link/0, start_shard/2, stop_shard/1]).
-export([start_link/0, start_shard/2, stop_shard/1, ensure_shard/2]).

%% behaviour callbacks:
-export([init/1]).
@ -25,7 +25,7 @@
start_link() ->
    supervisor:start_link({local, ?SUP}, ?MODULE, []).

-spec start_shard(emqx_ds:shard(), emqx_ds_storage_layer:options()) ->
-spec start_shard(emqx_ds_replication_layer:shard_id(), emqx_ds:create_db_opts()) ->
    supervisor:startchild_ret().
start_shard(Shard, Options) ->
    supervisor:start_child(?SUP, shard_child_spec(Shard, Options)).
@ -35,6 +35,17 @@ stop_shard(Shard) ->
    ok = supervisor:terminate_child(?SUP, Shard),
    ok = supervisor:delete_child(?SUP, Shard).

-spec ensure_shard(emqx_ds:shard(), emqx_ds_storage_layer:options()) -> ok | {error, _Reason}.
ensure_shard(Shard, Options) ->
    case start_shard(Shard, Options) of
        {ok, _Pid} ->
            ok;
        {error, {already_started, _Pid}} ->
            ok;
        {error, Reason} ->
            {error, Reason}
    end.

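%% Typical usage sketch (illustrative): ensure_shard/2 makes shard
%% start-up idempotent, so concurrent callers may race to open the
%% same shard and both succeed:
%%
%%   ok = emqx_ds_storage_layer_sup:ensure_shard(Shard, Options),
%%   ok = emqx_ds_storage_layer_sup:ensure_shard(Shard, Options).
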
%%================================================================================
%% behaviour callbacks
%%================================================================================

@ -52,7 +63,7 @@ init([]) ->
%% Internal functions
%%================================================================================

-spec shard_child_spec(emqx_ds:shard(), emqx_ds_storage_layer:options()) ->
-spec shard_child_spec(emqx_ds_replication_layer:shard_id(), emqx_ds:create_db_opts()) ->
    supervisor:child_spec().
shard_child_spec(Shard, Options) ->
    #{
@ -0,0 +1,139 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

%% @doc Reference implementation of the storage.
%%
%% Trivial, extremely slow and inefficient. It also doesn't handle
%% restart of the Erlang node properly, so obviously it's only to be
%% used for testing.
-module(emqx_ds_storage_reference).

-behaviour(emqx_ds_storage_layer).

%% API:
-export([]).

%% behavior callbacks:
-export([create/4, open/5, store_batch/4, get_streams/4, make_iterator/5, next/4]).

%% internal exports:
-export([]).

-export_type([options/0]).

-include_lib("emqx_utils/include/emqx_message.hrl").

%%================================================================================
%% Type declarations
%%================================================================================

-type options() :: #{}.

%% Permanent state:
-record(schema, {}).

%% Runtime state:
-record(s, {
    db :: rocksdb:db_handle(),
    cf :: rocksdb:cf_handle()
}).

-record(stream, {}).

-record(it, {
    topic_filter :: emqx_ds:topic_filter(),
    start_time :: emqx_ds:time(),
    last_seen_message_key = first :: binary() | first
}).

%%================================================================================
%% API functions
%%================================================================================
|
||||
|
||||
%%================================================================================
|
||||
%% behavior callbacks
|
||||
%%================================================================================
|
||||
|
||||
create(_ShardId, DBHandle, GenId, _Options) ->
|
||||
CFName = data_cf(GenId),
|
||||
{ok, CFHandle} = rocksdb:create_column_family(DBHandle, CFName, []),
|
||||
Schema = #schema{},
|
||||
{Schema, [{CFName, CFHandle}]}.
|
||||
|
||||
open(_Shard, DBHandle, GenId, CFRefs, #schema{}) ->
|
||||
{_, CF} = lists:keyfind(data_cf(GenId), 1, CFRefs),
|
||||
#s{db = DBHandle, cf = CF}.
|
||||
|
||||
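%% Note: message keys come from erlang:unique_integer([monotonic]),
%% which is only monotonic within a single run of the node (and may be
%% negative), so key ordering is not preserved across restarts; this is
%% the restart caveat mentioned in the module doc above.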
store_batch(_ShardId, #s{db = DB, cf = CF}, Messages, _Options) ->
|
||||
lists:foreach(
|
||||
fun(Msg) ->
|
||||
Id = erlang:unique_integer([monotonic]),
|
||||
Key = <<Id:64>>,
|
||||
Val = term_to_binary(Msg),
|
||||
rocksdb:put(DB, CF, Key, Val, [])
|
||||
end,
|
||||
Messages
|
||||
).
|
||||
|
||||
get_streams(_Shard, _Data, _TopicFilter, _StartTime) ->
|
||||
[#stream{}].
|
||||
|
||||
make_iterator(_Shard, _Data, #stream{}, TopicFilter, StartTime) ->
|
||||
{ok, #it{
|
||||
topic_filter = TopicFilter,
|
||||
start_time = StartTime
|
||||
}}.
|
||||
|
||||
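%% Each call below opens a fresh RocksDB iterator and repositions it at
%% the last key returned by the previous call, so the #it{} record alone
%% is enough to resume iteration (e.g. after the owner process restarts).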
next(_Shard, #s{db = DB, cf = CF}, It0, BatchSize) ->
|
||||
#it{topic_filter = TopicFilter, start_time = StartTime, last_seen_message_key = Key0} = It0,
|
||||
{ok, ITHandle} = rocksdb:iterator(DB, CF, []),
|
||||
Action =
|
||||
case Key0 of
|
||||
first ->
|
||||
first;
|
||||
_ ->
|
||||
_ = rocksdb:iterator_move(ITHandle, Key0),
|
||||
next
|
||||
end,
|
||||
{Key, Messages} = do_next(TopicFilter, StartTime, ITHandle, Action, BatchSize, Key0, []),
|
||||
rocksdb:iterator_close(ITHandle),
|
||||
It = It0#it{last_seen_message_key = Key},
|
||||
{ok, It, lists:reverse(Messages)}.
|
||||
|
||||
%%================================================================================
|
||||
%% Internal functions
|
||||
%%================================================================================
|
||||
|
||||
do_next(_, _, _, _, 0, Key, Acc) ->
|
||||
{Key, Acc};
|
||||
do_next(TopicFilter, StartTime, IT, Action, NLeft, Key0, Acc) ->
|
||||
case rocksdb:iterator_move(IT, Action) of
|
||||
{ok, Key, Blob} ->
|
||||
Msg = #message{topic = Topic, timestamp = TS} = binary_to_term(Blob),
|
||||
case emqx_topic:match(Topic, TopicFilter) andalso TS >= StartTime of
|
||||
true ->
|
||||
do_next(TopicFilter, StartTime, IT, next, NLeft - 1, Key, [Msg | Acc]);
|
||||
false ->
|
||||
do_next(TopicFilter, StartTime, IT, next, NLeft, Key, Acc)
|
||||
end;
|
||||
{error, invalid_iterator} ->
|
||||
{Key0, Acc}
|
||||
end.
|
||||
|
||||
%% @doc Generate a column family ID for the MQTT messages
|
||||
-spec data_cf(emqx_ds_storage_layer:gen_id()) -> [char()].
|
||||
data_cf(GenId) ->
|
||||
"emqx_ds_storage_reference" ++ integer_to_list(GenId).
|
|
@ -30,7 +30,7 @@ start_link() ->
|
|||
%%================================================================================
|
||||
|
||||
init([]) ->
|
||||
Children = [shard_sup()],
|
||||
Children = [storage_layer_sup()],
|
||||
SupFlags = #{
|
||||
strategy => one_for_all,
|
||||
intensity => 0,
|
||||
|
@ -42,7 +42,7 @@ init([]) ->
|
|||
%% Internal functions
|
||||
%%================================================================================
|
||||
|
||||
shard_sup() ->
|
||||
storage_layer_sup() ->
|
||||
#{
|
||||
id => local_store_shard_sup,
|
||||
start => {emqx_ds_storage_layer_sup, start_link, []},
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
{vsn, "0.1.6"},
|
||||
{modules, []},
|
||||
{registered, []},
|
||||
{applications, [kernel, stdlib, rocksdb, gproc, mria]},
|
||||
{applications, [kernel, stdlib, rocksdb, gproc, mria, emqx_utils]},
|
||||
{mod, {emqx_ds_app, []}},
|
||||
{env, []}
|
||||
]}.
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_ds_proto_v1).
|
||||
|
||||
-behavior(emqx_bpapi).
|
||||
|
||||
-include_lib("emqx_utils/include/bpapi.hrl").
|
||||
%% API:
|
||||
-export([open_shard/3, drop_shard/2, get_streams/4, make_iterator/5, next/4]).
|
||||
|
||||
%% behavior callbacks:
|
||||
-export([introduced_in/0]).
|
||||
|
||||
%%================================================================================
|
||||
%% API functions
|
||||
%%================================================================================
|
||||
|
||||
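%% Each wrapper below erpc-calls its do_*_v1 counterpart in
%% emqx_ds_replication_layer on the target node; the _v1 suffix pins the
%% remote entry point to this protocol version. A sketch of a call, with
%% hypothetical node and shard values:
%%
%%   ok = emqx_ds_proto_v1:open_shard('emqx@host', <<"local">>, #{}).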
-spec open_shard(node(), emqx_ds_replication_layer:shard_id(), emqx_ds:create_db_opts()) ->
|
||||
ok.
|
||||
open_shard(Node, Shard, Opts) ->
|
||||
erpc:call(Node, emqx_ds_replication_layer, do_open_shard_v1, [Shard, Opts]).
|
||||
|
||||
-spec drop_shard(node(), emqx_ds_replication_layer:shard_id()) ->
|
||||
ok.
|
||||
drop_shard(Node, Shard) ->
|
||||
erpc:call(Node, emqx_ds_replication_layer, do_drop_shard_v1, [Shard]).
|
||||
|
||||
-spec get_streams(
|
||||
node(), emqx_ds_replication_layer:shard_id(), emqx_ds:topic_filter(), emqx_ds:time()
|
||||
) ->
|
||||
[{integer(), emqx_ds_replication_layer:stream()}].
|
||||
get_streams(Node, Shard, TopicFilter, Time) ->
|
||||
erpc:call(Node, emqx_ds_replication_layer, do_get_streams_v1, [Shard, TopicFilter, Time]).
|
||||
|
||||
-spec make_iterator(
|
||||
node(),
|
||||
emqx_ds_replication_layer:shard_id(),
|
||||
emqx_ds_storage_layer:stream(),
|
||||
emqx_ds:topic_filter(),
|
||||
emqx_ds:time()
|
||||
) ->
|
||||
{ok, emqx_ds_replication_layer:iterator()} | {error, _}.
|
||||
make_iterator(Node, Shard, Stream, TopicFilter, StartTime) ->
|
||||
erpc:call(Node, emqx_ds_replication_layer, do_make_iterator_v1, [
|
||||
Shard, Stream, TopicFilter, StartTime
|
||||
]).
|
||||
|
||||
-spec next(
|
||||
node(), emqx_ds_replication_layer:shard_id(), emqx_ds_storage_layer:iterator(), pos_integer()
|
||||
) ->
|
||||
{ok, emqx_ds_storage_layer:iterator(), [emqx_types:message()]}
|
||||
| {ok, end_of_stream}
|
||||
| {error, _}.
|
||||
next(Node, Shard, Iter, BatchSize) ->
|
||||
erpc:call(Node, emqx_ds_replication_layer, do_next_v1, [Shard, Iter, BatchSize]).
|
||||
|
||||
%%================================================================================
|
||||
%% behavior callbacks
|
||||
%%================================================================================
|
||||
|
||||
introduced_in() ->
|
||||
"5.4.0".
|
|
@ -0,0 +1,146 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%
|
||||
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||
%% you may not use this file except in compliance with the License.
|
||||
%% You may obtain a copy of the License at
|
||||
%%
|
||||
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||
%%
|
||||
%% Unless required by applicable law or agreed to in writing, software
|
||||
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
%% See the License for the specific language governing permissions and
|
||||
%% limitations under the License.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_ds_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("stdlib/include/assert.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
|
||||
opts() ->
|
||||
#{
|
||||
backend => builtin,
|
||||
storage => {emqx_ds_storage_reference, #{}}
|
||||
}.
|
||||
|
||||
%% A simple smoke test that verifies that opening/closing the DB
|
||||
%% doesn't crash, and not much else
|
||||
t_00_smoke_open_drop(_Config) ->
|
||||
DB = 'DB',
|
||||
?assertMatch(ok, emqx_ds:open_db(DB, opts())),
|
||||
?assertMatch(ok, emqx_ds:open_db(DB, opts())),
|
||||
?assertMatch(ok, emqx_ds:drop_db(DB)).
|
||||
|
||||
%% A simple smoke test that verifies that storing the messages doesn't
|
||||
%% crash
|
||||
t_01_smoke_store(_Config) ->
|
||||
DB = default,
|
||||
?assertMatch(ok, emqx_ds:open_db(DB, opts())),
|
||||
Msg = message(<<"foo/bar">>, <<"foo">>, 0),
|
||||
?assertMatch(ok, emqx_ds:store_batch(DB, [Msg])).
|
||||
|
||||
%% A simple smoke test that verifies that getting the list of streams
|
||||
%% doesn't crash and that iterators can be opened.
|
||||
t_02_smoke_get_streams_start_iter(_Config) ->
|
||||
DB = ?FUNCTION_NAME,
|
||||
?assertMatch(ok, emqx_ds:open_db(DB, opts())),
|
||||
StartTime = 0,
|
||||
TopicFilter = ['#'],
|
||||
[{Rank, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime),
|
||||
?assertMatch({_, _}, Rank),
|
||||
?assertMatch({ok, _Iter}, emqx_ds:make_iterator(Stream, TopicFilter, StartTime)).
|
||||
|
||||
%% A simple smoke test that verifies that it's possible to iterate
|
||||
%% over messages.
|
||||
t_03_smoke_iterate(_Config) ->
|
||||
DB = ?FUNCTION_NAME,
|
||||
?assertMatch(ok, emqx_ds:open_db(DB, opts())),
|
||||
StartTime = 0,
|
||||
TopicFilter = ['#'],
|
||||
Msgs = [
|
||||
message(<<"foo/bar">>, <<"1">>, 0),
|
||||
message(<<"foo">>, <<"2">>, 1),
|
||||
message(<<"bar/bar">>, <<"3">>, 2)
|
||||
],
|
||||
?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)),
|
||||
[{_, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime),
|
||||
{ok, Iter0} = emqx_ds:make_iterator(Stream, TopicFilter, StartTime),
|
||||
{ok, Iter, Batch} = iterate(Iter0, 1),
|
||||
?assertEqual(Msgs, Batch, {Iter0, Iter}).
|
||||
|
||||
%% Verify that iterators survive restart of the application. This is
|
||||
%% an important property, since the lifetime of the iterators is tied
|
||||
%% to external resources, such as clients' sessions, and they
|
||||
%% should always be able to continue replaying the topics from where
|
||||
%% they left off.
|
||||
t_04_restart(_Config) ->
|
||||
DB = ?FUNCTION_NAME,
|
||||
?assertMatch(ok, emqx_ds:open_db(DB, opts())),
|
||||
TopicFilter = ['#'],
|
||||
StartTime = 0,
|
||||
Msgs = [
|
||||
message(<<"foo/bar">>, <<"1">>, 0),
|
||||
message(<<"foo">>, <<"2">>, 1),
|
||||
message(<<"bar/bar">>, <<"3">>, 2)
|
||||
],
|
||||
?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)),
|
||||
[{_, Stream}] = emqx_ds:get_streams(DB, TopicFilter, StartTime),
|
||||
{ok, Iter0} = emqx_ds:make_iterator(Stream, TopicFilter, StartTime),
|
||||
%% Restart the application:
|
||||
?tp(warning, emqx_ds_SUITE_restart_app, #{}),
|
||||
ok = application:stop(emqx_durable_storage),
|
||||
{ok, _} = application:ensure_all_started(emqx_durable_storage),
|
||||
ok = emqx_ds:open_db(DB, opts()),
|
||||
%% The old iterator should be still operational:
|
||||
{ok, Iter, Batch} = iterate(Iter0, 1),
|
||||
?assertEqual(Msgs, Batch, {Iter0, Iter}).
|
||||
|
||||
message(Topic, Payload, PublishedAt) ->
|
||||
#message{
|
||||
topic = Topic,
|
||||
payload = Payload,
|
||||
timestamp = PublishedAt,
|
||||
id = emqx_guid:gen()
|
||||
}.
|
||||
|
||||
iterate(It, BatchSize) ->
|
||||
iterate(It, BatchSize, []).
|
||||
|
||||
iterate(It0, BatchSize, Acc) ->
|
||||
case emqx_ds:next(It0, BatchSize) of
|
||||
{ok, It, []} ->
|
||||
{ok, It, Acc};
|
||||
{ok, It, Msgs} ->
|
||||
iterate(It, BatchSize, Acc ++ Msgs);
|
||||
Ret ->
|
||||
Ret
|
||||
end.
|
||||
|
||||
%% CT callbacks
|
||||
|
||||
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
init_per_suite(Config) ->
|
||||
Apps = emqx_cth_suite:start(
|
||||
[mria, emqx_durable_storage],
|
||||
#{work_dir => ?config(priv_dir, Config)}
|
||||
),
|
||||
[{apps, Apps} | Config].
|
||||
|
||||
end_per_suite(Config) ->
|
||||
ok = emqx_cth_suite:stop(?config(apps, Config)),
|
||||
ok.
|
||||
|
||||
init_per_testcase(_TC, Config) ->
|
||||
%% snabbkaffe:fix_ct_logging(),
|
||||
application:ensure_all_started(emqx_durable_storage),
|
||||
Config.
|
||||
|
||||
end_per_testcase(_TC, _Config) ->
|
||||
ok = application:stop(emqx_durable_storage).
|
|
@ -1,188 +0,0 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_ds_message_storage_bitmask_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("stdlib/include/assert.hrl").
|
||||
|
||||
-import(emqx_ds_message_storage_bitmask, [
|
||||
make_keymapper/1,
|
||||
keymapper_info/1,
|
||||
compute_topic_bitmask/2,
|
||||
compute_time_bitmask/1,
|
||||
compute_topic_seek/4
|
||||
]).
|
||||
|
||||
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
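%% The expected layout below follows from the options (an inference from
%% the assertion): epoch => 1000 is rounded down to 2^9 = 512, so the
%% timestamp is split into its 9 low bits (within the epoch) and 23 high
%% bits, with the per-level topic hashes packed in between;
%% bitsize 46 = 23 + 2 + 4 + 8 + 9.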
t_make_keymapper(_) ->
|
||||
?assertMatch(
|
||||
#{
|
||||
source := [
|
||||
{timestamp, 9, 23},
|
||||
{hash, level, 2},
|
||||
{hash, level, 4},
|
||||
{hash, levels, 8},
|
||||
{timestamp, 0, 9}
|
||||
],
|
||||
bitsize := 46,
|
||||
epoch := 512
|
||||
},
|
||||
keymapper_info(
|
||||
make_keymapper(#{
|
||||
timestamp_bits => 32,
|
||||
topic_bits_per_level => [2, 4, 8],
|
||||
epoch => 1000
|
||||
})
|
||||
)
|
||||
).
|
||||
|
||||
t_make_keymapper_single_hash_level(_) ->
|
||||
?assertMatch(
|
||||
#{
|
||||
source := [
|
||||
{timestamp, 0, 32},
|
||||
{hash, levels, 16}
|
||||
],
|
||||
bitsize := 48,
|
||||
epoch := 1
|
||||
},
|
||||
keymapper_info(
|
||||
make_keymapper(#{
|
||||
timestamp_bits => 32,
|
||||
topic_bits_per_level => [16],
|
||||
epoch => 1
|
||||
})
|
||||
)
|
||||
).
|
||||
|
||||
t_make_keymapper_no_timestamp(_) ->
|
||||
?assertMatch(
|
||||
#{
|
||||
source := [
|
||||
{hash, level, 4},
|
||||
{hash, level, 8},
|
||||
{hash, levels, 16}
|
||||
],
|
||||
bitsize := 28,
|
||||
epoch := 1
|
||||
},
|
||||
keymapper_info(
|
||||
make_keymapper(#{
|
||||
timestamp_bits => 0,
|
||||
topic_bits_per_level => [4, 8, 16],
|
||||
epoch => 42
|
||||
})
|
||||
)
|
||||
).
|
||||
|
||||
t_compute_topic_bitmask(_) ->
|
||||
KM = make_keymapper(#{topic_bits_per_level => [3, 4, 5, 2], timestamp_bits => 0, epoch => 1}),
|
||||
?assertEqual(
|
||||
2#111_1111_11111_11,
|
||||
compute_topic_bitmask([<<"foo">>, <<"bar">>], KM)
|
||||
),
|
||||
?assertEqual(
|
||||
2#111_0000_11111_11,
|
||||
compute_topic_bitmask([<<"foo">>, '+'], KM)
|
||||
),
|
||||
?assertEqual(
|
||||
2#111_0000_00000_11,
|
||||
compute_topic_bitmask([<<"foo">>, '+', '+'], KM)
|
||||
),
|
||||
?assertEqual(
|
||||
2#111_0000_11111_00,
|
||||
compute_topic_bitmask([<<"foo">>, '+', <<"bar">>, '+'], KM)
|
||||
).
|
||||
|
||||
t_compute_topic_bitmask_wildcard(_) ->
|
||||
KM = make_keymapper(#{topic_bits_per_level => [3, 4, 5, 2], timestamp_bits => 0, epoch => 1}),
|
||||
?assertEqual(
|
||||
2#000_0000_00000_00,
|
||||
compute_topic_bitmask(['#'], KM)
|
||||
),
|
||||
?assertEqual(
|
||||
2#111_0000_00000_00,
|
||||
compute_topic_bitmask([<<"foo">>, '#'], KM)
|
||||
),
|
||||
?assertEqual(
|
||||
2#111_1111_11111_00,
|
||||
compute_topic_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, '#'], KM)
|
||||
).
|
||||
|
||||
t_compute_topic_bitmask_wildcard_long_tail(_) ->
|
||||
KM = make_keymapper(#{topic_bits_per_level => [3, 4, 5, 2], timestamp_bits => 0, epoch => 1}),
|
||||
?assertEqual(
|
||||
2#111_1111_11111_11,
|
||||
compute_topic_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, <<"xyzzy">>], KM)
|
||||
),
|
||||
?assertEqual(
|
||||
2#111_1111_11111_00,
|
||||
compute_topic_bitmask([<<"foo">>, <<"bar">>, <<"baz">>, <<>>, '#'], KM)
|
||||
).
|
||||
|
||||
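%% Expected mask (inferred from the layout): epoch => 200 rounds down to
%% 2^7 = 128, so the 7 low timestamp bits stay, the 6 topic bits
%% (1 + 2 + 3) are zeroed out, and the remaining 3 high timestamp bits
%% stay: 2#111_000000_1111111.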
t_compute_time_bitmask(_) ->
|
||||
KM = make_keymapper(#{topic_bits_per_level => [1, 2, 3], timestamp_bits => 10, epoch => 200}),
|
||||
?assertEqual(2#111_000000_1111111, compute_time_bitmask(KM)).
|
||||
|
||||
t_compute_time_bitmask_epoch_only(_) ->
|
||||
KM = make_keymapper(#{topic_bits_per_level => [1, 2, 3], timestamp_bits => 10, epoch => 1}),
|
||||
?assertEqual(2#1111111111_000000, compute_time_bitmask(KM)).
|
||||
|
||||
%% Filter = |123|***|678|***|
|
||||
%% Mask = |123|***|678|***|
|
||||
%% Key1 = |123|011|108|121| → Seek = 0 |123|011|678|000|
|
||||
%% Key2 = |123|011|679|919| → Seek = 0 |123|012|678|000|
|
||||
%% Key3 = |123|999|679|001| → Seek = 1 |123|000|678|000| → eos
|
||||
%% Key4 = |125|011|179|017| → Seek = 1 |123|000|678|000| → eos
|
||||
|
||||
t_compute_next_topic_seek(_) ->
|
||||
KM = make_keymapper(#{topic_bits_per_level => [8, 8, 16, 12], timestamp_bits => 0, epoch => 1}),
|
||||
?assertMatch(
|
||||
none,
|
||||
compute_topic_seek(
|
||||
16#FD_42_4242_043,
|
||||
16#FD_42_4242_042,
|
||||
16#FF_FF_FFFF_FFF,
|
||||
KM
|
||||
)
|
||||
),
|
||||
?assertMatch(
|
||||
16#FD_11_0678_000,
|
||||
compute_topic_seek(
|
||||
16#FD_11_0108_121,
|
||||
16#FD_00_0678_000,
|
||||
16#FF_00_FFFF_000,
|
||||
KM
|
||||
)
|
||||
),
|
||||
?assertMatch(
|
||||
16#FD_12_0678_000,
|
||||
compute_topic_seek(
|
||||
16#FD_11_0679_919,
|
||||
16#FD_00_0678_000,
|
||||
16#FF_00_FFFF_000,
|
||||
KM
|
||||
)
|
||||
),
|
||||
?assertMatch(
|
||||
none,
|
||||
compute_topic_seek(
|
||||
16#FD_FF_0679_001,
|
||||
16#FD_00_0678_000,
|
||||
16#FF_00_FFFF_000,
|
||||
KM
|
||||
)
|
||||
),
|
||||
?assertMatch(
|
||||
none,
|
||||
compute_topic_seek(
|
||||
16#FE_11_0179_017,
|
||||
16#FD_00_0678_000,
|
||||
16#FF_00_FFFF_000,
|
||||
KM
|
||||
)
|
||||
).
|
|
@ -0,0 +1,396 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_ds_storage_bitfield_lts_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
|
||||
-include_lib("stdlib/include/assert.hrl").
|
||||
|
||||
-define(SHARD, shard(?FUNCTION_NAME)).
|
||||
|
||||
-define(DEFAULT_CONFIG, #{
|
||||
backend => builtin,
|
||||
storage => {emqx_ds_storage_bitfield_lts, #{}}
|
||||
}).
|
||||
|
||||
-define(COMPACT_CONFIG, #{
|
||||
backend => builtin,
|
||||
storage =>
|
||||
{emqx_ds_storage_bitfield_lts, #{
|
||||
bits_per_wildcard_level => 8
|
||||
}}
|
||||
}).
|
||||
|
||||
%% Smoke test for opening and reopening the database
|
||||
t_open(_Config) ->
|
||||
ok = emqx_ds_storage_layer_sup:stop_shard(?SHARD),
|
||||
{ok, _} = emqx_ds_storage_layer_sup:start_shard(?SHARD, #{}).
|
||||
|
||||
%% Smoke test of store function
|
||||
t_store(_Config) ->
|
||||
MessageID = emqx_guid:gen(),
|
||||
PublishedAt = 1000,
|
||||
Topic = <<"foo/bar">>,
|
||||
Payload = <<"message">>,
|
||||
Msg = #message{
|
||||
id = MessageID,
|
||||
topic = Topic,
|
||||
payload = Payload,
|
||||
timestamp = PublishedAt
|
||||
},
|
||||
?assertMatch(ok, emqx_ds_storage_layer:store_batch(?SHARD, [Msg], #{})).
|
||||
|
||||
%% Smoke test for iteration through a concrete topic
|
||||
t_iterate(_Config) ->
|
||||
%% Prepare data:
|
||||
Topics = [<<"foo/bar">>, <<"foo/bar/baz">>, <<"a">>],
|
||||
Timestamps = lists:seq(1, 10),
|
||||
Batch = [
|
||||
make_message(PublishedAt, Topic, integer_to_binary(PublishedAt))
|
||||
|| Topic <- Topics, PublishedAt <- Timestamps
|
||||
],
|
||||
ok = emqx_ds_storage_layer:store_batch(?SHARD, Batch, []),
|
||||
%% Iterate through individual topics:
|
||||
[
|
||||
begin
|
||||
[{_Rank, Stream}] = emqx_ds_storage_layer:get_streams(?SHARD, parse_topic(Topic), 0),
|
||||
{ok, It} = emqx_ds_storage_layer:make_iterator(?SHARD, Stream, parse_topic(Topic), 0),
|
||||
{ok, NextIt, Messages} = emqx_ds_storage_layer:next(?SHARD, It, 100),
|
||||
?assertEqual(
|
||||
lists:map(fun integer_to_binary/1, Timestamps),
|
||||
payloads(Messages)
|
||||
),
|
||||
{ok, _, []} = emqx_ds_storage_layer:next(?SHARD, NextIt, 100)
|
||||
end
|
||||
|| Topic <- Topics
|
||||
],
|
||||
ok.
|
||||
|
||||
-define(assertSameSet(A, B), ?assertEqual(lists:sort(A), lists:sort(B))).
|
||||
|
||||
%% Smoke test that verifies that concrete topics are mapped to
|
||||
%% individual streams, unless there are too many of them.
|
||||
t_get_streams(_Config) ->
|
||||
%% Prepare data (without wildcards):
|
||||
Topics = [<<"foo/bar">>, <<"foo/bar/baz">>, <<"a">>],
|
||||
Timestamps = lists:seq(1, 10),
|
||||
Batch = [
|
||||
make_message(PublishedAt, Topic, integer_to_binary(PublishedAt))
|
||||
|| Topic <- Topics, PublishedAt <- Timestamps
|
||||
],
|
||||
ok = emqx_ds_storage_layer:store_batch(?SHARD, Batch, []),
|
||||
GetStream = fun(Topic) ->
|
||||
StartTime = 0,
|
||||
emqx_ds_storage_layer:get_streams(?SHARD, parse_topic(Topic), StartTime)
|
||||
end,
|
||||
%% Get streams for individual topics to use as a reference for later:
|
||||
[FooBar = {_, _}] = GetStream(<<"foo/bar">>),
|
||||
[FooBarBaz] = GetStream(<<"foo/bar/baz">>),
|
||||
[A] = GetStream(<<"a">>),
|
||||
%% Restart shard to make sure trie is persisted and restored:
|
||||
ok = emqx_ds_storage_layer_sup:stop_shard(?SHARD),
|
||||
{ok, _} = emqx_ds_storage_layer_sup:start_shard(?SHARD, #{}),
|
||||
%% Verify that there are no "ghost streams" for topics that don't
|
||||
%% have any messages:
|
||||
[] = GetStream(<<"bar/foo">>),
|
||||
%% Test some wildcard patterns:
|
||||
?assertEqual([FooBar], GetStream("+/+")),
|
||||
?assertSameSet([FooBar, FooBarBaz], GetStream(<<"foo/#">>)),
|
||||
?assertSameSet([FooBar, FooBarBaz, A], GetStream(<<"#">>)),
|
||||
%% Now insert a bunch of messages with different topics to create wildcards:
|
||||
NewBatch = [
|
||||
begin
|
||||
B = integer_to_binary(I),
|
||||
make_message(100, <<"foo/bar/", B/binary>>, <<"filler", B/binary>>)
|
||||
end
|
||||
|| I <- lists:seq(1, 200)
|
||||
],
|
||||
ok = emqx_ds_storage_layer:store_batch(?SHARD, NewBatch, []),
|
||||
%% Check that "foo/bar/baz" topic now appears in two streams:
|
||||
%% "foo/bar/baz" and "foo/bar/+":
|
||||
NewStreams = lists:sort(GetStream("foo/bar/baz")),
|
||||
?assertMatch([_, _], NewStreams),
|
||||
?assert(lists:member(FooBarBaz, NewStreams)),
|
||||
%% Verify that size of the trie is still relatively small, even
|
||||
%% after processing 200+ topics:
|
||||
AllStreams = GetStream("#"),
|
||||
NTotal = length(AllStreams),
|
||||
?assert(NTotal < 30, {NTotal, '<', 30}),
|
||||
?assert(lists:member(FooBar, AllStreams)),
|
||||
?assert(lists:member(FooBarBaz, AllStreams)),
|
||||
?assert(lists:member(A, AllStreams)),
|
||||
ok.
|
||||
|
||||
t_replay(_Config) ->
|
||||
%% Create concrete topics:
|
||||
Topics = [<<"foo/bar">>, <<"foo/bar/baz">>],
|
||||
Timestamps = lists:seq(1, 10_000, 100),
|
||||
Batch1 = [
|
||||
make_message(PublishedAt, Topic, integer_to_binary(PublishedAt))
|
||||
|| Topic <- Topics, PublishedAt <- Timestamps
|
||||
],
|
||||
ok = emqx_ds_storage_layer:store_batch(?SHARD, Batch1, []),
|
||||
%% Create wildcard topics `wildcard/+/suffix/foo' and `wildcard/+/suffix/bar':
|
||||
Batch2 = [
|
||||
begin
|
||||
B = integer_to_binary(I),
|
||||
make_message(
|
||||
TS, <<"wildcard/", B/binary, "/suffix/", Suffix/binary>>, integer_to_binary(TS)
|
||||
)
|
||||
end
|
||||
|| I <- lists:seq(1, 200), TS <- Timestamps, Suffix <- [<<"foo">>, <<"bar">>]
|
||||
],
|
||||
ok = emqx_ds_storage_layer:store_batch(?SHARD, Batch2, []),
|
||||
%% Check various topic filters:
|
||||
Messages = Batch1 ++ Batch2,
|
||||
%% Missing topics (no ghost messages):
|
||||
?assertNot(check(?SHARD, <<"missing/foo/bar">>, 0, Messages)),
|
||||
%% Regular topics:
|
||||
?assert(check(?SHARD, <<"foo/bar">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"foo/bar/baz">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"foo/#">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"foo/+">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"foo/+/+">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"+/+/+">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"+/+/baz">>, 0, Messages)),
|
||||
%% Restart shard to make sure trie is persisted and restored:
|
||||
ok = emqx_ds_storage_layer_sup:stop_shard(?SHARD),
|
||||
{ok, _} = emqx_ds_storage_layer_sup:start_shard(?SHARD, #{}),
|
||||
%% Learned wildcard topics:
|
||||
?assertNot(check(?SHARD, <<"wildcard/1000/suffix/foo">>, 0, [])),
|
||||
?assert(check(?SHARD, <<"wildcard/1/suffix/foo">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"wildcard/100/suffix/foo">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"wildcard/+/suffix/foo">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"wildcard/1/suffix/+">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"wildcard/100/suffix/+">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"wildcard/#">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"wildcard/1/#">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"wildcard/100/#">>, 0, Messages)),
|
||||
?assert(check(?SHARD, <<"#">>, 0, Messages)),
|
||||
ok.
|
||||
|
||||
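%% check/4 dumps TopicFilter from the shard and asserts that the result
%% is exactly the subset of ExpectedMessages matching the filter; it
%% returns whether that subset is non-empty, so ?assertNot above doubles
%% as a "no ghost messages" check.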
check(Shard, TopicFilter, StartTime, ExpectedMessages) ->
|
||||
ExpectedFiltered = lists:filter(
|
||||
fun(#message{topic = Topic, timestamp = TS}) ->
|
||||
emqx_topic:match(Topic, TopicFilter) andalso TS >= StartTime
|
||||
end,
|
||||
ExpectedMessages
|
||||
),
|
||||
?check_trace(
|
||||
#{timetrap => 10_000},
|
||||
begin
|
||||
Dump = dump_messages(Shard, TopicFilter, StartTime),
|
||||
verify_dump(TopicFilter, StartTime, Dump),
|
||||
Missing = ExpectedFiltered -- Dump,
|
||||
Extras = Dump -- ExpectedFiltered,
|
||||
?assertMatch(
|
||||
#{missing := [], unexpected := []},
|
||||
#{
|
||||
missing => Missing,
|
||||
unexpected => Extras,
|
||||
topic_filter => TopicFilter,
|
||||
start_time => StartTime
|
||||
}
|
||||
)
|
||||
end,
|
||||
[]
|
||||
),
|
||||
length(ExpectedFiltered) > 0.
|
||||
|
||||
verify_dump(TopicFilter, StartTime, Dump) ->
|
||||
lists:foldl(
|
||||
fun(#message{topic = Topic, timestamp = TS}, Acc) ->
|
||||
%% Verify that the topic of the message returned by the
|
||||
%% iterator matches the expected topic filter:
|
||||
?assert(emqx_topic:match(Topic, TopicFilter), {unexpected_topic, Topic, TopicFilter}),
|
||||
%% Verify that timestamp of the message is greater than
|
||||
%% the StartTime of the iterator:
|
||||
?assert(TS >= StartTime, {start_time, TopicFilter, TS, StartTime}),
|
||||
%% Verify that iterator didn't reorder messages
|
||||
%% (timestamps for each topic are growing):
|
||||
LastTopicTs = maps:get(Topic, Acc, -1),
|
||||
?assert(TS >= LastTopicTs, {topic_ts_reordering, Topic, TS, LastTopicTs}),
|
||||
Acc#{Topic => TS}
|
||||
end,
|
||||
#{},
|
||||
Dump
|
||||
).
|
||||
|
||||
dump_messages(Shard, TopicFilter, StartTime) ->
|
||||
Streams = emqx_ds_storage_layer:get_streams(Shard, parse_topic(TopicFilter), StartTime),
|
||||
lists:flatmap(
|
||||
fun({_Rank, Stream}) ->
|
||||
dump_stream(Shard, Stream, TopicFilter, StartTime)
|
||||
end,
|
||||
Streams
|
||||
).
|
||||
|
||||
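%% dump_stream/4 drains a single stream in batches of 100; the
%% MaxIterations bound turns a non-terminating iterator into a test
%% failure instead of a hang.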
dump_stream(Shard, Stream, TopicFilter, StartTime) ->
|
||||
BatchSize = 100,
|
||||
{ok, Iterator} = emqx_ds_storage_layer:make_iterator(
|
||||
Shard, Stream, parse_topic(TopicFilter), StartTime
|
||||
),
|
||||
Loop = fun
|
||||
F(It, 0) ->
|
||||
error({too_many_iterations, It});
|
||||
F(It, N) ->
|
||||
case emqx_ds_storage_layer:next(Shard, It, BatchSize) of
|
||||
end_of_stream ->
|
||||
[];
|
||||
{ok, _NextIt, []} ->
|
||||
[];
|
||||
{ok, NextIt, Batch} ->
|
||||
Batch ++ F(NextIt, N - 1)
|
||||
end
|
||||
end,
|
||||
MaxIterations = 1000000,
|
||||
Loop(Iterator, MaxIterations).
|
||||
|
||||
%% t_create_gen(_Config) ->
|
||||
%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 5, ?DEFAULT_CONFIG),
|
||||
%% ?assertEqual(
|
||||
%% {error, nonmonotonic},
|
||||
%% emqx_ds_storage_layer:create_generation(?SHARD, 1, ?DEFAULT_CONFIG)
|
||||
%% ),
|
||||
%% ?assertEqual(
|
||||
%% {error, nonmonotonic},
|
||||
%% emqx_ds_storage_layer:create_generation(?SHARD, 5, ?DEFAULT_CONFIG)
|
||||
%% ),
|
||||
%% {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG),
|
||||
%% Topics = ["foo/bar", "foo/bar/baz"],
|
||||
%% Timestamps = lists:seq(1, 100),
|
||||
%% [
|
||||
%% ?assertMatch({ok, [_]}, store(?SHARD, PublishedAt, Topic, <<>>))
|
||||
%% || Topic <- Topics, PublishedAt <- Timestamps
|
||||
%% ].
|
||||
|
||||
%% t_iterate_multigen(_Config) ->
|
||||
%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG),
|
||||
%% {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG),
|
||||
%% {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG),
|
||||
%% Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"],
|
||||
%% Timestamps = lists:seq(1, 100),
|
||||
%% _ = [
|
||||
%% store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt}))
|
||||
%% || Topic <- Topics, PublishedAt <- Timestamps
|
||||
%% ],
|
||||
%% ?assertEqual(
|
||||
%% lists:sort([
|
||||
%% {Topic, PublishedAt}
|
||||
%% || Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps
|
||||
%% ]),
|
||||
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)])
|
||||
%% ),
|
||||
%% ?assertEqual(
|
||||
%% lists:sort([
|
||||
%% {Topic, PublishedAt}
|
||||
%% || Topic <- ["a", "a/bar"], PublishedAt <- lists:seq(60, 100)
|
||||
%% ]),
|
||||
%% lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 60)])
|
||||
%% ).
|
||||
|
||||
%% t_iterate_multigen_preserve_restore(_Config) ->
|
||||
%% ReplayID = atom_to_binary(?FUNCTION_NAME),
|
||||
%% {ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG),
|
||||
%% {ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG),
|
||||
%% {ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 100, ?DEFAULT_CONFIG),
|
||||
%% Topics = ["foo/bar", "foo/bar/baz", "a/bar"],
|
||||
%% Timestamps = lists:seq(1, 100),
|
||||
%% TopicFilter = "foo/#",
|
||||
%% TopicsMatching = ["foo/bar", "foo/bar/baz"],
|
||||
%% _ = [
|
||||
%% store(?SHARD, TS, Topic, term_to_binary({Topic, TS}))
|
||||
%% || Topic <- Topics, TS <- Timestamps
|
||||
%% ],
|
||||
%% It0 = iterator(?SHARD, TopicFilter, 0),
|
||||
%% {It1, Res10} = iterate(It0, 10),
|
||||
%% % preserve mid-generation
|
||||
%% ok = emqx_ds_storage_layer:preserve_iterator(It1, ReplayID),
|
||||
%% {ok, It2} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID),
|
||||
%% {It3, Res100} = iterate(It2, 88),
|
||||
%% % preserve on the generation boundary
|
||||
%% ok = emqx_ds_storage_layer:preserve_iterator(It3, ReplayID),
|
||||
%% {ok, It4} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID),
|
||||
%% {It5, Res200} = iterate(It4, 1000),
|
||||
%% ?assertEqual({end_of_stream, []}, iterate(It5, 1)),
|
||||
%% ?assertEqual(
|
||||
%% lists:sort([{Topic, TS} || Topic <- TopicsMatching, TS <- Timestamps]),
|
||||
%% lists:sort([binary_to_term(Payload) || Payload <- Res10 ++ Res100 ++ Res200])
|
||||
%% ),
|
||||
%% ?assertEqual(
|
||||
%% ok,
|
||||
%% emqx_ds_storage_layer:discard_iterator(?SHARD, ReplayID)
|
||||
%% ),
|
||||
%% ?assertEqual(
|
||||
%% {error, not_found},
|
||||
%% emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID)
|
||||
%% ).
|
||||
|
||||
make_message(PublishedAt, Topic, Payload) when is_list(Topic) ->
|
||||
make_message(PublishedAt, list_to_binary(Topic), Payload);
|
||||
make_message(PublishedAt, Topic, Payload) when is_binary(Topic) ->
|
||||
ID = emqx_guid:gen(),
|
||||
#message{
|
||||
id = ID,
|
||||
topic = Topic,
|
||||
timestamp = PublishedAt,
|
||||
payload = Payload
|
||||
}.
|
||||
|
||||
store(Shard, PublishedAt, TopicL, Payload) when is_list(TopicL) ->
|
||||
store(Shard, PublishedAt, list_to_binary(TopicL), Payload);
|
||||
store(Shard, PublishedAt, Topic, Payload) ->
|
||||
ID = emqx_guid:gen(),
|
||||
Msg = #message{
|
||||
id = ID,
|
||||
topic = Topic,
|
||||
timestamp = PublishedAt,
|
||||
payload = Payload
|
||||
},
|
||||
emqx_ds_storage_layer:message_store(Shard, [Msg], #{}).
|
||||
|
||||
payloads(Messages) ->
|
||||
lists:map(
|
||||
fun(#message{payload = P}) ->
|
||||
P
|
||||
end,
|
||||
Messages
|
||||
).
|
||||
|
||||
parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) ->
|
||||
Topic;
|
||||
parse_topic(Topic) ->
|
||||
emqx_topic:words(iolist_to_binary(Topic)).
|
||||
|
||||
%% CT callbacks
|
||||
|
||||
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
suite() -> [{timetrap, {seconds, 20}}].
|
||||
|
||||
init_per_suite(Config) ->
|
||||
{ok, _} = application:ensure_all_started(emqx_durable_storage),
|
||||
Config.
|
||||
|
||||
end_per_suite(_Config) ->
|
||||
ok = application:stop(emqx_durable_storage).
|
||||
|
||||
init_per_testcase(TC, Config) ->
|
||||
{ok, _} = emqx_ds_storage_layer_sup:start_shard(shard(TC), ?DEFAULT_CONFIG),
|
||||
Config.
|
||||
|
||||
end_per_testcase(TC, _Config) ->
|
||||
ok = emqx_ds_storage_layer_sup:stop_shard(shard(TC)).
|
||||
|
||||
shard(TC) ->
|
||||
{?MODULE, TC}.
|
||||
|
||||
keyspace(TC) ->
|
||||
TC.
|
||||
|
||||
set_keyspace_config(Keyspace, Config) ->
|
||||
ok = application:set_env(emqx_ds, keyspace_config, #{Keyspace => Config}).
|
|
@ -1,282 +0,0 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
-module(emqx_ds_storage_layer_SUITE).
|
||||
|
||||
-compile(export_all).
|
||||
-compile(nowarn_export_all).
|
||||
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
-include_lib("stdlib/include/assert.hrl").
|
||||
|
||||
-define(SHARD, shard(?FUNCTION_NAME)).
|
||||
|
||||
-define(DEFAULT_CONFIG,
|
||||
{emqx_ds_message_storage_bitmask, #{
|
||||
timestamp_bits => 64,
|
||||
topic_bits_per_level => [8, 8, 32, 16],
|
||||
epoch => 5,
|
||||
iteration => #{
|
||||
iterator_refresh => {every, 5}
|
||||
}
|
||||
}}
|
||||
).
|
||||
|
||||
-define(COMPACT_CONFIG,
|
||||
{emqx_ds_message_storage_bitmask, #{
|
||||
timestamp_bits => 16,
|
||||
topic_bits_per_level => [16, 16],
|
||||
epoch => 10
|
||||
}}
|
||||
).
|
||||
|
||||
%% Smoke test for opening and reopening the database
|
||||
t_open(_Config) ->
|
||||
ok = emqx_ds_storage_layer_sup:stop_shard(?SHARD),
|
||||
{ok, _} = emqx_ds_storage_layer_sup:start_shard(?SHARD, #{}).
|
||||
|
||||
%% Smoke test of store function
|
||||
t_store(_Config) ->
|
||||
MessageID = emqx_guid:gen(),
|
||||
PublishedAt = 1000,
|
||||
Topic = [<<"foo">>, <<"bar">>],
|
||||
Payload = <<"message">>,
|
||||
?assertMatch(ok, emqx_ds_storage_layer:store(?SHARD, MessageID, PublishedAt, Topic, Payload)).
|
||||
|
||||
%% Smoke test for iteration through a concrete topic
|
||||
t_iterate(_Config) ->
|
||||
%% Prepare data:
|
||||
Topics = [[<<"foo">>, <<"bar">>], [<<"foo">>, <<"bar">>, <<"baz">>], [<<"a">>]],
|
||||
Timestamps = lists:seq(1, 10),
|
||||
[
|
||||
emqx_ds_storage_layer:store(
|
||||
?SHARD,
|
||||
emqx_guid:gen(),
|
||||
PublishedAt,
|
||||
Topic,
|
||||
integer_to_binary(PublishedAt)
|
||||
)
|
||||
|| Topic <- Topics, PublishedAt <- Timestamps
|
||||
],
|
||||
%% Iterate through individual topics:
|
||||
[
|
||||
begin
|
||||
{ok, It} = emqx_ds_storage_layer:make_iterator(?SHARD, {Topic, 0}),
|
||||
Values = iterate(It),
|
||||
?assertEqual(lists:map(fun integer_to_binary/1, Timestamps), Values)
|
||||
end
|
||||
|| Topic <- Topics
|
||||
],
|
||||
ok.
|
||||
|
||||
%% Smoke test for iteration with wildcard topic filter
|
||||
t_iterate_wildcard(_Config) ->
|
||||
%% Prepare data:
|
||||
Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"],
|
||||
Timestamps = lists:seq(1, 10),
|
||||
_ = [
|
||||
store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt}))
|
||||
|| Topic <- Topics, PublishedAt <- Timestamps
|
||||
],
|
||||
?assertEqual(
|
||||
lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- Timestamps]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 0)])
|
||||
),
|
||||
?assertEqual(
|
||||
[],
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 10 + 1)])
|
||||
),
|
||||
?assertEqual(
|
||||
lists:sort([{Topic, PublishedAt} || Topic <- Topics, PublishedAt <- lists:seq(5, 10)]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "#", 5)])
|
||||
),
|
||||
?assertEqual(
|
||||
lists:sort([
|
||||
{Topic, PublishedAt}
|
||||
|| Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps
|
||||
]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)])
|
||||
),
|
||||
?assertEqual(
|
||||
lists:sort([{"foo/bar", PublishedAt} || PublishedAt <- Timestamps]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/+", 0)])
|
||||
),
|
||||
?assertEqual(
|
||||
[],
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/+/bar", 0)])
|
||||
),
|
||||
?assertEqual(
|
||||
lists:sort([
|
||||
{Topic, PublishedAt}
|
||||
|| Topic <- ["foo/bar", "foo/bar/baz", "a/bar"], PublishedAt <- Timestamps
|
||||
]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "+/bar/#", 0)])
|
||||
),
|
||||
?assertEqual(
|
||||
lists:sort([{Topic, PublishedAt} || Topic <- ["a", "a/bar"], PublishedAt <- Timestamps]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 0)])
|
||||
),
|
||||
?assertEqual(
|
||||
[],
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/+/+", 0)])
|
||||
),
|
||||
ok.
|
||||
|
||||
t_iterate_long_tail_wildcard(_Config) ->
|
||||
Topic = "b/c/d/e/f/g",
|
||||
TopicFilter = "b/c/d/e/+/+",
|
||||
Timestamps = lists:seq(1, 100),
|
||||
_ = [
|
||||
store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt}))
|
||||
|| PublishedAt <- Timestamps
|
||||
],
|
||||
?assertEqual(
|
||||
lists:sort([{"b/c/d/e/f/g", PublishedAt} || PublishedAt <- lists:seq(50, 100)]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, TopicFilter, 50)])
|
||||
).
|
||||
|
||||
t_create_gen(_Config) ->
|
||||
{ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 5, ?DEFAULT_CONFIG),
|
||||
?assertEqual(
|
||||
{error, nonmonotonic},
|
||||
emqx_ds_storage_layer:create_generation(?SHARD, 1, ?DEFAULT_CONFIG)
|
||||
),
|
||||
?assertEqual(
|
||||
{error, nonmonotonic},
|
||||
emqx_ds_storage_layer:create_generation(?SHARD, 5, ?DEFAULT_CONFIG)
|
||||
),
|
||||
{ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG),
|
||||
Topics = ["foo/bar", "foo/bar/baz"],
|
||||
Timestamps = lists:seq(1, 100),
|
||||
[
|
||||
?assertEqual(ok, store(?SHARD, PublishedAt, Topic, <<>>))
|
||||
|| Topic <- Topics, PublishedAt <- Timestamps
|
||||
].
|
||||
|
||||
t_iterate_multigen(_Config) ->
|
||||
{ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG),
|
||||
{ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG),
|
||||
{ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 1000, ?DEFAULT_CONFIG),
|
||||
Topics = ["foo/bar", "foo/bar/baz", "a", "a/bar"],
|
||||
Timestamps = lists:seq(1, 100),
|
||||
_ = [
|
||||
store(?SHARD, PublishedAt, Topic, term_to_binary({Topic, PublishedAt}))
|
||||
|| Topic <- Topics, PublishedAt <- Timestamps
|
||||
],
|
||||
?assertEqual(
|
||||
lists:sort([
|
||||
{Topic, PublishedAt}
|
||||
|| Topic <- ["foo/bar", "foo/bar/baz"], PublishedAt <- Timestamps
|
||||
]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "foo/#", 0)])
|
||||
),
|
||||
?assertEqual(
|
||||
lists:sort([
|
||||
{Topic, PublishedAt}
|
||||
|| Topic <- ["a", "a/bar"], PublishedAt <- lists:seq(60, 100)
|
||||
]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- iterate(?SHARD, "a/#", 60)])
|
||||
).
|
||||
|
||||
t_iterate_multigen_preserve_restore(_Config) ->
|
||||
ReplayID = atom_to_binary(?FUNCTION_NAME),
|
||||
{ok, 1} = emqx_ds_storage_layer:create_generation(?SHARD, 10, ?COMPACT_CONFIG),
|
||||
{ok, 2} = emqx_ds_storage_layer:create_generation(?SHARD, 50, ?DEFAULT_CONFIG),
|
||||
{ok, 3} = emqx_ds_storage_layer:create_generation(?SHARD, 100, ?DEFAULT_CONFIG),
|
||||
Topics = ["foo/bar", "foo/bar/baz", "a/bar"],
|
||||
Timestamps = lists:seq(1, 100),
|
||||
TopicFilter = "foo/#",
|
||||
TopicsMatching = ["foo/bar", "foo/bar/baz"],
|
||||
_ = [
|
||||
store(?SHARD, TS, Topic, term_to_binary({Topic, TS}))
|
||||
|| Topic <- Topics, TS <- Timestamps
|
||||
],
|
||||
It0 = iterator(?SHARD, TopicFilter, 0),
|
||||
{It1, Res10} = iterate(It0, 10),
|
||||
% preserve mid-generation
|
||||
ok = emqx_ds_storage_layer:preserve_iterator(It1, ReplayID),
|
||||
{ok, It2} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID),
|
||||
{It3, Res100} = iterate(It2, 88),
|
||||
% preserve on the generation boundary
|
||||
ok = emqx_ds_storage_layer:preserve_iterator(It3, ReplayID),
|
||||
{ok, It4} = emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID),
|
||||
{It5, Res200} = iterate(It4, 1000),
|
||||
?assertEqual(none, It5),
|
||||
?assertEqual(
|
||||
lists:sort([{Topic, TS} || Topic <- TopicsMatching, TS <- Timestamps]),
|
||||
lists:sort([binary_to_term(Payload) || Payload <- Res10 ++ Res100 ++ Res200])
|
||||
),
|
||||
?assertEqual(
|
||||
ok,
|
||||
emqx_ds_storage_layer:discard_iterator(?SHARD, ReplayID)
|
||||
),
|
||||
?assertEqual(
|
||||
{error, not_found},
|
||||
emqx_ds_storage_layer:restore_iterator(?SHARD, ReplayID)
|
||||
).
|
||||
|
||||
store(Shard, PublishedAt, Topic, Payload) ->
|
||||
ID = emqx_guid:gen(),
|
||||
emqx_ds_storage_layer:store(Shard, ID, PublishedAt, parse_topic(Topic), Payload).
|
||||
|
||||
iterate(DB, TopicFilter, StartTime) ->
|
||||
iterate(iterator(DB, TopicFilter, StartTime)).
|
||||
|
||||
iterate(It) ->
|
||||
case emqx_ds_storage_layer:next(It) of
|
||||
{value, Payload, ItNext} ->
|
||||
[Payload | iterate(ItNext)];
|
||||
none ->
|
||||
[]
|
||||
end.
|
||||
|
||||
iterate(It, 0) ->
|
||||
{It, []};
|
||||
iterate(It, N) ->
|
||||
case emqx_ds_storage_layer:next(It) of
|
||||
{value, Payload, ItNext} ->
|
||||
{ItFinal, Ps} = iterate(ItNext, N - 1),
|
||||
{ItFinal, [Payload | Ps]};
|
||||
none ->
|
||||
{none, []}
|
||||
end.
|
||||
|
||||
iterator(DB, TopicFilter, StartTime) ->
|
||||
{ok, It} = emqx_ds_storage_layer:make_iterator(DB, {parse_topic(TopicFilter), StartTime}),
|
||||
It.
|
||||
|
||||
parse_topic(Topic = [L | _]) when is_binary(L); is_atom(L) ->
|
||||
Topic;
|
||||
parse_topic(Topic) ->
|
||||
emqx_topic:words(iolist_to_binary(Topic)).
|
||||
|
||||
%% CT callbacks
|
||||
|
||||
all() -> emqx_common_test_helpers:all(?MODULE).
|
||||
|
||||
init_per_suite(Config) ->
|
||||
{ok, _} = application:ensure_all_started(emqx_durable_storage),
|
||||
Config.
|
||||
|
||||
end_per_suite(_Config) ->
|
||||
ok = application:stop(emqx_durable_storage).
|
||||
|
||||
init_per_testcase(TC, Config) ->
|
||||
ok = set_keyspace_config(keyspace(TC), ?DEFAULT_CONFIG),
|
||||
{ok, _} = emqx_ds_storage_layer_sup:start_shard(shard(TC), #{}),
|
||||
Config.
|
||||
|
||||
end_per_testcase(TC, _Config) ->
|
||||
ok = emqx_ds_storage_layer_sup:stop_shard(shard(TC)).
|
||||
|
||||
keyspace(TC) ->
|
||||
list_to_atom(lists:concat([?MODULE, "_", TC])).
|
||||
|
||||
shard_id(_TC) ->
|
||||
<<"shard">>.
|
||||
|
||||
shard(TC) ->
|
||||
{keyspace(TC), shard_id(TC)}.
|
||||
|
||||
set_keyspace_config(Keyspace, Config) ->
|
||||
ok = application:set_env(emqx_ds, keyspace_config, #{Keyspace => Config}).
|
|
@ -4,9 +4,11 @@
|
|||
|
||||
-module(emqx_ds_message_storage_bitmask_shim).
|
||||
|
||||
-include_lib("emqx/include/emqx.hrl").
|
||||
|
||||
-export([open/0]).
|
||||
-export([close/1]).
|
||||
-export([store/5]).
|
||||
-export([store/2]).
|
||||
-export([iterate/2]).
|
||||
|
||||
-type topic() :: list(binary()).
|
||||
|
@ -25,20 +27,21 @@ close(Tab) ->
|
|||
true = ets:delete(Tab),
|
||||
ok.
|
||||
|
||||
-spec store(t(), emqx_guid:guid(), time(), topic(), binary()) ->
|
||||
-spec store(t(), emqx_types:message()) ->
|
||||
ok | {error, _TODO}.
|
||||
store(Tab, MessageID, PublishedAt, Topic, Payload) ->
|
||||
true = ets:insert(Tab, {{PublishedAt, MessageID}, Topic, Payload}),
|
||||
store(Tab, Msg = #message{id = MessageID, timestamp = PublishedAt}) ->
|
||||
true = ets:insert(Tab, {{PublishedAt, MessageID}, Msg}),
|
||||
ok.
|
||||
|
||||
-spec iterate(t(), emqx_ds:replay()) ->
|
||||
[binary()].
|
||||
iterate(Tab, {TopicFilter, StartTime}) ->
|
||||
iterate(Tab, {TopicFilter0, StartTime}) ->
|
||||
TopicFilter = iolist_to_binary(lists:join("/", TopicFilter0)),
|
||||
ets:foldr(
|
||||
fun({{PublishedAt, _}, Topic, Payload}, Acc) ->
|
||||
fun({{PublishedAt, _}, Msg = #message{topic = Topic}}, Acc) ->
|
||||
case emqx_topic:match(Topic, TopicFilter) of
|
||||
true when PublishedAt >= StartTime ->
|
||||
[Payload | Acc];
|
||||
[Msg | Acc];
|
||||
_ ->
|
||||
Acc
|
||||
end
|
||||
|
|
|
@ -1,466 +0,0 @@
|
|||
%%--------------------------------------------------------------------
|
||||
%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
-module(prop_replay_message_storage).
|
||||
|
||||
-include_lib("proper/include/proper.hrl").
|
||||
-include_lib("eunit/include/eunit.hrl").
|
||||
|
||||
-define(WORK_DIR, ["_build", "test"]).
|
||||
-define(RUN_ID, {?MODULE, testrun_id}).
|
||||
|
||||
-define(KEYSPACE, ?MODULE).
|
||||
-define(SHARD_ID, <<"shard">>).
|
||||
-define(SHARD, {?KEYSPACE, ?SHARD_ID}).
|
||||
-define(GEN_ID, 42).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Properties
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
prop_bitstring_computes() ->
|
||||
?FORALL(
|
||||
Keymapper,
|
||||
keymapper(),
|
||||
?FORALL({Topic, Timestamp}, {topic(), integer()}, begin
|
||||
BS = emqx_ds_message_storage_bitmask:compute_bitstring(Topic, Timestamp, Keymapper),
|
||||
is_integer(BS) andalso (BS < (1 bsl get_keymapper_bitsize(Keymapper)))
|
||||
end)
|
||||
).
|
||||
|
||||
prop_topic_bitmask_computes() ->
|
||||
Keymapper = make_keymapper(16, [8, 12, 16], 100),
|
||||
?FORALL(TopicFilter, topic_filter(), begin
|
||||
Mask = emqx_ds_message_storage_bitmask:compute_topic_bitmask(TopicFilter, Keymapper),
|
||||
% topic bits + timestamp LSBs
|
||||
is_integer(Mask) andalso (Mask < (1 bsl (36 + 6)))
|
||||
end).
|
||||
|
||||
prop_next_seek_monotonic() ->
|
||||
?FORALL(
|
||||
{TopicFilter, StartTime, Keymapper},
|
||||
{topic_filter(), pos_integer(), keymapper()},
|
||||
begin
|
||||
Filter = emqx_ds_message_storage_bitmask:make_keyspace_filter(
|
||||
{TopicFilter, StartTime},
|
||||
Keymapper
|
||||
),
|
||||
?FORALL(
|
||||
Bitstring,
|
||||
bitstr(get_keymapper_bitsize(Keymapper)),
|
||||
emqx_ds_message_storage_bitmask:compute_next_seek(Bitstring, Filter) >= Bitstring
|
||||
)
|
||||
end
|
||||
).
|
||||
|
||||
prop_next_seek_eq_initial_seek() ->
|
||||
?FORALL(
|
||||
Filter,
|
||||
keyspace_filter(),
|
||||
emqx_ds_message_storage_bitmask:compute_initial_seek(Filter) =:=
|
||||
emqx_ds_message_storage_bitmask:compute_next_seek(0, Filter)
|
||||
).
|
||||
|
||||
prop_iterate_messages() ->
|
||||
TBPL = [4, 8, 12],
|
||||
Options = #{
|
||||
timestamp_bits => 32,
|
||||
topic_bits_per_level => TBPL,
|
||||
epoch => 200
|
||||
},
|
||||
% TODO
|
||||
% Shrinking is too unpredictable and leaves a LOT of garbage in the scratch dir.
|
||||
?FORALL(Stream, noshrink(non_empty(messages(topic(TBPL)))), begin
|
||||
Filepath = make_filepath(?FUNCTION_NAME, erlang:system_time(microsecond)),
|
||||
{DB, Handle} = open_db(Filepath, Options),
|
||||
Shim = emqx_ds_message_storage_bitmask_shim:open(),
|
||||
ok = store_db(DB, Stream),
|
||||
ok = store_shim(Shim, Stream),
|
||||
?FORALL(
|
||||
{
|
||||
{Topic, _},
|
||||
Pattern,
|
||||
StartTime
|
||||
},
|
||||
{
|
||||
nth(Stream),
|
||||
topic_filter_pattern(),
|
||||
start_time()
|
||||
},
|
||||
begin
|
||||
TopicFilter = make_topic_filter(Pattern, Topic),
|
||||
Iteration = {TopicFilter, StartTime},
|
||||
Messages = iterate_db(DB, Iteration),
|
||||
Reference = iterate_shim(Shim, Iteration),
|
||||
ok = close_db(Handle),
|
||||
ok = emqx_ds_message_storage_bitmask_shim:close(Shim),
|
||||
?WHENFAIL(
|
||||
begin
|
||||
io:format(user, " *** Filepath = ~s~n", [Filepath]),
|
||||
io:format(user, " *** TopicFilter = ~p~n", [TopicFilter]),
|
||||
io:format(user, " *** StartTime = ~p~n", [StartTime])
|
||||
end,
|
||||
is_list(Messages) andalso equals(Messages -- Reference, Reference -- Messages)
|
||||
)
|
||||
end
|
||||
)
|
||||
end).
|
||||
|
||||
prop_iterate_eq_iterate_with_preserve_restore() ->
|
||||
TBPL = [4, 8, 16, 12],
|
||||
Options = #{
|
||||
timestamp_bits => 32,
|
||||
topic_bits_per_level => TBPL,
|
||||
epoch => 500
|
||||
},
|
||||
{DB, _Handle} = open_db(make_filepath(?FUNCTION_NAME), Options),
|
||||
?FORALL(Stream, non_empty(messages(topic(TBPL))), begin
|
||||
% TODO
|
||||
% This proptest is impure because messages from testruns assumed to be
|
||||
% independent of each other are accumulated in the same storage. This
|
||||
% would probably confuse the shrinker in the event a testrun fails.
|
||||
ok = store_db(DB, Stream),
|
||||
?FORALL(
|
||||
{
|
||||
{Topic, _},
|
||||
Pat,
|
||||
StartTime,
|
||||
Commands
|
||||
},
|
||||
{
|
||||
nth(Stream),
|
||||
topic_filter_pattern(),
|
||||
start_time(),
|
||||
shuffled(flat([non_empty(list({preserve, restore})), list(iterate)]))
|
||||
},
|
||||
begin
|
||||
Replay = {make_topic_filter(Pat, Topic), StartTime},
|
||||
Iterator = make_iterator(DB, Replay),
|
||||
Ctx = #{db => DB, replay => Replay},
|
||||
Messages = run_iterator_commands(Commands, Iterator, Ctx),
|
||||
equals(Messages, iterate_db(DB, Replay))
|
||||
end
|
||||
)
|
||||
end).
|
||||
|
||||
prop_iterate_eq_iterate_with_refresh() ->
|
||||
TBPL = [4, 8, 16, 12],
|
||||
Options = #{
|
||||
timestamp_bits => 32,
|
||||
topic_bits_per_level => TBPL,
|
||||
epoch => 500
|
||||
},
|
||||
{DB, _Handle} = open_db(make_filepath(?FUNCTION_NAME), Options),
|
||||
?FORALL(Stream, non_empty(messages(topic(TBPL))), begin
|
||||
% TODO
|
||||
% This proptest is also impure, see above.
|
||||
ok = store_db(DB, Stream),
|
||||
?FORALL(
|
||||
{
|
||||
{Topic, _},
|
||||
Pat,
|
||||
StartTime,
|
||||
RefreshEvery
|
||||
},
|
||||
{
|
||||
nth(Stream),
|
||||
topic_filter_pattern(),
|
||||
start_time(),
|
||||
pos_integer()
|
||||
},
|
||||
?TIMEOUT(5000, begin
|
||||
Replay = {make_topic_filter(Pat, Topic), StartTime},
|
||||
IterationOptions = #{iterator_refresh => {every, RefreshEvery}},
|
||||
Iterator = make_iterator(DB, Replay, IterationOptions),
|
||||
Messages = iterate_db(Iterator),
|
||||
equals(Messages, iterate_db(DB, Replay))
|
||||
end)
|
||||
)
|
||||
end).
|
||||
|
||||
% store_message_stream(DB, [{Topic, {Payload, ChunkNum, _ChunkCount}} | Rest]) ->
|
||||
% MessageID = emqx_guid:gen(),
|
||||
% PublishedAt = ChunkNum,
|
||||
% MessageID, PublishedAt, Topic
|
||||
% ]),
|
||||
% ok = emqx_ds_message_storage_bitmask:store(DB, MessageID, PublishedAt, Topic, Payload),
|
||||
% store_message_stream(DB, payload_gen:next(Rest));
|
||||
% store_message_stream(_Zone, []) ->
|
||||
% ok.
|
||||
|
||||
store_db(DB, Messages) ->
|
||||
lists:foreach(
|
||||
fun({Topic, Payload = {MessageID, Timestamp, _}}) ->
|
||||
Bin = term_to_binary(Payload),
|
||||
emqx_ds_message_storage_bitmask:store(DB, MessageID, Timestamp, Topic, Bin)
|
||||
end,
|
||||
Messages
|
||||
).
|
||||
|
||||
iterate_db(DB, Iteration) ->
|
||||
iterate_db(make_iterator(DB, Iteration)).
|
||||
|
||||
iterate_db(It) ->
|
||||
case emqx_ds_message_storage_bitmask:next(It) of
|
||||
{value, Payload, ItNext} ->
|
||||
[binary_to_term(Payload) | iterate_db(ItNext)];
|
||||
none ->
|
||||
[]
|
||||
end.
|
||||
|
||||
make_iterator(DB, Replay) ->
|
||||
{ok, It} = emqx_ds_message_storage_bitmask:make_iterator(DB, Replay),
|
||||
It.
|
||||
|
||||
make_iterator(DB, Replay, Options) ->
|
||||
{ok, It} = emqx_ds_message_storage_bitmask:make_iterator(DB, Replay, Options),
|
||||
It.
|
||||
|
||||
run_iterator_commands([iterate | Rest], It, Ctx) ->
|
||||
case emqx_ds_message_storage_bitmask:next(It) of
|
||||
{value, Payload, ItNext} ->
|
||||
[binary_to_term(Payload) | run_iterator_commands(Rest, ItNext, Ctx)];
|
||||
none ->
|
||||
[]
|
||||
end;
|
||||
run_iterator_commands([{preserve, restore} | Rest], It, Ctx) ->
|
||||
#{
|
||||
db := DB,
|
||||
replay := Replay
|
||||
} = Ctx,
|
||||
Serial = emqx_ds_message_storage_bitmask:preserve_iterator(It),
|
||||
{ok, ItNext} = emqx_ds_message_storage_bitmask:restore_iterator(DB, Replay, Serial),
|
||||
run_iterator_commands(Rest, ItNext, Ctx);
|
||||
run_iterator_commands([], It, _Ctx) ->
|
||||
iterate_db(It).
|
||||
|
||||
store_shim(Shim, Messages) ->
|
||||
lists:foreach(
|
||||
fun({Topic, Payload = {MessageID, Timestamp, _}}) ->
|
||||
Bin = term_to_binary(Payload),
|
||||
emqx_ds_message_storage_bitmask_shim:store(Shim, MessageID, Timestamp, Topic, Bin)
|
||||
end,
|
||||
Messages
|
||||
).
|
||||
|
||||
iterate_shim(Shim, Iteration) ->
|
||||
lists:map(
|
||||
fun binary_to_term/1,
|
||||
emqx_ds_message_storage_bitmask_shim:iterate(Shim, Iteration)
|
||||
).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Setup / teardown
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
open_db(Filepath, Options) ->
|
||||
{ok, Handle} = rocksdb:open(Filepath, [{create_if_missing, true}]),
|
||||
{Schema, CFRefs} = emqx_ds_message_storage_bitmask:create_new(Handle, ?GEN_ID, Options),
|
||||
DB = emqx_ds_message_storage_bitmask:open(?SHARD, Handle, ?GEN_ID, CFRefs, Schema),
|
||||
{DB, Handle}.
|
||||
|
||||
close_db(Handle) ->
|
||||
rocksdb:close(Handle).
|
||||
|
||||
make_filepath(TC) ->
|
||||
make_filepath(TC, 0).
|
||||
|
||||
make_filepath(TC, InstID) ->
|
||||
Name = io_lib:format("~0p.~0p", [TC, InstID]),
|
||||
Path = filename:join(?WORK_DIR ++ ["proper", "runs", get_run_id(), ?MODULE_STRING, Name]),
|
||||
ok = filelib:ensure_dir(Path),
|
||||
Path.
|
||||
|
||||
get_run_id() ->
|
||||
case persistent_term:get(?RUN_ID, undefined) of
|
||||
RunID when RunID /= undefined ->
|
||||
RunID;
|
||||
undefined ->
|
||||
RunID = make_run_id(),
|
||||
ok = persistent_term:put(?RUN_ID, RunID),
|
||||
RunID
|
||||
end.
|
||||
|
||||
make_run_id() ->
|
||||
calendar:system_time_to_rfc3339(erlang:system_time(second), [{offset, "Z"}]).
|
||||
|
||||
%%--------------------------------------------------------------------
|
||||
%% Type generators
|
||||
%%--------------------------------------------------------------------
|
||||
|
||||
topic() ->
|
||||
non_empty(list(topic_level())).
|
||||
|
||||
topic(EntropyWeights) ->
|
||||
?LET(L, scaled(1 / 4, list(1)), begin
|
||||
EWs = lists:sublist(EntropyWeights ++ L, length(L)),
|
||||
?SIZED(S, [oneof([topic_level(S * EW), topic_level_fixed()]) || EW <- EWs])
|
||||
end).
|
||||
|
||||
topic_filter() ->
|
||||
?SUCHTHAT(
|
||||
L,
|
||||
non_empty(
|
||||
list(
|
||||
frequency([
|
||||
{5, topic_level()},
|
||||
{2, '+'},
|
||||
{1, '#'}
|
||||
])
|
||||
)
|
||||
),
|
||||
not lists:member('#', L) orelse lists:last(L) == '#'
|
||||
).
|
||||
|
||||
topic_level_pattern() ->
|
||||
frequency([
|
||||
{5, level},
|
||||
{2, '+'},
|
||||
{1, '#'}
|
||||
]).
|
||||
|
||||
topic_filter_pattern() ->
|
||||
list(topic_level_pattern()).
|
||||
|
||||
topic_filter(Topic) ->
|
||||
?LET({T, Pat}, {Topic, topic_filter_pattern()}, make_topic_filter(Pat, T)).
|
||||
|
||||
make_topic_filter([], _) ->
|
||||
[];
|
||||
make_topic_filter(_, []) ->
|
||||
[];
|
||||
make_topic_filter(['#' | _], _) ->
|
||||
['#'];
|
||||
make_topic_filter(['+' | Rest], [_ | Levels]) ->
|
||||
['+' | make_topic_filter(Rest, Levels)];
|
||||
make_topic_filter([level | Rest], [L | Levels]) ->
|
||||
[L | make_topic_filter(Rest, Levels)].
|
||||
|
% topic() ->
%     ?LAZY(?SIZED(S, frequency([
%         {S, [topic_level() | topic()]},
%         {1, []}
%     ]))).

% topic_filter() ->
%     ?LAZY(?SIZED(S, frequency([
%         {round(S / 3 * 2), [topic_level() | topic_filter()]},
%         {round(S / 3 * 1), ['+' | topic_filter()]},
%         {1, []},
%         {1, ['#']}
%     ]))).

topic_level() ->
    ?LET(L, list(oneof([range($a, $z), range($0, $9)])), iolist_to_binary(L)).

topic_level(Entropy) ->
    S = floor(1 + math:log2(Entropy) / 4),
    ?LET(I, range(1, Entropy), iolist_to_binary(io_lib:format("~*.16.0B", [S, I]))).

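%% The width S grows with the entropy so the hex label always fits; e.g.
%% io_lib:format("~*.16.0B", [2, 10]) yields "0A" (width 2, base 16,
%% zero-padded).
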
topic_level_fixed() ->
    oneof([
        <<"foo">>,
        <<"bar">>,
        <<"baz">>,
        <<"xyzzy">>
    ]).

keymapper() ->
    ?LET(
        {TimestampBits, TopicBits, Epoch},
        {
            range(0, 128),
            non_empty(list(range(1, 32))),
            pos_integer()
        },
        make_keymapper(TimestampBits, TopicBits, Epoch * 100)
    ).

keyspace_filter() ->
    ?LET(
        {TopicFilter, StartTime, Keymapper},
        {topic_filter(), pos_integer(), keymapper()},
        emqx_ds_message_storage_bitmask:make_keyspace_filter({TopicFilter, StartTime}, Keymapper)
    ).

messages(Topic) ->
    ?LET(
        Ts,
        list(Topic),
        interleaved(
            ?LET(Messages, vector(length(Ts), scaled(4, list(message()))), lists:zip(Ts, Messages))
        )
    ).

message() ->
    ?LET({Timestamp, Payload}, {timestamp(), binary()}, {emqx_guid:gen(), Timestamp, Payload}).

message_streams(Topic) ->
    ?LET(Topics, list(Topic), [{T, payload_gen:binary_stream_gen(64)} || T <- Topics]).

timestamp() ->
    scaled(20, pos_integer()).

start_time() ->
    scaled(10, pos_integer()).

bitstr(Size) ->
    ?LET(B, binary(1 + (Size div 8)), binary:decode_unsigned(B) band (1 bsl Size - 1)).

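%% bitstr(Size) masks a random binary down to Size bits, i.e. it generates
%% a uniform integer in 0..2^Size - 1; bitstr(4), say, covers 0..15.
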
nth(L) ->
    ?LET(I, range(1, length(L)), lists:nth(I, L)).

scaled(Factor, T) ->
    ?SIZED(S, resize(ceil(S * Factor), T)).

interleaved(T) ->
    ?LET({L, Seed}, {T, integer()}, interleave(L, rand:seed_s(exsss, Seed))).

shuffled(T) ->
    ?LET({L, Seed}, {T, integer()}, shuffle(L, rand:seed_s(exsss, Seed))).

flat(T) ->
    ?LET(L, T, lists:flatten(L)).

%%--------------------------------------------------------------------
%% Internal functions
%%--------------------------------------------------------------------

make_keymapper(TimestampBits, TopicBits, MaxEpoch) ->
    emqx_ds_message_storage_bitmask:make_keymapper(#{
        timestamp_bits => TimestampBits,
        topic_bits_per_level => TopicBits,
        epoch => MaxEpoch
    }).

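%% Reading the option names (the storage internals are not shown in this
%% diff): timestamp_bits and topic_bits_per_level presumably fix how many
%% key bits the timestamp and each topic level contribute, while epoch
%% sets the time window that shares one keyspace prefix.
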
get_keymapper_bitsize(Keymapper) ->
    maps:get(bitsize, emqx_ds_message_storage_bitmask:keymapper_info(Keymapper)).

-spec interleave(list({Tag, list(E)}), rand:state()) -> list({Tag, E}).
interleave(Seqs, Rng) ->
    interleave(Seqs, length(Seqs), Rng).

interleave(Seqs, L, Rng) when L > 0 ->
    {N, RngNext} = rand:uniform_s(L, Rng),
    {SeqHead, SeqTail} = lists:split(N - 1, Seqs),
    case SeqTail of
        [{Tag, [M | Rest]} | SeqRest] ->
            [{Tag, M} | interleave(SeqHead ++ [{Tag, Rest} | SeqRest], L, RngNext)];
        [{_, []} | SeqRest] ->
            interleave(SeqHead ++ SeqRest, L - 1, RngNext)
    end;
interleave([], 0, _) ->
    [].

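%% A sketch of the invariant (hand-checked, not an actual property here):
%% interleave([{a, [1, 2]}, {b, [10]}], rand:seed_s(exsss, 42)) may yield
%% [{a, 1}, {b, 10}, {a, 2}] or some other merge, but within each tag the
%% original order (1 before 2) is always preserved.
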
-spec shuffle(list(E), rand:state()) -> list(E).
shuffle(L, Rng) ->
    {Rands, _} = randoms(length(L), Rng),
    [E || {_, E} <- lists:sort(lists:zip(Rands, L))].

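%% Classic decorate-sort-undecorate: pair every element with a random
%% float, sort on the floats, then strip them off again.
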
randoms(N, Rng) when N > 0 ->
    {Rand, RngNext} = rand:uniform_s(Rng),
    {Tail, RngFinal} = randoms(N - 1, RngNext),
    {[Rand | Tail], RngFinal};
randoms(_, Rng) ->
    {[], Rng}.

@ -0,0 +1,22 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2017-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-ifndef(EMQX_BPAPI_HRL).
-define(EMQX_BPAPI_HRL, true).

-compile({parse_transform, emqx_bpapi_trans}).

-endif.

@ -0,0 +1,43 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-ifndef(EMQX_MESSAGE_HRL).
-define(EMQX_MESSAGE_HRL, true).

%% See 'Application Message' in MQTT Version 5.0
-record(message, {
    %% Global unique message ID
    id :: binary(),
    %% Message QoS
    qos = 0,
    %% Message from
    from :: atom() | binary(),
    %% Message flags
    flags = #{} :: emqx_types:flags(),
    %% Message headers. May contain any metadata, e.g. the
    %% protocol version number, username, peerhost or
    %% the PUBLISH properties (MQTT 5.0).
    headers = #{} :: emqx_types:headers(),
    %% Topic that the message is published to
    topic :: emqx_types:topic(),
    %% Message Payload
    payload :: emqx_types:payload(),
    %% Timestamp (Unit: millisecond)
    timestamp :: integer(),
    %% not used so far, for future extension
    extra = [] :: term()
}).

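%% A minimal sketch of filling this record in directly (field values are
%% illustrative; in EMQX one would normally go through the emqx_message
%% module, e.g. emqx_message:make/3, rather than build it by hand):
%%
%% Msg = #message{
%%     id = emqx_guid:gen(),
%%     qos = 0,
%%     from = <<"some-clientid">>,
%%     topic = <<"t/1">>,
%%     payload = <<"hello">>,
%%     timestamp = erlang:system_time(millisecond)
%% }.
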
-endif.

@ -106,6 +106,10 @@
    emqx_exproto_pb % generated code for protobuf
  ]}.

{eunit_opts,
  [ verbose
  ]}.

{project_plugins,
  [ erlfmt,
    {rebar3_hex, "7.0.2"},