refactor(topic_index): optimize trie-search performance
This commit is contained in:
parent
6b152b3cb7
commit
f4c8c6be55
|
@ -0,0 +1,116 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% @doc Topic index implementation with gb_trees stored in persistent_term.
|
||||||
|
%% This is only suitable for a static set of topic or topic-filters.
|
||||||
|
|
||||||
|
-module(emqx_topic_gbt).
|
||||||
|
|
||||||
|
-export([new/0, new/1]).
|
||||||
|
-export([insert/4]).
|
||||||
|
-export([delete/3]).
|
||||||
|
-export([match/2]).
|
||||||
|
-export([matches/3]).
|
||||||
|
|
||||||
|
-export([get_id/1]).
|
||||||
|
-export([get_topic/1]).
|
||||||
|
-export([get_record/2]).
|
||||||
|
|
||||||
|
%% Keys are produced by `emqx_trie_search:make_key/2': wildcard filters are kept
%% as word lists, while non-wildcard topics are kept as unsplit binaries.
%% Reusing the search module's type keeps the specs below in line with what
%% `match/2' and `matches/3' actually return.
-type key(ID) :: emqx_trie_search:key(ID).
-type match(ID) :: key(ID).
%% Name under which the tree is stored in persistent_term.
-type name() :: any().
|
||||||
|
|
||||||
|
%% @private Only for testing.
%% Creates the index under the fixed name `test'.
-spec new() -> name().
new() ->
    DefaultName = test,
    new(DefaultName).
|
||||||
|
|
||||||
|
%% @doc Create an empty gb_tree, store it in persistent_term under the given
%% name, and return that name so it can be used as the index handle.
-spec new(name()) -> name().
new(Name) ->
    true = gbt_update(Name, gb_trees:empty()),
    Name.
|
||||||
|
|
||||||
|
%% @doc Associate the given topic filter and record ID with `Record'.
%% An existing entry with the same key is replaced (`gb_trees:enter/3').
%% The tree is read, updated and written back to persistent_term in
%% separate steps.
-spec insert(emqx_types:topic(), _ID, _Record, name()) -> true.
insert(Filter, ID, Record, Name) ->
    Current = gbt(Name),
    Updated = gb_trees:enter(key(Filter, ID), Record, Current),
    true = gbt_update(Name, Updated).
|
||||||
|
|
||||||
|
%% @doc Remove the entry for the given topic filter and record ID.
%% A missing entry is silently ignored (`gb_trees:delete_any/2').
-spec delete(emqx_types:topic(), _ID, name()) -> true.
delete(Filter, ID, Name) ->
    Current = gbt(Name),
    Updated = gb_trees:delete_any(key(Filter, ID), Current),
    true = gbt_update(Name, Updated).
|
||||||
|
|
||||||
|
%% @doc Return the first index entry matching the given topic,
%% or `false' when nothing matches.
-spec match(emqx_types:topic(), name()) -> match(_ID) | false.
match(Topic, Name) ->
    NextF = make_nextf(Name),
    emqx_trie_search:match(Topic, NextF).
|
||||||
|
|
||||||
|
%% @doc Match given topic against the index and return _all_ matches.
%% If `unique` option is given, return only unique matches by record ID.
%% Options are passed through unchanged to `emqx_trie_search:matches/3'.
-spec matches(emqx_types:topic(), name(), emqx_trie_search:opts()) -> [match(_ID)].
matches(Topic, Name, Opts) ->
    emqx_trie_search:matches(Topic, make_nextf(Name), Opts).
|
||||||
|
|
||||||
|
%% @doc Return the record ID carried by a match.
%% Delegates to `emqx_trie_search:get_id/1'.
-spec get_id(match(ID)) -> ID.
get_id(Match) ->
    emqx_trie_search:get_id(Match).
|
||||||
|
|
||||||
|
%% @doc Return the topic (or topic filter) carried by a match.
%% Delegates to `emqx_trie_search:get_topic/1'.
-spec get_topic(match(_ID)) -> emqx_types:topic().
get_topic(Match) ->
    emqx_trie_search:get_topic(Match).
|
||||||
|
|
||||||
|
%% @doc Look up the record stored under the given match key.
%% Uses `gb_trees:get/2', so the key is expected to be present in the tree.
-spec get_record(match(_ID), name()) -> _Record.
get_record(Key, Name) ->
    gb_trees:get(Key, gbt(Name)).
|
||||||
|
|
||||||
|
%% Build the tree key for a topic (or topic filter) and record ID.
key(Topic, RecordID) ->
    emqx_trie_search:make_key(Topic, RecordID).
|
||||||
|
|
||||||
|
%% Fetch the tree stored in persistent_term under `{?MODULE, Name}'.
gbt(Name) ->
    StoreKey = {?MODULE, Name},
    persistent_term:get(StoreKey).
|
||||||
|
|
||||||
|
%% Store `Tree' in persistent_term under `{?MODULE, Name}'. Always returns `true'.
gbt_update(Name, Tree) ->
    ok = persistent_term:put({?MODULE, Name}, Tree),
    true.
|
||||||
|
|
||||||
|
%% Find the smallest key in the tree that is strictly greater than `Key'.
%% Walks the raw tree nodes, matched here as `{Key, Value, Smaller, Bigger}'
%% tuples with `nil' for an empty subtree.
%% When the walk runs off the tree, the search ceiling is returned instead.
gbt_next(nil, _Key) ->
    emqx_trie_search:ceiling();
gbt_next({NodeKey, _Val, Left, _Right}, Key) when Key < NodeKey ->
    %% This node is a candidate; a closer one may exist in the left subtree.
    erlang:min(NodeKey, gbt_next(Left, Key));
gbt_next({_NodeKey, _Val, _Left, Right}, Key) ->
    %% This node is not greater than Key, so only the right subtree can help.
    gbt_next(Right, Key).
|
||||||
|
|
||||||
|
%% Build the "next key" fun handed to emqx_trie_search.
%% The stored term is destructured as `{Size, RootNode}'; the size is not
%% needed. The root is captured once, so the fun sees a snapshot of the tree.
make_nextf(Name) ->
    {_Size, Root} = gbt(Name),
    fun(Key) -> gbt_next(Root, Key) end.
|
|
@ -14,18 +14,7 @@
|
||||||
%% limitations under the License.
|
%% limitations under the License.
|
||||||
%%--------------------------------------------------------------------
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
%% @doc Topic index for matching topics to topic filters.
|
%% @doc Topic index implementation with ETS table as ordered-set storage.
|
||||||
%%
|
|
||||||
%% Works on top of ETS ordered_set table. Keys are tuples constructed from
|
|
||||||
%% parsed topic filters and record IDs, wrapped in a tuple to order them
|
|
||||||
%% strictly greater than unit tuple (`{}`). Existing table may be used if
|
|
||||||
%% existing keys will not collide with index keys.
|
|
||||||
%%
|
|
||||||
%% Designed to effectively answer questions like:
|
|
||||||
%% 1. Does any topic filter match given topic?
|
|
||||||
%% 2. Which records are associated with topic filters matching given topic?
|
|
||||||
%% 3. Which topic filters match given topic?
|
|
||||||
%% 4. Which record IDs are associated with topic filters matching given topic?
|
|
||||||
|
|
||||||
-module(emqx_topic_index).
|
-module(emqx_topic_index).
|
||||||
|
|
||||||
|
@ -39,186 +28,51 @@
|
||||||
-export([get_topic/1]).
|
-export([get_topic/1]).
|
||||||
-export([get_record/2]).
|
-export([get_record/2]).
|
||||||
|
|
||||||
-type word() :: binary() | '+' | '#'.
|
-type key(ID) :: emqx_trie_search:key(ID).
|
||||||
-type key(ID) :: {[word()], {ID}}.
|
|
||||||
-type match(ID) :: key(ID).
|
-type match(ID) :: key(ID).
|
||||||
|
|
||||||
%% @doc Create a new ETS table suitable for topic index.
|
%% @doc Create a new ETS table suitable for topic index.
|
||||||
%% Usable mostly for testing purposes.
|
%% Usable mostly for testing purposes.
|
||||||
-spec new() -> ets:table().
|
-spec new() -> ets:table().
|
||||||
new() ->
|
new() ->
|
||||||
ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]).
|
T = ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]),
|
||||||
|
ets:insert(T, {emqx_trie_search:ceiling(), []}),
|
||||||
|
T.
|
||||||
|
|
||||||
%% @doc Insert a new entry into the index that associates given topic filter to given
|
%% @doc Insert a new entry into the index that associates given topic filter to given
|
||||||
%% record ID, and attaches arbitrary record to the entry. This allows users to choose
|
%% record ID, and attaches arbitrary record to the entry. This allows users to choose
|
||||||
%% between regular and "materialized" indexes, for example.
|
%% between regular and "materialized" indexes, for example.
|
||||||
-spec insert(emqx_types:topic(), _ID, _Record, ets:table()) -> true.
|
-spec insert(emqx_types:topic(), _ID, _Record, ets:table()) -> true.
|
||||||
insert(Filter, ID, Record, Tab) ->
|
insert(Filter, ID, Record, Tab) ->
|
||||||
ets:insert(Tab, {{words(Filter), {ID}}, Record}).
|
Key = key(Filter, ID),
|
||||||
|
true = ets:insert(Tab, {Key, Record}).
|
||||||
|
|
||||||
%% @doc Delete an entry from the index that associates given topic filter to given
|
%% @doc Delete an entry from the index that associates given topic filter to given
|
||||||
%% record ID. Deleting non-existing entry is not an error.
|
%% record ID. Deleting non-existing entry is not an error.
|
||||||
-spec delete(emqx_types:topic(), _ID, ets:table()) -> true.
|
-spec delete(emqx_types:topic(), _ID, ets:table()) -> true.
|
||||||
delete(Filter, ID, Tab) ->
|
delete(Filter, ID, Tab) ->
|
||||||
ets:delete(Tab, {words(Filter), {ID}}).
|
true = ets:delete(Tab, key(Filter, ID)).
|
||||||
|
|
||||||
%% @doc Match given topic against the index and return the first match, or `false` if
|
%% @doc Match given topic against the index and return the first match, or `false` if
|
||||||
%% no match is found.
|
%% no match is found.
|
||||||
-spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false.
|
-spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false.
|
||||||
match(Topic, Tab) ->
|
match(Topic, Tab) ->
|
||||||
{Words, RPrefix} = match_init(Topic),
|
emqx_trie_search:match(Topic, make_nextf(Tab)).
|
||||||
match(Words, RPrefix, Tab).
|
|
||||||
|
|
||||||
match(Words, RPrefix, Tab) ->
|
|
||||||
Prefix = lists:reverse(RPrefix),
|
|
||||||
match(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Tab).
|
|
||||||
|
|
||||||
match(K, Prefix, Words, RPrefix, Tab) ->
|
|
||||||
case match_next(Prefix, K, Words) of
|
|
||||||
true ->
|
|
||||||
K;
|
|
||||||
skip ->
|
|
||||||
match(ets:next(Tab, K), Prefix, Words, RPrefix, Tab);
|
|
||||||
stop ->
|
|
||||||
false;
|
|
||||||
Matched ->
|
|
||||||
match_rest(Matched, Words, RPrefix, Tab)
|
|
||||||
end.
|
|
||||||
|
|
||||||
match_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Tab) ->
|
|
||||||
% NOTE
|
|
||||||
% Fast-forward through identical words in the topic and the last key suffixes.
|
|
||||||
% This should save us a few redundant `ets:next` calls at the cost of slightly
|
|
||||||
% more complex match patterns.
|
|
||||||
match_rest(SLast, Rest, [W1 | RPrefix], Tab);
|
|
||||||
match_rest(SLast, [W | Rest], RPrefix, Tab) when is_list(SLast) ->
|
|
||||||
match(Rest, [W | RPrefix], Tab);
|
|
||||||
match_rest(plus, [W | Rest], RPrefix, Tab) ->
|
|
||||||
% NOTE
|
|
||||||
% There's '+' in the key suffix, meaning we should consider 2 alternatives:
|
|
||||||
% 1. Match the rest of the topic as if there was '+' in the current position.
|
|
||||||
% 2. Skip this key and try to match the topic as it is.
|
|
||||||
case match(Rest, ['+' | RPrefix], Tab) of
|
|
||||||
Match = {_, _} ->
|
|
||||||
Match;
|
|
||||||
false ->
|
|
||||||
match(Rest, [W | RPrefix], Tab)
|
|
||||||
end;
|
|
||||||
match_rest(_, [], _RPrefix, _Tab) ->
|
|
||||||
false.
|
|
||||||
|
|
||||||
%% @doc Match given topic against the index and return _all_ matches.
|
%% @doc Match given topic against the index and return _all_ matches.
|
||||||
%% If `unique` option is given, return only unique matches by record ID.
|
%% If `unique` option is given, return only unique matches by record ID.
|
||||||
-spec matches(emqx_types:topic(), ets:table(), _Opts :: [unique]) -> [match(_ID)].
|
|
||||||
matches(Topic, Tab, Opts) ->
|
matches(Topic, Tab, Opts) ->
|
||||||
{Words, RPrefix} = match_init(Topic),
|
emqx_trie_search:matches(Topic, make_nextf(Tab), Opts).
|
||||||
AccIn =
|
|
||||||
case Opts of
|
|
||||||
[unique | _] -> #{};
|
|
||||||
[] -> []
|
|
||||||
end,
|
|
||||||
Matches = matches(Words, RPrefix, AccIn, Tab),
|
|
||||||
case Matches of
|
|
||||||
#{} -> maps:values(Matches);
|
|
||||||
_ -> Matches
|
|
||||||
end.
|
|
||||||
|
|
||||||
matches(Words, RPrefix, Acc, Tab) ->
|
|
||||||
Prefix = lists:reverse(RPrefix),
|
|
||||||
matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab).
|
|
||||||
|
|
||||||
matches(Words, RPrefix, K = {Filter, _}, Acc, Tab) ->
|
|
||||||
Prefix = lists:reverse(RPrefix),
|
|
||||||
case Prefix > Filter of
|
|
||||||
true ->
|
|
||||||
% NOTE: Prefix already greater than the last key seen, need to `ets:next/2`.
|
|
||||||
matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab);
|
|
||||||
false ->
|
|
||||||
% NOTE: Prefix is still less than or equal to the last key seen, reuse it.
|
|
||||||
matches(K, Prefix, Words, RPrefix, Acc, Tab)
|
|
||||||
end.
|
|
||||||
|
|
||||||
matches(K, Prefix, Words, RPrefix, Acc, Tab) ->
|
|
||||||
case match_next(Prefix, K, Words) of
|
|
||||||
true ->
|
|
||||||
matches(ets:next(Tab, K), Prefix, Words, RPrefix, match_add(K, Acc), Tab);
|
|
||||||
skip ->
|
|
||||||
matches(ets:next(Tab, K), Prefix, Words, RPrefix, Acc, Tab);
|
|
||||||
stop ->
|
|
||||||
Acc;
|
|
||||||
Matched ->
|
|
||||||
% NOTE: Preserve next key on the stack to save on `ets:next/2` calls.
|
|
||||||
matches_rest(Matched, Words, RPrefix, K, Acc, Tab)
|
|
||||||
end.
|
|
||||||
|
|
||||||
matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, K, Acc, Tab) ->
|
|
||||||
% NOTE
|
|
||||||
% Fast-forward through identical words in the topic and the last key suffixes.
|
|
||||||
% This should save us a few redundant `ets:next` calls at the cost of slightly
|
|
||||||
% more complex match patterns.
|
|
||||||
matches_rest(SLast, Rest, [W1 | RPrefix], K, Acc, Tab);
|
|
||||||
matches_rest(SLast, [W | Rest], RPrefix, K, Acc, Tab) when is_list(SLast) ->
|
|
||||||
matches(Rest, [W | RPrefix], K, Acc, Tab);
|
|
||||||
matches_rest(plus, [W | Rest], RPrefix, K, Acc, Tab) ->
|
|
||||||
% NOTE
|
|
||||||
% There's '+' in the key suffix, meaning we should accumulate all matches from
|
|
||||||
% each of 2 branches:
|
|
||||||
% 1. Match the rest of the topic as if there was '+' in the current position.
|
|
||||||
% 2. Skip this key and try to match the topic as it is.
|
|
||||||
NAcc = matches(Rest, ['+' | RPrefix], K, Acc, Tab),
|
|
||||||
matches(Rest, [W | RPrefix], K, NAcc, Tab);
|
|
||||||
matches_rest(_, [], _RPrefix, _K, Acc, _Tab) ->
|
|
||||||
Acc.
|
|
||||||
|
|
||||||
match_add(K = {_Filter, ID}, Acc = #{}) ->
|
|
||||||
% NOTE: ensuring uniqueness by record ID
|
|
||||||
Acc#{ID => K};
|
|
||||||
match_add(K, Acc) ->
|
|
||||||
[K | Acc].
|
|
||||||
|
|
||||||
match_next(Prefix, {Filter, _ID}, Suffix) ->
|
|
||||||
match_filter(Prefix, Filter, Suffix);
|
|
||||||
match_next(_, '$end_of_table', _) ->
|
|
||||||
stop.
|
|
||||||
|
|
||||||
match_filter([], [], []) ->
|
|
||||||
% NOTE: we matched the topic exactly
|
|
||||||
true;
|
|
||||||
match_filter([], [], _Suffix) ->
|
|
||||||
% NOTE: we matched the prefix, but there may be more matches next
|
|
||||||
skip;
|
|
||||||
match_filter([], ['#'], _Suffix) ->
|
|
||||||
% NOTE: naturally, '#' < '+', so this is already optimal for `match/2`
|
|
||||||
true;
|
|
||||||
match_filter([], ['+' | _], _Suffix) ->
|
|
||||||
plus;
|
|
||||||
match_filter([], [_H | _] = Rest, _Suffix) ->
|
|
||||||
Rest;
|
|
||||||
match_filter([H | T1], [H | T2], Suffix) ->
|
|
||||||
match_filter(T1, T2, Suffix);
|
|
||||||
match_filter([H1 | _], [H2 | _], _Suffix) when H2 > H1 ->
|
|
||||||
% NOTE: we're strictly past the prefix, no need to continue
|
|
||||||
stop.
|
|
||||||
|
|
||||||
match_init(Topic) ->
|
|
||||||
case words(Topic) of
|
|
||||||
[W = <<"$", _/bytes>> | Rest] ->
|
|
||||||
% NOTE
|
|
||||||
% This will effectively skip attempts to match special topics to `#` or `+/...`.
|
|
||||||
{Rest, [W]};
|
|
||||||
Words ->
|
|
||||||
{Words, []}
|
|
||||||
end.
|
|
||||||
|
|
||||||
%% @doc Extract record ID from the match.
|
%% @doc Extract record ID from the match.
|
||||||
-spec get_id(match(ID)) -> ID.
|
-spec get_id(match(ID)) -> ID.
|
||||||
get_id({_Filter, {ID}}) ->
|
get_id(Key) ->
|
||||||
ID.
|
emqx_trie_search:get_id(Key).
|
||||||
|
|
||||||
%% @doc Extract topic (or topic filter) from the match.
|
%% @doc Extract topic (or topic filter) from the match.
|
||||||
-spec get_topic(match(_ID)) -> emqx_types:topic().
|
-spec get_topic(match(_ID)) -> emqx_types:topic().
|
||||||
get_topic({Filter, _ID}) ->
|
get_topic(Key) ->
|
||||||
emqx_topic:join(Filter).
|
emqx_trie_search:get_topic(Key).
|
||||||
|
|
||||||
%% @doc Fetch the record associated with the match.
|
%% @doc Fetch the record associated with the match.
|
||||||
%% NOTE: Only really useful for ETS tables where the record ID is the first element.
|
%% NOTE: Only really useful for ETS tables where the record ID is the first element.
|
||||||
|
@ -226,17 +80,8 @@ get_topic({Filter, _ID}) ->
|
||||||
get_record(K, Tab) ->
|
get_record(K, Tab) ->
|
||||||
ets:lookup_element(Tab, K, 2).
|
ets:lookup_element(Tab, K, 2).
|
||||||
|
|
||||||
%%
|
key(TopicOrFilter, ID) ->
|
||||||
|
emqx_trie_search:make_key(TopicOrFilter, ID).
|
||||||
|
|
||||||
-spec words(emqx_types:topic()) -> [word()].
|
make_nextf(Tab) ->
|
||||||
words(Topic) when is_binary(Topic) ->
|
fun(Key) -> ets:next(Tab, Key) end.
|
||||||
% NOTE
|
|
||||||
% This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty
|
|
||||||
% tokens to ''. This is needed to keep ordering of words consistent with what
|
|
||||||
% `match_filter/3` expects.
|
|
||||||
[word(W) || W <- emqx_topic:tokens(Topic)].
|
|
||||||
|
|
||||||
-spec word(binary()) -> word().
|
|
||||||
word(<<"+">>) -> '+';
|
|
||||||
word(<<"#">>) -> '#';
|
|
||||||
word(Bin) -> Bin.
|
|
||||||
|
|
|
@ -0,0 +1,344 @@
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
|
||||||
|
%%
|
||||||
|
%% Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
%% you may not use this file except in compliance with the License.
|
||||||
|
%% You may obtain a copy of the License at
|
||||||
|
%%
|
||||||
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
%%
|
||||||
|
%% Unless required by applicable law or agreed to in writing, software
|
||||||
|
%% distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
%% See the License for the specific language governing permissions and
|
||||||
|
%% limitations under the License.
|
||||||
|
%%--------------------------------------------------------------------
|
||||||
|
|
||||||
|
%% @doc Topic index for matching topics to topic filters.
|
||||||
|
%%
|
||||||
|
%% Works on top of ETS ordered_set table. Keys are tuples constructed from
|
||||||
|
%% parsed topic filters and record IDs, wrapped in a tuple to order them
|
||||||
|
%% strictly greater than unit tuple (`{}`). Existing table may be used if
|
||||||
|
%% existing keys will not collide with index keys.
|
||||||
|
%%
|
||||||
|
%% Designed to effectively answer questions like:
|
||||||
|
%% 1. Does any topic filter match given topic?
|
||||||
|
%% 2. Which records are associated with topic filters matching given topic?
|
||||||
|
%% 3. Which topic filters match given topic?
|
||||||
|
%% 4. Which record IDs are associated with topic filters matching given topic?
|
||||||
|
%%
|
||||||
|
%% Trie-search algorithm:
|
||||||
|
%%
|
||||||
|
%% Given a 3-level topic (e.g. a/b/c), if we leave out '#' for now,
|
||||||
|
%% all possible subscriptions of a/b/c can be enumerated as below:
|
||||||
|
%%
|
||||||
|
%% a/b/c
|
||||||
|
%% a/b/+
|
||||||
|
%% a/+/c <--- subscribed
|
||||||
|
%% a/+/+
|
||||||
|
%% +/b/c <--- subscribed
|
||||||
|
%% +/b/+
|
||||||
|
%% +/+/c
|
||||||
|
%% +/+/+ <--- start searching upward from here
|
||||||
|
%%
|
||||||
|
%% Let's name this search space "Space1".
|
||||||
|
%% If we brute-force it, the scope would be 8 (2^3).
|
||||||
|
%% Meaning this has O(2^N) complexity (N being the level of topics).
|
||||||
|
%%
|
||||||
|
%% This clearly isn't going to work.
|
||||||
|
%% Should we then try to enumerate all subscribers instead?
|
||||||
|
%% If there are also other subscriptions, e.g. "+/x/y" and "+/b/0"
|
||||||
|
%%
|
||||||
|
%% a/+/c <--- match of a/b/c
|
||||||
|
%% +/x/n
|
||||||
|
%% ...
|
||||||
|
%% +/x/2
|
||||||
|
%% +/x/1
|
||||||
|
%% +/b/c <--- match of a/b/c
|
||||||
|
%% +/b/1
|
||||||
|
%% +/b/0
|
||||||
|
%%
|
||||||
|
%% Let's name it "Space2".
|
||||||
|
%%
|
||||||
|
%% This has O(M * L) complexity (M being the total number of subscriptions,
|
||||||
|
%% and L being the number of topic levels).
|
||||||
|
%% This is usually a lot smaller than "Space1", but still not very effective
|
||||||
|
%% if the collection size is e.g. 1 million.
|
||||||
|
%%
|
||||||
|
%% To make it more effective, we'll need to combine the two algorithms:
|
||||||
|
%% Use the ordered subscription topics' prefixes as starting points to make
|
||||||
|
%% guesses about whether or not the next word can be a '+', and skip-over
|
||||||
|
%% to the next possible match.
|
||||||
|
%%
|
||||||
|
%% NOTE: A prerequisite of the ordered collection is, it should be able
|
||||||
|
%% to find the *immediate-next* topic/filter with a given prefix.
|
||||||
|
%%
|
||||||
|
%% In the above example, we start from "+/b/0". When comparing "+/b/0"
|
||||||
|
%% with "a/b/c", we know the matching prefix is "+/b", meaning we can
|
||||||
|
%% start guessing if the next word is '+' or 'c':
|
||||||
|
%% * It can't be '+' because '+' < '0'
|
||||||
|
%% * It might be 'c' because 'c' > '0'
|
||||||
|
%%
|
||||||
|
%% So, we try to jump to the next topic which has a prefix of "+/b/c"
|
||||||
|
%% (this effectively means skipping over "+/b/1").
|
||||||
|
%%
|
||||||
|
%% After "+/b/c" is found to be a matching filter, we move up:
|
||||||
|
%% * The next possible match is "a/+/+" according to Space1
|
||||||
|
%% * The next subscription is "+/x/1" according to Space2
|
||||||
|
%%
|
||||||
|
%% "a/+/+" is lexicographically greater than "+/x/+", so let's jump to
|
||||||
|
%% the immediate-next of 'a/+/+', which is "a/+/c", allowing us to skip
|
||||||
|
%% over all the ones starting with "+/x".
|
||||||
|
%%
|
||||||
|
%% If we take '#' into consideration, it's only one extra comparison to see
|
||||||
|
%% if a filter ends with '#'.
|
||||||
|
%%
|
||||||
|
%% In summary, the complexity of this algorithm is O(N * L)
|
||||||
|
%% N being the number of total matches, and L being the level of the topic.
|
||||||
|
|
||||||
|
-module(emqx_trie_search).
|
||||||
|
|
||||||
|
-export([ceiling/0, make_key/2]).
|
||||||
|
-export([match/2, matches/3, get_id/1, get_topic/1]).
|
||||||
|
-export_type([key/1, word/0, nextf/0, opts/0]).
|
||||||
|
|
||||||
|
-type word() :: binary() | '+' | '#'.
|
||||||
|
-type base_key() :: {binary() | [word()], {}}.
|
||||||
|
-type key(ID) :: {binary() | [word()], {ID}}.
|
||||||
|
-type nextf() :: fun((key(_) | base_key()) -> key(_)).
|
||||||
|
-type opts() :: [unique | return_first].
|
||||||
|
|
||||||
|
%% Holds the constant values of each search.
|
||||||
|
-record(ctx, {
|
||||||
|
%% A function which can quickly find the immediate-next record of the given prefix
|
||||||
|
nextf :: nextf(),
|
||||||
|
%% The initial prefix to start searching from
|
||||||
|
%% if the input topic starts with a dollar-word, it's the first word like [<<"$SYS">>]
|
||||||
|
%% otherwise it's a []
|
||||||
|
prefix0 :: [word()],
|
||||||
|
%% The initial words of a topic
|
||||||
|
words0 :: [word()],
|
||||||
|
%% Return as soon as there is one match found
|
||||||
|
return_first :: boolean()
|
||||||
|
}).
|
||||||
|
|
||||||
|
%% Holds the variable parts of each search.
|
||||||
|
-record(acc, {
|
||||||
|
%% The current searching target topic/filter
|
||||||
|
target,
|
||||||
|
%% The number of moves.
|
||||||
|
%% This is used to check if the target has been moved
|
||||||
|
%% after attempting to append '+' to the searching prefix
|
||||||
|
moves = 0,
|
||||||
|
%% Search result accumulation
|
||||||
|
matches = []
|
||||||
|
}).
|
||||||
|
|
||||||
|
%% All valid utf8 bytes are less than 255.
|
||||||
|
-define(CEILING_TOPIC, <<255>>).
|
||||||
|
-define(CEILING, {?CEILING_TOPIC, {1}}).
|
||||||
|
|
||||||
|
%% @doc Return the sentinel key that sorts after all other valid keys.
ceiling() ->
    {?CEILING_TOPIC, {1}}.
|
||||||
|
|
||||||
|
%% @doc Make a search-key for the given topic or topic filter.
%%
%% Wildcard filters are stored as a list of words. Non-wildcard topics are
%% stored as the unsplit binary: they can be found with direct lookups, and
%% the binary is more compact in memory.
%%
%% Throws `{invalid_topic, Topic}' for a non-wildcard topic that sorts at or
%% above the ceiling topic. Such a key could equal the ceiling sentinel, or be
%% mistaken for it during a search.
-spec make_key(emqx_types:topic(), ID) -> key(ID).
make_key(Topic, ID) when is_binary(Topic) ->
    Words = words(Topic),
    case lists:any(fun erlang:is_atom/1, Words) of
        true ->
            %% It's a wildcard. In Erlang term order a list always sorts below
            %% a binary, so this key can never reach the (binary) ceiling.
            {Words, {ID}};
        false when Topic >= ?CEILING_TOPIC ->
            throw({invalid_topic, Topic});
        false ->
            %% Not a wildcard, keep the topic unsplit.
            {Topic, {ID}}
    end.
|
||||||
|
|
||||||
|
%% @doc Return the record ID wrapped inside a key.
-spec get_id(key(ID)) -> ID.
get_id({_TopicOrFilter, {RecordID}}) ->
    RecordID.
|
||||||
|
|
||||||
|
%% @doc Return the topic (or topic filter) of a key as a binary.
%% Word-list filters are joined back with `emqx_topic:join/1'; anything else
%% is returned as stored.
-spec get_topic(key(_ID)) -> emqx_types:topic().
get_topic({Topic, _ID}) when not is_list(Topic) ->
    Topic;
get_topic({Words, _ID}) ->
    emqx_topic:join(Words).
|
||||||
|
|
||||||
|
%% Build the base key for a prefix: the prefix paired with the empty tuple
%% in the ID position. It is handed to the next-fun to find the search target.
base(Prefix) ->
    EmptyID = {},
    {Prefix, EmptyID}.
|
||||||
|
|
||||||
|
%% Advance the search: the new target is whatever the next-fun returns for
%% `Base', and the move counter is bumped by one.
move_up(#ctx{nextf = NextF}, Acc = #acc{moves = Moves}, Base) ->
    Target = NextF(Base),
    Acc#acc{target = Target, moves = Moves + 1}.
|
||||||
|
|
||||||
|
%% Record the current search target as a match.
add(Ctx, Acc = #acc{target = Target}) ->
    add(Ctx, Acc, Target).
|
||||||
|
|
||||||
|
%% Record `Key' as a match.
%% In `return_first' mode the key is thrown as `{return_first, Key}' to
%% unwind the search immediately; otherwise it is added to the matches.
add(#ctx{return_first = true}, _Acc, Key) ->
    throw({return_first, Key});
add(_Ctx, Acc = #acc{matches = Found}, Key) ->
    Updated = match_add(Key, Found),
    Acc#acc{matches = Updated}.
|
||||||
|
|
||||||
|
%% @doc Return the first key matching the given topic, or `false' if there
%% is none. The search runs in `return_first' mode: the first hit arrives
%% here as a `{return_first, Key}' throw, and an empty result means no match.
-spec match(emqx_types:topic(), nextf()) -> false | key(_).
match(Topic, NextF) ->
    SearchOpts = [return_first],
    try search(Topic, NextF, SearchOpts) of
        [] -> false
    catch
        throw:{return_first, FirstKey} -> FirstKey
    end.
|
||||||
|
|
||||||
|
%% @doc Match given topic against the index and return _all_ matches.
%% If `unique` option is given, return only unique matches by record ID.
%% If `return_first` option is given, the search stops at the first match and
%% a single-element list is returned (or `[]` when nothing matches).
-spec matches(emqx_types:topic(), nextf(), opts()) -> [key(_)].
matches(Topic, NextF, Opts) ->
    try search(Topic, NextF, Opts) of
        Matches ->
            Matches
    catch
        %% In `return_first` mode the search unwinds with a throw; turn it
        %% into a regular result instead of leaking it to the caller.
        throw:{return_first, Key} ->
            [Key]
    end.
|
||||||
|
|
||||||
|
%% @doc Entrypoint of the search for a given topic.
%% Runs the wildcard search first, then collects the non-wildcard entries
%% stored under the topic binary itself. With the `unique' option the matches
%% are gathered in a map and its values are returned; otherwise they are
%% gathered in a list.
search(Topic, NextF, Opts) ->
    {Words, Prefix} = match_init(Topic),
    ReturnFirst = proplists:get_bool(return_first, Opts),
    Ctx = #ctx{
        nextf = NextF,
        prefix0 = Prefix,
        words0 = Words,
        return_first = ReturnFirst
    },
    EmptyMatches =
        case proplists:get_bool(unique, Opts) of
            true -> #{};
            false -> []
        end,
    WildcardAcc = search_new(Ctx, base(Prefix), #acc{matches = EmptyMatches}),
    #acc{matches = Found} = match_non_wildcards(Ctx, base(Topic), WildcardAcc),
    case Found of
        #{} -> maps:values(Found);
        _ -> Found
    end.
|
||||||
|
|
||||||
|
%% The recursive entrypoint of the trie-search algorithm.
%% Moves the target to the key following `NewBase', then restarts the
%% comparison from the initial prefix and words of the topic.
search_new(#ctx{prefix0 = Prefix0, words0 = Words0} = C, NewBase, Acc0) ->
    Acc = #acc{target = {Filter, _}} = move_up(C, Acc0, NewBase),
    case Prefix0 of
        [DollarWord] ->
            %% A '$' topic: its first word must match literally.
            search_up(C, DollarWord, Words0, Filter, [], Acc);
        [] ->
            %% Not a '$' topic: try '+' for the first word.
            search_plus(C, Words0, Filter, [], Acc)
    end.
|
||||||
|
|
||||||
|
%% Search towards the bigger end of the ordered collection of topics and
%% topic-filters. `Word' is the candidate next word of the prefix, and
%% `RPrefix' is the reversed prefix matched so far. The outcome of comparing
%% against the current target decides the next step.
search_up(C, Word, Words, Filter, RPrefix, Acc = #acc{target = Target}) ->
    case compare(Word, Filter, Words) of
        lower ->
            %% Nothing further to find along this prefix.
            Acc;
        higher ->
            %% Jump straight to the first key after Prefix ++ [Word].
            Jump = base(lists:reverse(RPrefix, [Word])),
            search_new(C, Jump, Acc);
        shorter ->
            %% Word matched but the verdict is open: extend the prefix.
            search_plus(C, Words, tl(Filter), [Word | RPrefix], Acc);
        {match, prefix} ->
            search_new(C, Target, Acc);
        {match, full} ->
            search_new(C, Target, add(C, Acc))
    end.
|
||||||
|
|
||||||
|
%% Try to use '+' as the next word in the prefix.
search_plus(C, [W | [X | _] = WordsTail], [W | [X | _] = FilterTail], RPrefix, Acc) ->
    %% The next two words of topic and filter are identical: push the first
    %% onto the matching prefix directly. Micro optimization that avoids
    %% going through the clause below.
    search_plus(C, WordsTail, FilterTail, [W | RPrefix], Acc);
search_plus(C, [W | Words], Filter, RPrefix, Acc) ->
    MovesBefore = Acc#acc.moves,
    case search_up(C, '+', Words, Filter, RPrefix, Acc) of
        #acc{moves = MovesBefore} = Unmoved ->
            %% The '+' attempt did not move the target: keep searching for
            %% a filter that has W as the next word.
            search_up(C, W, Words, Filter, RPrefix, Unmoved);
        Moved ->
            %% The target moved, so this branch has already been searched.
            Moved
    end.
|
||||||
|
|
||||||
|
%% Compare the prefix word, then the remaining topic words, against the
%% search target (a topic binary or the remaining words of a filter).
compare(_Word, Target, _Words) when is_binary(Target) ->
    %% The target is an unsplit (non-wildcard) topic.
    lower;
compare(Word, [Word | FilterRest], Words) ->
    compare(FilterRest, Words);
compare(_Word, ['#'], _Words) ->
    {match, full};
compare(Word, [FilterWord | _], _Words) ->
    case Word < FilterWord of
        true -> lower;
        false -> higher
    end.
|
||||||
|
|
||||||
|
%% Compare the filter suffix against the topic suffix.
compare([], Words) ->
    case Words of
        [] -> {match, full};
        _ -> {match, prefix}
    end;
compare(['#'], _Words) ->
    {match, full};
compare([_ | _], []) ->
    lower;
compare([_ | _], _Words) ->
    %% Cannot tell yet whether it is a match, lower, or higher:
    %% the search has to continue with a longer prefix.
    shorter.
|
||||||
|
|
||||||
|
%% Add a matched key to the accumulated matches.
%% A map accumulator is keyed by the key's ID part, so a later key with the
%% same ID replaces the earlier one; a list accumulator is prepended to.
match_add(Key = {_Filter, ID}, Seen) when is_map(Seen) ->
    maps:put(ID, Key, Seen);
match_add(Key, Found) ->
    [Key | Found].
|
||||||
|
|
||||||
|
%% Split the topic into words and work out the initial search prefix.
%% Returns `{Words, Prefix}': for a topic whose first word starts with `$',
%% that word becomes the prefix and is removed from the words; otherwise the
%% prefix is empty.
match_init(Topic) ->
    case words(Topic) of
        [<<$$, _/binary>> = DollarWord | Rest] ->
            %% NOTE
            %% This will effectively skip attempts to match special topics
            %% to `#` or `+/...`.
            {Rest, [DollarWord]};
        AllWords ->
            {AllWords, []}
    end.
|
||||||
|
|
||||||
|
%% Split a topic into words, turning `+' and `#' tokens into atoms.
%% NOTE
%% This is almost identical to `emqx_topic:words/1`, but empty tokens are not
%% converted to '': they stay as binaries, which keeps the ordering of words
%% consistent for the word comparisons done during the search.
-spec words(emqx_types:topic()) -> [word()].
words(Topic) when is_binary(Topic) ->
    lists:map(fun word/1, emqx_topic:tokens(Topic)).
|
||||||
|
|
||||||
|
%% Convert a single topic token: the wildcard tokens become the atoms
%% '+' and '#', every other token is returned unchanged.
-spec word(binary()) -> word().
word(Token) ->
    case Token of
        <<"+">> -> '+';
        <<"#">> -> '#';
        _ -> Token
    end.
|
||||||
|
|
||||||
|
%% Collect the entries stored under exactly `Topic'.
%% Starting from `Base', keep asking the next-fun for the following key; as
%% long as it carries the same topic it is added as a match, and the first
%% key with anything else ends the scan.
match_non_wildcards(C = #ctx{nextf = NextF}, Base = {Topic, _}, Acc) ->
    Next = NextF(Base),
    case Next of
        {Topic, _ID} ->
            match_non_wildcards(C, Next, add(C, Acc, Next));
        _ ->
            Acc
    end.
|
|
@ -25,42 +25,71 @@
|
||||||
-import(emqx_proper_types, [scaled/2]).
|
-import(emqx_proper_types, [scaled/2]).
|
||||||
|
|
||||||
all() ->
|
all() ->
|
||||||
emqx_common_test_helpers:all(?MODULE).
|
[
|
||||||
|
{group, ets},
|
||||||
|
{group, gb_tree}
|
||||||
|
].
|
||||||
|
|
||||||
t_insert(_) ->
|
groups() ->
|
||||||
Tab = emqx_topic_index:new(),
|
All = emqx_common_test_helpers:all(?MODULE),
|
||||||
true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab),
|
[
|
||||||
true = emqx_topic_index:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab),
|
{ets, All},
|
||||||
true = emqx_topic_index:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab),
|
{gb_tree, All}
|
||||||
?assertEqual(<<"sensor/#">>, topic(match(<<"sensor">>, Tab))),
|
].
|
||||||
?assertEqual(t_insert_3, id(match(<<"sensor">>, Tab))).
|
|
||||||
|
|
||||||
t_match(_) ->
|
init_per_group(ets, Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
[{index_module, emqx_topic_index} | Config];
|
||||||
true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab),
|
init_per_group(gb_tree, Config) ->
|
||||||
true = emqx_topic_index:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab),
|
[{index_module, emqx_topic_gbt} | Config].
|
||||||
true = emqx_topic_index:insert(<<"sensor/#">>, t_match_3, <<>>, Tab),
|
|
||||||
|
end_per_group(_Group, _Config) ->
|
||||||
|
ok.
|
||||||
|
|
||||||
|
get_module(Config) ->
|
||||||
|
proplists:get_value(index_module, Config).
|
||||||
|
|
||||||
|
t_insert(Config) ->
|
||||||
|
M = get_module(Config),
|
||||||
|
Tab = M:new(),
|
||||||
|
true = M:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab),
|
||||||
|
true = M:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab),
|
||||||
|
true = M:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab),
|
||||||
|
?assertEqual(<<"sensor/#">>, topic(match(M, <<"sensor">>, Tab))),
|
||||||
|
?assertEqual(t_insert_3, id(match(M, <<"sensor">>, Tab))).
|
||||||
|
|
||||||
|
t_match(Config) ->
|
||||||
|
M = get_module(Config),
|
||||||
|
Tab = M:new(),
|
||||||
|
true = M:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab),
|
||||||
|
true = M:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab),
|
||||||
|
true = M:insert(<<"sensor/#">>, t_match_3, <<>>, Tab),
|
||||||
?assertMatch(
|
?assertMatch(
|
||||||
[<<"sensor/#">>, <<"sensor/+/#">>],
|
[<<"sensor/#">>, <<"sensor/+/#">>],
|
||||||
[topic(M) || M <- matches(<<"sensor/1">>, Tab)]
|
[topic(X) || X <- matches(M, <<"sensor/1">>, Tab)]
|
||||||
).
|
).
|
||||||
|
|
||||||
t_match2(_) ->
|
t_match2(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
true = emqx_topic_index:insert(<<"#">>, t_match2_1, <<>>, Tab),
|
Tab = M:new(),
|
||||||
true = emqx_topic_index:insert(<<"+/#">>, t_match2_2, <<>>, Tab),
|
true = M:insert(<<"#">>, t_match2_1, <<>>, Tab),
|
||||||
true = emqx_topic_index:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab),
|
true = M:insert(<<"+/#">>, t_match2_2, <<>>, Tab),
|
||||||
|
true = M:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
[<<"#">>, <<"+/#">>, <<"+/+/#">>],
|
[<<"#">>, <<"+/#">>, <<"+/+/#">>],
|
||||||
[topic(M) || M <- matches(<<"a/b/c">>, Tab)]
|
[topic(X) || X <- matches(M, <<"a/b/c">>, Tab)]
|
||||||
),
|
),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
false,
|
false,
|
||||||
emqx_topic_index:match(<<"$SYS/broker/zenmq">>, Tab)
|
M:match(<<"$SYS/broker/zenmq">>, Tab)
|
||||||
|
),
|
||||||
|
?assertEqual(
|
||||||
|
[],
|
||||||
|
matches(M, <<"$SYS/broker/zenmq">>, Tab)
|
||||||
).
|
).
|
||||||
|
|
||||||
t_match3(_) ->
|
t_match3(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
|
Tab = M:new(),
|
||||||
Records = [
|
Records = [
|
||||||
{<<"d/#">>, t_match3_1},
|
{<<"d/#">>, t_match3_1},
|
||||||
{<<"a/b/+">>, t_match3_2},
|
{<<"a/b/+">>, t_match3_2},
|
||||||
|
@ -69,37 +98,39 @@ t_match3(_) ->
|
||||||
{<<"$SYS/#">>, t_match3_sys}
|
{<<"$SYS/#">>, t_match3_sys}
|
||||||
],
|
],
|
||||||
lists:foreach(
|
lists:foreach(
|
||||||
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
|
fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end,
|
||||||
Records
|
Records
|
||||||
),
|
),
|
||||||
Matched = matches(<<"a/b/c">>, Tab),
|
Matched = matches(M, <<"a/b/c">>, Tab),
|
||||||
case length(Matched) of
|
case length(Matched) of
|
||||||
3 -> ok;
|
3 -> ok;
|
||||||
_ -> error({unexpected, Matched})
|
_ -> error({unexpected, Matched})
|
||||||
end,
|
end,
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
t_match3_sys,
|
t_match3_sys,
|
||||||
id(match(<<"$SYS/a/b/c">>, Tab))
|
id(match(M, <<"$SYS/a/b/c">>, Tab))
|
||||||
).
|
).
|
||||||
|
|
||||||
t_match4(_) ->
|
t_match4(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
|
Tab = M:new(),
|
||||||
Records = [{<<"/#">>, t_match4_1}, {<<"/+">>, t_match4_2}, {<<"/+/a/b/c">>, t_match4_3}],
|
Records = [{<<"/#">>, t_match4_1}, {<<"/+">>, t_match4_2}, {<<"/+/a/b/c">>, t_match4_3}],
|
||||||
lists:foreach(
|
lists:foreach(
|
||||||
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
|
fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end,
|
||||||
Records
|
Records
|
||||||
),
|
),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
[<<"/#">>, <<"/+">>],
|
[<<"/#">>, <<"/+">>],
|
||||||
[topic(M) || M <- matches(<<"/">>, Tab)]
|
[topic(X) || X <- matches(M, <<"/">>, Tab)]
|
||||||
),
|
),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
[<<"/#">>, <<"/+/a/b/c">>],
|
[<<"/#">>, <<"/+/a/b/c">>],
|
||||||
[topic(M) || M <- matches(<<"/0/a/b/c">>, Tab)]
|
[topic(X) || X <- matches(M, <<"/0/a/b/c">>, Tab)]
|
||||||
).
|
).
|
||||||
|
|
||||||
t_match5(_) ->
|
t_match5(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
|
Tab = M:new(),
|
||||||
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
|
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
|
||||||
Records = [
|
Records = [
|
||||||
{<<"#">>, t_match5_1},
|
{<<"#">>, t_match5_1},
|
||||||
|
@ -107,58 +138,63 @@ t_match5(_) ->
|
||||||
{<<T/binary, "/+">>, t_match5_3}
|
{<<T/binary, "/+">>, t_match5_3}
|
||||||
],
|
],
|
||||||
lists:foreach(
|
lists:foreach(
|
||||||
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
|
fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end,
|
||||||
Records
|
Records
|
||||||
),
|
),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
[<<"#">>, <<T/binary, "/#">>],
|
[<<"#">>, <<T/binary, "/#">>],
|
||||||
[topic(M) || M <- matches(T, Tab)]
|
[topic(X) || X <- matches(M, T, Tab)]
|
||||||
),
|
),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
[<<"#">>, <<T/binary, "/#">>, <<T/binary, "/+">>],
|
[<<"#">>, <<T/binary, "/#">>, <<T/binary, "/+">>],
|
||||||
[topic(M) || M <- matches(<<T/binary, "/1">>, Tab)]
|
[topic(X) || X <- matches(M, <<T/binary, "/1">>, Tab)]
|
||||||
).
|
).
|
||||||
|
|
||||||
t_match6(_) ->
|
t_match6(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
|
Tab = M:new(),
|
||||||
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
|
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
|
||||||
W = <<"+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/#">>,
|
W = <<"+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/#">>,
|
||||||
emqx_topic_index:insert(W, ID = t_match6, <<>>, Tab),
|
M:insert(W, ID = t_match6, <<>>, Tab),
|
||||||
?assertEqual(ID, id(match(T, Tab))).
|
?assertEqual(ID, id(match(M, T, Tab))).
|
||||||
|
|
||||||
t_match7(_) ->
|
t_match7(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
|
Tab = M:new(),
|
||||||
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
|
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
|
||||||
W = <<"a/+/c/+/e/+/g/+/i/+/k/+/m/+/o/+/q/+/s/+/u/+/w/+/y/+/#">>,
|
W = <<"a/+/c/+/e/+/g/+/i/+/k/+/m/+/o/+/q/+/s/+/u/+/w/+/y/+/#">>,
|
||||||
emqx_topic_index:insert(W, t_match7, <<>>, Tab),
|
M:insert(W, t_match7, <<>>, Tab),
|
||||||
?assertEqual(W, topic(match(T, Tab))).
|
?assertEqual(W, topic(match(M, T, Tab))).
|
||||||
|
|
||||||
t_match_fast_forward(_) ->
|
t_match_fast_forward(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
emqx_topic_index:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab),
|
Tab = M:new(),
|
||||||
emqx_topic_index:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab),
|
M:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab),
|
||||||
emqx_topic_index:insert(<<"a/b/c/+">>, id3, <<>>, Tab),
|
M:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab),
|
||||||
|
M:insert(<<"a/b/c/+">>, id3, <<>>, Tab),
|
||||||
% dbg:tracer(),
|
% dbg:tracer(),
|
||||||
% dbg:p(all, c),
|
% dbg:p(all, c),
|
||||||
% dbg:tpl({ets, next, '_'}, x),
|
% dbg:tpl({ets, next, '_'}, x),
|
||||||
?assertEqual(id1, id(match(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))),
|
?assertEqual(id1, id(match(M, <<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))),
|
||||||
?assertEqual([id1], [id(M) || M <- matches(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]).
|
?assertEqual([id1], [id(X) || X <- matches(M, <<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]).
|
||||||
|
|
||||||
t_match_unique(_) ->
|
t_match_unique(Config) ->
|
||||||
Tab = emqx_topic_index:new(),
|
M = get_module(Config),
|
||||||
emqx_topic_index:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab),
|
Tab = M:new(),
|
||||||
emqx_topic_index:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab),
|
M:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab),
|
||||||
emqx_topic_index:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab),
|
M:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab),
|
||||||
|
M:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
[t_match_id1, t_match_id1],
|
[t_match_id1, t_match_id1],
|
||||||
[id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [])]
|
[id(X) || X <- matches(M, <<"a/b/c">>, Tab, [])]
|
||||||
),
|
),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
[t_match_id1],
|
[t_match_id1],
|
||||||
[id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])]
|
[id(X) || X <- matches(M, <<"a/b/c">>, Tab, [unique])]
|
||||||
).
|
).
|
||||||
|
|
||||||
t_match_wildcard_edge_cases(_) ->
|
t_match_wildcard_edge_cases(Config) ->
|
||||||
|
M = get_module(Config),
|
||||||
CommonTopics = [
|
CommonTopics = [
|
||||||
<<"a/b">>,
|
<<"a/b">>,
|
||||||
<<"a/b/#">>,
|
<<"a/b/#">>,
|
||||||
|
@ -179,32 +215,33 @@ t_match_wildcard_edge_cases(_) ->
|
||||||
{[<<"/">>, <<"+">>], <<"a">>, [2]}
|
{[<<"/">>, <<"+">>], <<"a">>, [2]}
|
||||||
],
|
],
|
||||||
F = fun({Topics, TopicName, Expected}) ->
|
F = fun({Topics, TopicName, Expected}) ->
|
||||||
Tab = emqx_topic_index:new(),
|
Tab = M:new(),
|
||||||
_ = [emqx_topic_index:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)],
|
_ = [M:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)],
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
lists:last(Expected),
|
lists:last(Expected),
|
||||||
id(emqx_topic_index:match(TopicName, Tab)),
|
id(M:match(TopicName, Tab)),
|
||||||
#{"Base topics" => Topics, "Topic name" => TopicName}
|
#{"Base topics" => Topics, "Topic name" => TopicName}
|
||||||
),
|
),
|
||||||
?assertEqual(
|
?assertEqual(
|
||||||
Expected,
|
Expected,
|
||||||
[id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])],
|
[id(X) || X <- matches(M, TopicName, Tab, [unique])],
|
||||||
#{"Base topics" => Topics, "Topic name" => TopicName}
|
#{"Base topics" => Topics, "Topic name" => TopicName}
|
||||||
)
|
)
|
||||||
end,
|
end,
|
||||||
lists:foreach(F, Datasets).
|
lists:foreach(F, Datasets).
|
||||||
|
|
||||||
t_prop_matches(_) ->
|
t_prop_matches(Config) ->
|
||||||
|
M = get_module(Config),
|
||||||
?assert(
|
?assert(
|
||||||
proper:quickcheck(
|
proper:quickcheck(
|
||||||
topic_matches_prop(),
|
topic_matches_prop(M),
|
||||||
[{max_size, 100}, {numtests, 100}]
|
[{max_size, 100}, {numtests, 100}]
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
Statistics = [{C, account(C)} || C <- [filters, topics, matches, maxhits]],
|
Statistics = [{C, account(C)} || C <- [filters, topics, matches, maxhits]],
|
||||||
ct:pal("Statistics: ~p", [maps:from_list(Statistics)]).
|
ct:pal("Statistics: ~p", [maps:from_list(Statistics)]).
|
||||||
|
|
||||||
topic_matches_prop() ->
|
topic_matches_prop(M) ->
|
||||||
?FORALL(
|
?FORALL(
|
||||||
% Generate a longer list of topics and a shorter list of topic filter patterns.
|
% Generate a longer list of topics and a shorter list of topic filter patterns.
|
||||||
#{
|
#{
|
||||||
|
@ -219,12 +256,12 @@ topic_matches_prop() ->
|
||||||
patterns => list(topic_filter_pattern_t())
|
patterns => list(topic_filter_pattern_t())
|
||||||
}),
|
}),
|
||||||
begin
|
begin
|
||||||
Tab = emqx_topic_index:new(),
|
Tab = M:new(),
|
||||||
Topics = [emqx_topic:join(T) || T <- TTopics],
|
Topics = [emqx_topic:join(T) || T <- TTopics],
|
||||||
% Produce topic filters from generated topics and patterns.
|
% Produce topic filters from generated topics and patterns.
|
||||||
% Number of filters is equal to the number of patterns, most of the time.
|
% Number of filters is equal to the number of patterns, most of the time.
|
||||||
Filters = lists:enumerate(mk_filters(Pats, TTopics)),
|
Filters = lists:enumerate(mk_filters(Pats, TTopics)),
|
||||||
_ = [emqx_topic_index:insert(F, N, <<>>, Tab) || {N, F} <- Filters],
|
_ = [M:insert(F, N, <<>>, Tab) || {N, F} <- Filters],
|
||||||
% Gather some basic statistics
|
% Gather some basic statistics
|
||||||
_ = account(filters, length(Filters)),
|
_ = account(filters, length(Filters)),
|
||||||
_ = account(topics, NTopics = length(Topics)),
|
_ = account(topics, NTopics = length(Topics)),
|
||||||
|
@ -233,7 +270,7 @@ topic_matches_prop() ->
|
||||||
% matching it against the list of filters one by one.
|
% matching it against the list of filters one by one.
|
||||||
lists:all(
|
lists:all(
|
||||||
fun(Topic) ->
|
fun(Topic) ->
|
||||||
Ids1 = [id(M) || M <- emqx_topic_index:matches(Topic, Tab, [unique])],
|
Ids1 = [id(X) || X <- matches(M, Topic, Tab, [unique])],
|
||||||
Ids2 = lists:filtermap(
|
Ids2 = lists:filtermap(
|
||||||
fun({N, F}) ->
|
fun({N, F}) ->
|
||||||
case emqx_topic:match(Topic, F) of
|
case emqx_topic:match(Topic, F) of
|
||||||
|
@ -277,17 +314,20 @@ account(Counter) ->
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
match(T, Tab) ->
|
match(M, T, Tab) ->
|
||||||
emqx_topic_index:match(T, Tab).
|
M:match(T, Tab).
|
||||||
|
|
||||||
matches(T, Tab) ->
|
matches(M, T, Tab) ->
|
||||||
lists:sort(emqx_topic_index:matches(T, Tab, [])).
|
lists:sort(M:matches(T, Tab, [])).
|
||||||
|
|
||||||
|
matches(M, T, Tab, Opts) ->
|
||||||
|
M:matches(T, Tab, Opts).
|
||||||
|
|
||||||
id(Match) ->
|
id(Match) ->
|
||||||
emqx_topic_index:get_id(Match).
|
emqx_trie_search:get_id(Match).
|
||||||
|
|
||||||
topic(Match) ->
|
topic(Match) ->
|
||||||
emqx_topic_index:get_topic(Match).
|
emqx_trie_search:get_topic(Match).
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue