Merge pull request #11517 from keynslug/ft/topic-index-v3

feat(topicidx): iterate on trie search implementation
This commit is contained in:
Andrew Mayorov 2023-08-28 12:24:16 +04:00 committed by GitHub
commit b74ff10705
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 673 additions and 251 deletions

View File

@ -0,0 +1,120 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Topic index implementation with gb_trees stored in persistent_term.
%% This is only suitable for a static set of topics or topic-filters.
-module(emqx_topic_gbt).
-export([new/0, new/1]).
-export([insert/4]).
-export([delete/3]).
-export([match/2]).
-export([matches/3]).
-export([get_id/1]).
-export([get_topic/1]).
-export([get_record/2]).
%% Index keys are produced by `emqx_trie_search:make_key/2`, so reuse its key
%% type: besides parsed filter words it also covers non-wildcard topics, which
%% are stored as plain binaries.
-type key(ID) :: emqx_trie_search:key(ID).
-type match(ID) :: key(ID).
-type name() :: any().
%% @private Only for testing.
%% Creates an empty index under the fixed name `test` and returns that name.
-spec new() -> name().
new() ->
new(test).
%% @doc Create an empty gb_tree and store it in persistent_term under the
%% given name. Anything previously stored under that name is replaced.
%% Returns the name, which is the handle the rest of this API expects.
-spec new(name()) -> name().
new(Name) ->
    true = gbt_update(Name, gb_trees:empty()),
    Name.
%% @doc Insert an entry into the index that associates the given topic filter
%% with the given record ID, and attach an arbitrary record to the entry. This
%% allows users to choose between regular and "materialized" indexes, for example.
%% The stored tree is read, updated and written back in separate steps.
-spec insert(emqx_types:topic(), _ID, _Record, name()) -> true.
insert(Filter, ID, Record, Name) ->
    Current = gbt(Name),
    Updated = gb_trees:enter(key(Filter, ID), Record, Current),
    true = gbt_update(Name, Updated).
%% @doc Delete the entry that associates the given topic filter with the given
%% record ID. Deleting a non-existing entry is not an error.
%% The stored tree is read, updated and written back in separate steps.
-spec delete(emqx_types:topic(), _ID, name()) -> true.
delete(Filter, ID, Name) ->
    Current = gbt(Name),
    Updated = gb_trees:delete_any(key(Filter, ID), Current),
    true = gbt_update(Name, Updated).
%% @doc Match the given topic against the index and return the first match,
%% or `false` if there is none.
-spec match(emqx_types:topic(), name()) -> match(_ID) | false.
match(Topic, Name) ->
    NextF = make_nextf(Name),
    emqx_trie_search:match(Topic, NextF).
%% @doc Match given topic against the index and return _all_ matches.
%% If `unique` option is given, return only unique matches by record ID.
%% Options are passed through to `emqx_trie_search:matches/3` unchanged.
-spec matches(emqx_types:topic(), name(), emqx_trie_search:opts()) -> [match(_ID)].
matches(Topic, Name, Opts) ->
    emqx_trie_search:matches(Topic, make_nextf(Name), Opts).
%% @doc Extract record ID from the match.
%% Delegates to `emqx_trie_search:get_id/1`, which owns the key format.
-spec get_id(match(ID)) -> ID.
get_id(Key) ->
emqx_trie_search:get_id(Key).
%% @doc Extract topic (or topic filter) from the match.
%% Delegates to `emqx_trie_search:get_topic/1`, which owns the key format.
-spec get_topic(match(_ID)) -> emqx_types:topic().
get_topic(Key) ->
emqx_trie_search:get_topic(Key).
%% @doc Fetch the record associated with the match.
%% The key is expected to be present in the index: the lookup is done with
%% `gb_trees:get/2`.
-spec get_record(match(_ID), name()) -> _Record.
get_record(Key, Name) ->
    gb_trees:get(Key, gbt(Name)).
%% Build the index key for a topic (or topic filter) and a record ID.
%% The key format is defined by `emqx_trie_search:make_key/2`.
key(TopicOrFilter, ID) ->
emqx_trie_search:make_key(TopicOrFilter, ID).
%% Fetch the tree stored in persistent_term under the given index name.
gbt(Name) ->
    persistent_term:get(pt_key(Name)).

%% Store the tree in persistent_term under the given index name.
%% Always returns `true`, so that callers can mirror the ETS-based API.
gbt_update(Name, Tree) ->
    ok = persistent_term:put(pt_key(Name), Tree),
    true.

%% The persistent_term key is namespaced with this module's name.
pt_key(Name) ->
    {?MODULE, Name}.
%% Find the smallest key strictly greater than `Key` in a tree made of `nil`
%% leaves and `{NodeKey, Value, Smaller, Bigger}` nodes.
%% Returns `'$end_of_table'` when there is no such key.
gbt_next(nil, _Key) ->
    '$end_of_table';
gbt_next({NodeKey, _Value, _Left, Right}, Key) when NodeKey =< Key ->
    %% This node and its whole left subtree are not greater than the key.
    gbt_next(Right, Key);
gbt_next({NodeKey, _Value, Left, _Right}, Key) ->
    %% This node is a candidate, unless the left subtree holds a closer one.
    case gbt_next(Left, Key) of
        '$end_of_table' -> NodeKey;
        Closer -> Closer
    end.
%% Build the next-key function for `emqx_trie_search` over a snapshot of the
%% tree stored under the given name. The stored term is taken apart as a
%% `{Size, Root}` pair and only the root is kept by the returned closure.
make_nextf(Name) ->
    {_Size, Root} = gbt(Name),
    fun(Key) -> gbt_next(Root, Key) end.

View File

@ -14,18 +14,7 @@
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Topic index for matching topics to topic filters.
%%
%% Works on top of ETS ordered_set table. Keys are tuples constructed from
%% parsed topic filters and record IDs, wrapped in a tuple to order them
%% strictly greater than unit tuple (`{}`). Existing table may be used if
%% existing keys will not collide with index keys.
%%
%% Designed to effectively answer questions like:
%% 1. Does any topic filter match given topic?
%% 2. Which records are associated with topic filters matching given topic?
%% 3. Which topic filters match given topic?
%% 4. Which record IDs are associated with topic filters matching given topic?
%% @doc Topic index implementation with ETS table as ordered-set storage.
-module(emqx_topic_index).
@ -39,8 +28,7 @@
-export([get_topic/1]).
-export([get_record/2]).
-type word() :: binary() | '+' | '#'.
-type key(ID) :: {[word()], {ID}}.
-type key(ID) :: emqx_trie_search:key(ID).
-type match(ID) :: key(ID).
%% @doc Create a new ETS table suitable for topic index.
@ -54,171 +42,35 @@ new() ->
%% between regular and "materialized" indexes, for example.
-spec insert(emqx_types:topic(), _ID, _Record, ets:table()) -> true.
insert(Filter, ID, Record, Tab) ->
ets:insert(Tab, {{words(Filter), {ID}}, Record}).
Key = key(Filter, ID),
true = ets:insert(Tab, {Key, Record}).
%% @doc Delete an entry from the index that associates given topic filter to given
%% record ID. Deleting non-existing entry is not an error.
-spec delete(emqx_types:topic(), _ID, ets:table()) -> true.
delete(Filter, ID, Tab) ->
ets:delete(Tab, {words(Filter), {ID}}).
true = ets:delete(Tab, key(Filter, ID)).
%% @doc Match given topic against the index and return the first match, or `false` if
%% no match is found.
-spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false.
match(Topic, Tab) ->
{Words, RPrefix} = match_init(Topic),
match(Words, RPrefix, Tab).
match(Words, RPrefix, Tab) ->
Prefix = lists:reverse(RPrefix),
match(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Tab).
match(K, Prefix, Words, RPrefix, Tab) ->
case match_next(Prefix, K, Words) of
true ->
K;
skip ->
match(ets:next(Tab, K), Prefix, Words, RPrefix, Tab);
stop ->
false;
Matched ->
match_rest(Matched, Words, RPrefix, Tab)
end.
match_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Tab) ->
% NOTE
% Fast-forward through identical words in the topic and the last key suffixes.
% This should save us a few redundant `ets:next` calls at the cost of slightly
% more complex match patterns.
match_rest(SLast, Rest, [W1 | RPrefix], Tab);
match_rest(SLast, [W | Rest], RPrefix, Tab) when is_list(SLast) ->
match(Rest, [W | RPrefix], Tab);
match_rest(plus, [W | Rest], RPrefix, Tab) ->
% NOTE
% There's '+' in the key suffix, meaning we should consider 2 alternatives:
% 1. Match the rest of the topic as if there was '+' in the current position.
% 2. Skip this key and try to match the topic as it is.
case match(Rest, ['+' | RPrefix], Tab) of
Match = {_, _} ->
Match;
false ->
match(Rest, [W | RPrefix], Tab)
end;
match_rest(_, [], _RPrefix, _Tab) ->
false.
emqx_trie_search:match(Topic, make_nextf(Tab)).
%% @doc Match given topic against the index and return _all_ matches.
%% If `unique` option is given, return only unique matches by record ID.
-spec matches(emqx_types:topic(), ets:table(), _Opts :: [unique]) -> [match(_ID)].
matches(Topic, Tab, Opts) ->
{Words, RPrefix} = match_init(Topic),
AccIn =
case Opts of
[unique | _] -> #{};
[] -> []
end,
Matches = matches(Words, RPrefix, AccIn, Tab),
case Matches of
#{} -> maps:values(Matches);
_ -> Matches
end.
matches(Words, RPrefix, Acc, Tab) ->
Prefix = lists:reverse(RPrefix),
matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab).
matches(Words, RPrefix, K = {Filter, _}, Acc, Tab) ->
Prefix = lists:reverse(RPrefix),
case Prefix > Filter of
true ->
% NOTE: Prefix already greater than the last key seen, need to `ets:next/2`.
matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab);
false ->
% NOTE: Prefix is still less than or equal to the last key seen, reuse it.
matches(K, Prefix, Words, RPrefix, Acc, Tab)
end.
matches(K, Prefix, Words, RPrefix, Acc, Tab) ->
case match_next(Prefix, K, Words) of
true ->
matches(ets:next(Tab, K), Prefix, Words, RPrefix, match_add(K, Acc), Tab);
skip ->
matches(ets:next(Tab, K), Prefix, Words, RPrefix, Acc, Tab);
stop ->
Acc;
Matched ->
% NOTE: Prserve next key on the stack to save on `ets:next/2` calls.
matches_rest(Matched, Words, RPrefix, K, Acc, Tab)
end.
matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, K, Acc, Tab) ->
% NOTE
% Fast-forward through identical words in the topic and the last key suffixes.
% This should save us a few redundant `ets:next` calls at the cost of slightly
% more complex match patterns.
matches_rest(SLast, Rest, [W1 | RPrefix], K, Acc, Tab);
matches_rest(SLast, [W | Rest], RPrefix, K, Acc, Tab) when is_list(SLast) ->
matches(Rest, [W | RPrefix], K, Acc, Tab);
matches_rest(plus, [W | Rest], RPrefix, K, Acc, Tab) ->
% NOTE
% There's '+' in the key suffix, meaning we should accumulate all matches from
% each of 2 branches:
% 1. Match the rest of the topic as if there was '+' in the current position.
% 2. Skip this key and try to match the topic as it is.
NAcc = matches(Rest, ['+' | RPrefix], K, Acc, Tab),
matches(Rest, [W | RPrefix], K, NAcc, Tab);
matches_rest(_, [], _RPrefix, _K, Acc, _Tab) ->
Acc.
match_add(K = {_Filter, ID}, Acc = #{}) ->
% NOTE: ensuring uniqueness by record ID
Acc#{ID => K};
match_add(K, Acc) ->
[K | Acc].
match_next(Prefix, {Filter, _ID}, Suffix) ->
match_filter(Prefix, Filter, Suffix);
match_next(_, '$end_of_table', _) ->
stop.
match_filter([], [], []) ->
% NOTE: we matched the topic exactly
true;
match_filter([], [], _Suffix) ->
% NOTE: we matched the prefix, but there may be more matches next
skip;
match_filter([], ['#'], _Suffix) ->
% NOTE: naturally, '#' < '+', so this is already optimal for `match/2`
true;
match_filter([], ['+' | _], _Suffix) ->
plus;
match_filter([], [_H | _] = Rest, _Suffix) ->
Rest;
match_filter([H | T1], [H | T2], Suffix) ->
match_filter(T1, T2, Suffix);
match_filter([H1 | _], [H2 | _], _Suffix) when H2 > H1 ->
% NOTE: we're strictly past the prefix, no need to continue
stop.
match_init(Topic) ->
case words(Topic) of
[W = <<"$", _/bytes>> | Rest] ->
% NOTE
% This will effectively skip attempts to match special topics to `#` or `+/...`.
{Rest, [W]};
Words ->
{Words, []}
end.
emqx_trie_search:matches(Topic, make_nextf(Tab), Opts).
%% @doc Extract record ID from the match.
-spec get_id(match(ID)) -> ID.
get_id({_Filter, {ID}}) ->
ID.
get_id(Key) ->
emqx_trie_search:get_id(Key).
%% @doc Extract topic (or topic filter) from the match.
-spec get_topic(match(_ID)) -> emqx_types:topic().
get_topic({Filter, _ID}) ->
emqx_topic:join(Filter).
get_topic(Key) ->
emqx_trie_search:get_topic(Key).
%% @doc Fetch the record associated with the match.
%% NOTE: Only really useful for ETS tables where the record ID is the first element.
@ -226,17 +78,8 @@ get_topic({Filter, _ID}) ->
get_record(K, Tab) ->
ets:lookup_element(Tab, K, 2).
%%
key(TopicOrFilter, ID) ->
emqx_trie_search:make_key(TopicOrFilter, ID).
-spec words(emqx_types:topic()) -> [word()].
words(Topic) when is_binary(Topic) ->
% NOTE
% This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty
% tokens to ''. This is needed to keep ordering of words consistent with what
% `match_filter/3` expects.
[word(W) || W <- emqx_topic:tokens(Topic)].
-spec word(binary()) -> word().
word(<<"+">>) -> '+';
word(<<"#">>) -> '#';
word(Bin) -> Bin.
%% Wrap the ETS table into a next-key function for `emqx_trie_search`:
%% the returned fun answers with `ets:next/2` on that table.
make_nextf(Tab) ->
fun(Key) -> ets:next(Tab, Key) end.

View File

@ -0,0 +1,346 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Topic index for matching topics to topic filters.
%%
%% Works on top of an ordered collection data set, such as ETS ordered_set table.
%% Keys are tuples constructed from parsed topic filters and record IDs,
%% wrapped in a tuple to order them strictly greater than unit tuple (`{}`).
%% Existing table may be used if existing keys will not collide with index keys.
%%
%% Designed to effectively answer questions like:
%% 1. Does any topic filter match given topic?
%% 2. Which records are associated with topic filters matching given topic?
%% 3. Which topic filters match given topic?
%% 4. Which record IDs are associated with topic filters matching given topic?
%%
%% Trie-search algorithm:
%%
%% Given a 3-level topic (e.g. a/b/c), if we leave out '#' for now,
%% all possible subscriptions of a/b/c can be enumerated as below:
%%
%% a/b/c
%% a/b/+
%% a/+/c <--- subscribed
%% a/+/+
%% +/b/c <--- subscribed
%% +/b/+
%% +/+/c
%% +/+/+ <--- start searching upward from here
%%
%% Let's name this search space "Space1".
%% If we brute-force it, the scope would be 8 (2^3).
%% Meaning this has O(2^N) complexity (N being the level of topics).
%%
%% This clearly isn't going to work.
%% Should we then try to enumerate all subscribers instead?
%% If there are also other subscriptions, e.g. "+/x/y" and "+/b/0"
%%
%% a/+/c <--- match of a/b/c
%% +/x/n
%% ...
%% +/x/2
%% +/x/1
%% +/b/c <--- match of a/b/c
%% +/b/1
%% +/b/0
%%
%% Let's name it "Space2".
%%
%% This has O(M * L) complexity (M being the total number of subscriptions,
%% and L being the number of topic levels).
%% This is usually a lot smaller than "Space1", but still not very effective
%% if the collection size is e.g. 1 million.
%%
%% To make it more effective, we'll need to combine the two algorithms:
%% Use the ordered subscription topics' prefixes as starting points to make
%% guesses about whether or not the next word can be a '+', and skip-over
%% to the next possible match.
%%
%% NOTE: A prerequisite of the ordered collection is, it should be able
%% to find the *immediate-next* topic/filter with a given prefix.
%%
%% In the above example, we start from "+/b/0". When comparing "+/b/0"
%% with "a/b/c", we know the matching prefix is "+/b", meaning we can
%% start guessing if the next word is '+' or 'c':
%% * It can't be '+' because '+' < '0'
%% * It might be 'c' because 'c' > '0'
%%
%% So, we try to jump to the next topic which has a prefix of "+/b/c"
%% (this effectively means skipping over "+/b/1").
%%
%% After "+/b/c" is found to be a matching filter, we move up:
%% * The next possible match is "a/+/+" according to Space1
%% * The next subscription is "+/x/1" according to Space2
%%
%% "a/+/+" is lexicographically greater than "+/x/+", so let's jump to
%% the immediate-next of 'a/+/+', which is "a/+/c", allowing us to skip
%% over all the ones starting with "+/x".
%%
%% If we take '#' into consideration, it's only one extra comparison to see
%% if a filter ends with '#'.
%%
%% In summary, the complexity of this algorithm is O(N * L)
%% N being the number of total matches, and L being the level of the topic.
-module(emqx_trie_search).
-export([make_key/2]).
-export([match/2, matches/3, get_id/1, get_topic/1]).
-export_type([key/1, word/0, nextf/0, opts/0]).
-define(END, '$end_of_table').
%% A single topic level: a literal word or one of the wildcard atoms.
-type word() :: binary() | '+' | '#'.
%% Search starting point: carries `{}` in place of the wrapped record ID.
-type base_key() :: {binary() | [word()], {}}.
%% Index key: a whole topic (binary) or parsed filter words, plus the wrapped record ID.
-type key(ID) :: {binary() | [word()], {ID}}.
%% Maps a key to the key following it in the ordered collection, or `'$end_of_table'`.
-type nextf() :: fun((key(_) | base_key()) -> ?END | key(_)).
%% Search options, see `search/3`.
-type opts() :: [unique | return_first].
%% @doc Make a search-key for the given topic or topic filter.
%% Wildcard filters are kept as a list of words. Anything else is kept as the
%% original binary: such topics can be found with direct lookups, and the
%% unsplit form is more compact in memory.
-spec make_key(emqx_types:topic(), ID) -> key(ID).
make_key(Topic, ID) when is_binary(Topic) ->
    Words = filter_words(Topic),
    Filter =
        case emqx_topic:wildcard(Words) of
            true -> Words;
            false -> Topic
        end,
    {Filter, {ID}}.
%% @doc Extract record ID from the match, unwrapping it from the 1-tuple
%% it is stored in.
-spec get_id(key(ID)) -> ID.
get_id({_TopicOrFilter, {RecordID}}) ->
    RecordID.
%% @doc Extract topic (or topic filter) from the match.
%% Filters are stored as word lists and need to be joined back; plain topics
%% are stored verbatim.
-spec get_topic(key(_ID)) -> emqx_types:topic().
get_topic({Words, _ID}) when is_list(Words) ->
    emqx_topic:join(Words);
get_topic({Topic, _ID}) ->
    Topic.
-compile({inline, [base/1, move_up/2, match_add/2, compare/3]}).
%% Make the base-key which can be used to locate the desired search target.
%% It carries `{}` where index keys carry `{ID}`.
base(Prefix) ->
    {Prefix, {}}.

%% Pick the base-key the search starts from. When the first topic level begins
%% with `$`, the search starts at that level instead of the very beginning.
base_init([<<"$", _/bytes>> = First | _]) ->
    base([First]);
base_init(_Words) ->
    base([]).
%% Move the search target to the key next to the given Base.
%% `NextF` is the storage-specific next-key function (see `nextf()`); the result
%% is whatever it returns: the following key or `'$end_of_table'`.
move_up(NextF, Base) ->
NextF(Base).
%% @doc Match the given topic against the index and return the first match,
%% or `false` if no match is found.
%% With `return_first` the search aborts on the first hit by throwing
%% `{first, Key}` (see `match_add/2`), which is caught here.
-spec match(emqx_types:topic(), nextf()) -> false | key(_).
match(Topic, NextF) ->
    try search(Topic, NextF, [return_first]) of
        _NoMatch ->
            false
    catch
        throw:{first, Match} ->
            Match
    end.
%% @doc Match given topic against the index and return _all_ matches.
%% If `unique` option is given, return only unique matches by record ID.
%% Fails with `badarg` if the topic contains a `+` or `#` level
%% (see `topic_words/1`).
-spec matches(emqx_types:topic(), nextf(), opts()) -> [key(_)].
matches(Topic, NextF, Opts) ->
search(Topic, NextF, Opts).
%% @doc Entrypoint of the search for a given topic.
%% Runs the trie search over the filters first, then collects entries stored
%% under the exact topic, and finally turns the accumulator into a list.
search(Topic, NextF, Opts) ->
    Words = topic_words(Topic),
    Base = base_init(Words),
    %% The shape of the accumulator tells `match_add/2` how to collect matches.
    Acc0 =
        case {proplists:get_bool(return_first, Opts), proplists:get_bool(unique, Opts)} of
            {true, _} -> first;
            {false, true} -> #{};
            {false, false} -> []
        end,
    Found =
        case search_new(Words, Base, NextF, Acc0) of
            {Cursor, Acc} ->
                %% The trie search stopped at a key it compared as `lower`,
                %% continue with the exact-topic entries from there.
                match_topics(Topic, Cursor, NextF, Acc);
            Acc ->
                Acc
        end,
    if
        is_map(Found) -> maps:values(Found);
        true -> Found
    end.
%% The recursive entrypoint of the trie-search algorithm.
%% Always starts from the initial words of the topic: steps to the key next to
%% `Base` and hands it over to `search_up/4`, or stops at the end of the table.
search_new(Words, Base, NextF, Acc) ->
    case move_up(NextF, Base) of
        ?END ->
            Acc;
        Next ->
            search_up(Words, Next, NextF, Acc)
    end.
%% Search to the bigger end of ordered collection of topics and topic-filters.
%% Returns either the accumulator (the table is exhausted) or a
%% `{Cursor, Acc}` pair when the key under the cursor compared as `lower`.
search_up(Words, {Filter, _} = Cursor, NextF, Acc) ->
    case compare(Filter, Words, 0) of
        match_full ->
            NAcc = match_add(Cursor, Acc),
            search_new(Words, Cursor, NextF, NAcc);
        match_prefix ->
            search_new(Words, Cursor, NextF, Acc);
        lower ->
            {Cursor, Acc};
        {Pos, SeekWord} ->
            % NOTE
            % A seek instruction: keep the first `Pos` words of the current
            % topic filter, put `SeekWord` after them, and resume the search
            % from the base-key made of that prefix.
            SeekPrefix = seek(Pos, SeekWord, Filter),
            search_new(Words, base(SeekPrefix), NextF, Acc)
    end.
%% Take the first `N` words of the filter and put `SeekWord` right after them.
seek(0, SeekWord, _Rest) ->
    [SeekWord];
seek(N, SeekWord, [Word | Rest]) ->
    [Word | seek(N - 1, SeekWord, Rest)].
%% Compare the filter part of a key against the topic words. `Pos` is the
%% index of the level currently being looked at. Results:
%%  * `match_full`: the filter matches the topic;
%%  * `match_prefix`: the filter ran out of words before the topic did;
%%  * `lower`: the topic orders below the filter, or the key holds a plain topic;
%%  * `{SeekPos, SeekWord}`: a seek instruction, see `search_up/4`.
compare(Topic, _Words, _Pos) when is_binary(Topic) ->
    % NOTE
    % Not a filter at all: a binary here is a whole, unsplit topic.
    lower;
compare([], [], _Pos) ->
    % NOTE
    %  Topic: a/b/c/d
    %  Filter: a/+/+/d
    % The filter matches the topic exactly (possibly through pluses).
    % It goes to the result set, and the next table entry is tried.
    % Closest possible next entries that must not be missed:
    %  * a/+/+/d (same topic but a different ID)
    %  * a/+/+/d/# (also a match)
    match_full;
compare([], _TopicRest, _Pos) ->
    % NOTE
    %  Topic: a/b/c/d
    %  Filter: a/+/c
    % The filter is a prefix of the topic (possibly through pluses).
    % It is discarded, and the next table entry is tried.
    % Closest possible next entries that must not be missed:
    %  * a/+/c/# (which is a match)
    %  * a/+/c/+ (also a match)
    match_prefix;
compare(['#'], _TopicRest, _Pos) ->
    % NOTE
    %  Topic: a/b/c/d
    %  Filter: a/+/+/d/# or just a/#
    % The filter matches the topic through the trailing wildcard (and possibly
    % through pluses). It goes to the result set, and the next table entry is tried.
    % Closest possible next entries that must not be missed:
    %  * a/+/+/d/# (same topic but a different ID)
    match_full;
compare(['+' | FilterRest], [Word | TopicRest], Pos) ->
    case compare(FilterRest, TopicRest, Pos + 1) of
        lower ->
            % NOTE
            %  Topic: a/b/c/d
            %  Filter: a/+/+/e/1 or a/b/+/d/1
            % The topic is lower than the filter, but this level holds a `+`,
            % so a backtrack point to seek to is emitted:
            %  Seek: {2, c}
            % Part of the search space is skipped by seeking to the next
            % possible match:
            %  Next: a/+/c
            {Pos, Word};
        Result ->
            % NOTE
            % Either a backtrack point emitted from a later `+` position, or
            % a seek / match. Passed through unchanged in both cases.
            Result
    end;
compare([Word | FilterRest], [Word | TopicRest], Pos) ->
    % NOTE
    % Same word in both the topic and the filter: move on to the next level,
    % keeping the last backtrack point.
    compare(FilterRest, TopicRest, Pos + 1);
compare([FilterWord | _], [Word | _], _Pos) when FilterWord > Word ->
    % NOTE
    %  Topic: a/b/c/d
    %  Filter: a/b/c/e/1 or a/b/+/e
    % The topic is lower than the filter. In the first case there is nowhere to
    % backtrack to, the search space is left behind. In the second case there is
    % a `+` on the 3rd level to seek up from.
    lower;
compare([_ | _], [], _Pos) ->
    % NOTE
    %  Topic: a/b/c/d
    %  Filter: a/b/c/d/1 or a/+/c/d/1
    % The topic is lower than the filter (being shorter). In the first case there
    % is nowhere to backtrack to, the search space is left behind. In the second
    % case there is a `+` on the 2nd level to seek up from.
    lower;
compare([_ | _], [Word | _], Pos) ->
    % NOTE
    %  Topic: a/b/c/d
    %  Filter: a/+/+/0/1/2
    % The topic is higher than the filter, skip over to the next possible filter.
    %  Seek: {3, d}
    %  Next: a/+/+/d
    {Pos, Word}.
%% Add a matching key to the accumulator. The accumulator's shape decides how:
%%  * `first`: abort the search by throwing `{first, Key}`;
%%  * a list: collect every match;
%%  * a map: keep a single match per record ID.
match_add(Key, first) ->
    throw({first, Key});
match_add(Key, Acc) when is_list(Acc) ->
    [Key | Acc];
match_add({_Filter, ID} = Key, Acc) when is_map(Acc) ->
    % NOTE: ensuring uniqueness by record ID
    Acc#{ID => Key}.
%% Split a topic filter into words, turning `+` and `#` levels into atoms.
-spec filter_words(emqx_types:topic()) -> [word()].
filter_words(Topic) when is_binary(Topic) ->
% NOTE
% This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty
% tokens to ''. This is needed to keep ordering of words consistent with what
% `compare/3` expects.
[word(W, filter) || W <- emqx_topic:tokens(Topic)].
%% Split a topic (not a filter) into words.
%% A level that is exactly `+` or `#` is rejected with `badarg`, see `word/2`.
topic_words(Topic) when is_binary(Topic) ->
[word(W, topic) || W <- emqx_topic:tokens(Topic)].
%% Convert a single token into a word. Wildcard tokens become atoms in filters
%% and are rejected with `badarg` in topics; anything else is kept as is.
word(<<"+">>, filter) ->
    '+';
word(<<"#">>, filter) ->
    '#';
word(Wildcard, topic) when Wildcard =:= <<"+">>; Wildcard =:= <<"#">> ->
    error(badarg);
word(Bin, _Kind) ->
    Bin.
%% Match non-wildcard topics: collect entries stored under the exact topic,
%% starting from the key the trie search stopped at.
match_topics(Topic, {Topic, _} = Key, NextF, Acc) ->
    %% an entry for this very topic: keep it and look at the following key
    match_topics(Topic, NextF(Key), NextF, match_add(Key, Acc));
match_topics(Topic, {Other, _}, NextF, Acc) when Other < Topic ->
    %% the last key orders below the topic (e.g. it is a filter),
    %% try to jump to the topic
    match_topics(Topic, NextF(base(Topic)), NextF, Acc);
match_topics(_Topic, _Key, _NextF, Acc) ->
    %% gone past the topic, or reached the end of the table
    Acc.

View File

@ -25,42 +25,71 @@
-import(emqx_proper_types, [scaled/2]).
all() ->
emqx_common_test_helpers:all(?MODULE).
[
{group, ets},
{group, gb_tree}
].
t_insert(_) ->
Tab = emqx_topic_index:new(),
true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab),
?assertEqual(<<"sensor/#">>, topic(match(<<"sensor">>, Tab))),
?assertEqual(t_insert_3, id(match(<<"sensor">>, Tab))).
groups() ->
All = emqx_common_test_helpers:all(?MODULE),
[
{ets, All},
{gb_tree, All}
].
t_match(_) ->
Tab = emqx_topic_index:new(),
true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab),
true = emqx_topic_index:insert(<<"sensor/#">>, t_match_3, <<>>, Tab),
init_per_group(ets, Config) ->
[{index_module, emqx_topic_index} | Config];
init_per_group(gb_tree, Config) ->
[{index_module, emqx_topic_gbt} | Config].
end_per_group(_Group, _Config) ->
ok.
get_module(Config) ->
proplists:get_value(index_module, Config).
t_insert(Config) ->
M = get_module(Config),
Tab = M:new(),
true = M:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab),
true = M:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab),
true = M:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab),
?assertEqual(<<"sensor/#">>, topic(match(M, <<"sensor">>, Tab))),
?assertEqual(t_insert_3, id(match(M, <<"sensor">>, Tab))).
t_match(Config) ->
M = get_module(Config),
Tab = M:new(),
true = M:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab),
true = M:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab),
true = M:insert(<<"sensor/#">>, t_match_3, <<>>, Tab),
?assertMatch(
[<<"sensor/#">>, <<"sensor/+/#">>],
[topic(M) || M <- matches(<<"sensor/1">>, Tab)]
[topic(X) || X <- matches(M, <<"sensor/1">>, Tab)]
).
t_match2(_) ->
Tab = emqx_topic_index:new(),
true = emqx_topic_index:insert(<<"#">>, t_match2_1, <<>>, Tab),
true = emqx_topic_index:insert(<<"+/#">>, t_match2_2, <<>>, Tab),
true = emqx_topic_index:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab),
t_match2(Config) ->
M = get_module(Config),
Tab = M:new(),
true = M:insert(<<"#">>, t_match2_1, <<>>, Tab),
true = M:insert(<<"+/#">>, t_match2_2, <<>>, Tab),
true = M:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab),
?assertEqual(
[<<"#">>, <<"+/#">>, <<"+/+/#">>],
[topic(M) || M <- matches(<<"a/b/c">>, Tab)]
[topic(X) || X <- matches(M, <<"a/b/c">>, Tab)]
),
?assertEqual(
false,
emqx_topic_index:match(<<"$SYS/broker/zenmq">>, Tab)
M:match(<<"$SYS/broker/zenmq">>, Tab)
),
?assertEqual(
[],
matches(M, <<"$SYS/broker/zenmq">>, Tab)
).
t_match3(_) ->
Tab = emqx_topic_index:new(),
t_match3(Config) ->
M = get_module(Config),
Tab = M:new(),
Records = [
{<<"d/#">>, t_match3_1},
{<<"a/b/+">>, t_match3_2},
@ -69,37 +98,39 @@ t_match3(_) ->
{<<"$SYS/#">>, t_match3_sys}
],
lists:foreach(
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end,
Records
),
Matched = matches(<<"a/b/c">>, Tab),
Matched = matches(M, <<"a/b/c">>, Tab),
case length(Matched) of
3 -> ok;
_ -> error({unexpected, Matched})
end,
?assertEqual(
t_match3_sys,
id(match(<<"$SYS/a/b/c">>, Tab))
id(match(M, <<"$SYS/a/b/c">>, Tab))
).
t_match4(_) ->
Tab = emqx_topic_index:new(),
t_match4(Config) ->
M = get_module(Config),
Tab = M:new(),
Records = [{<<"/#">>, t_match4_1}, {<<"/+">>, t_match4_2}, {<<"/+/a/b/c">>, t_match4_3}],
lists:foreach(
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end,
Records
),
?assertEqual(
[<<"/#">>, <<"/+">>],
[topic(M) || M <- matches(<<"/">>, Tab)]
[topic(X) || X <- matches(M, <<"/">>, Tab)]
),
?assertEqual(
[<<"/#">>, <<"/+/a/b/c">>],
[topic(M) || M <- matches(<<"/0/a/b/c">>, Tab)]
[topic(X) || X <- matches(M, <<"/0/a/b/c">>, Tab)]
).
t_match5(_) ->
Tab = emqx_topic_index:new(),
t_match5(Config) ->
M = get_module(Config),
Tab = M:new(),
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
Records = [
{<<"#">>, t_match5_1},
@ -107,58 +138,89 @@ t_match5(_) ->
{<<T/binary, "/+">>, t_match5_3}
],
lists:foreach(
fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end,
fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end,
Records
),
?assertEqual(
[<<"#">>, <<T/binary, "/#">>],
[topic(M) || M <- matches(T, Tab)]
[topic(X) || X <- matches(M, T, Tab)]
),
?assertEqual(
[<<"#">>, <<T/binary, "/#">>, <<T/binary, "/+">>],
[topic(M) || M <- matches(<<T/binary, "/1">>, Tab)]
[topic(X) || X <- matches(M, <<T/binary, "/1">>, Tab)]
).
t_match6(_) ->
Tab = emqx_topic_index:new(),
t_match6(Config) ->
M = get_module(Config),
Tab = M:new(),
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
W = <<"+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/#">>,
emqx_topic_index:insert(W, ID = t_match6, <<>>, Tab),
?assertEqual(ID, id(match(T, Tab))).
M:insert(W, ID = t_match6, <<>>, Tab),
?assertEqual(ID, id(match(M, T, Tab))).
t_match7(_) ->
Tab = emqx_topic_index:new(),
t_match7(Config) ->
M = get_module(Config),
Tab = M:new(),
T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>,
W = <<"a/+/c/+/e/+/g/+/i/+/k/+/m/+/o/+/q/+/s/+/u/+/w/+/y/+/#">>,
emqx_topic_index:insert(W, t_match7, <<>>, Tab),
?assertEqual(W, topic(match(T, Tab))).
M:insert(W, t_match7, <<>>, Tab),
?assertEqual(W, topic(match(M, T, Tab))).
t_match_fast_forward(_) ->
Tab = emqx_topic_index:new(),
emqx_topic_index:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab),
emqx_topic_index:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab),
emqx_topic_index:insert(<<"a/b/c/+">>, id3, <<>>, Tab),
%% Every filter is registered under several record IDs: each matching filter
%% must show up once per ID, while the non-matching `+` must not show up at all.
t_match8(Config) ->
    M = get_module(Config),
    Tab = M:new(),
    Filters = [<<"+">>, <<"dev/global/sensor">>, <<"dev/+/sensor/#">>],
    IDs = [1, 2, 3],
    Keys = [{F, ID} || F <- Filters, ID <- IDs],
    lists:foreach(
        fun({F, ID}) ->
            M:insert(F, ID, <<>>, Tab)
        end,
        Keys
    ),
    Topic = <<"dev/global/sensor">>,
    %% NOTE: the `matches/3` helper of this suite already sorts the matches.
    Matches = matches(M, Topic, Tab),
    ?assertEqual(
        [
            <<"dev/+/sensor/#">>,
            <<"dev/+/sensor/#">>,
            <<"dev/+/sensor/#">>,
            <<"dev/global/sensor">>,
            <<"dev/global/sensor">>,
            <<"dev/global/sensor">>
        ],
        %% Use the suite's helper rather than a hardcoded index module.
        [topic(Match) || Match <- Matches]
    ).
t_match_fast_forward(Config) ->
M = get_module(Config),
Tab = M:new(),
M:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab),
M:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab),
M:insert(<<"a/b/c/+">>, id3, <<>>, Tab),
% dbg:tracer(),
% dbg:p(all, c),
% dbg:tpl({ets, next, '_'}, x),
?assertEqual(id1, id(match(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))),
?assertEqual([id1], [id(M) || M <- matches(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]).
?assertEqual(id1, id(match(M, <<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))),
?assertEqual([id1], [id(X) || X <- matches(M, <<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]).
t_match_unique(_) ->
Tab = emqx_topic_index:new(),
emqx_topic_index:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab),
emqx_topic_index:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab),
emqx_topic_index:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab),
t_match_unique(Config) ->
M = get_module(Config),
Tab = M:new(),
M:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab),
M:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab),
M:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab),
?assertEqual(
[t_match_id1, t_match_id1],
[id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [])]
[id(X) || X <- matches(M, <<"a/b/c">>, Tab, [])]
),
?assertEqual(
[t_match_id1],
[id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])]
[id(X) || X <- matches(M, <<"a/b/c">>, Tab, [unique])]
).
t_match_wildcard_edge_cases(_) ->
t_match_wildcard_edge_cases(Config) ->
M = get_module(Config),
CommonTopics = [
<<"a/b">>,
<<"a/b/#">>,
@ -179,32 +241,46 @@ t_match_wildcard_edge_cases(_) ->
{[<<"/">>, <<"+">>], <<"a">>, [2]}
],
F = fun({Topics, TopicName, Expected}) ->
Tab = emqx_topic_index:new(),
_ = [emqx_topic_index:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)],
Tab = M:new(),
_ = [M:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)],
?assertEqual(
lists:last(Expected),
id(emqx_topic_index:match(TopicName, Tab)),
id(M:match(TopicName, Tab)),
#{"Base topics" => Topics, "Topic name" => TopicName}
),
?assertEqual(
Expected,
[id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])],
[id(X) || X <- matches(M, TopicName, Tab, [unique])],
#{"Base topics" => Topics, "Topic name" => TopicName}
)
end,
lists:foreach(F, Datasets).
t_prop_matches(_) ->
%% Edge case: with two empty filters, `+/01` and `+/+/01` in the index, only
%% `+/01` (ID 2) must match the 2-level topic `01/01`.
%% NOTE(review): presumably a counterexample once produced by `t_prop_matches` — confirm.
t_prop_edgecase(Config) ->
M = get_module(Config),
Tab = M:new(),
Topic = <<"01/01">>,
Filters = [
{1, <<>>},
{2, <<"+/01">>},
{3, <<>>},
{4, <<"+/+/01">>}
],
_ = [M:insert(F, N, <<>>, Tab) || {N, F} <- Filters],
?assertMatch([2], [id(X) || X <- matches(M, Topic, Tab, [unique])]).
t_prop_matches(Config) ->
M = get_module(Config),
?assert(
proper:quickcheck(
topic_matches_prop(),
topic_matches_prop(M),
[{max_size, 100}, {numtests, 100}]
)
),
Statistics = [{C, account(C)} || C <- [filters, topics, matches, maxhits]],
ct:pal("Statistics: ~p", [maps:from_list(Statistics)]).
topic_matches_prop() ->
topic_matches_prop(M) ->
?FORALL(
% Generate a longer list of topics and a shorter list of topic filter patterns.
#{
@ -219,12 +295,12 @@ topic_matches_prop() ->
patterns => list(topic_filter_pattern_t())
}),
begin
Tab = emqx_topic_index:new(),
Tab = M:new(),
Topics = [emqx_topic:join(T) || T <- TTopics],
% Produce topic filters from generated topics and patterns.
% Number of filters is equal to the number of patterns, most of the time.
Filters = lists:enumerate(mk_filters(Pats, TTopics)),
_ = [emqx_topic_index:insert(F, N, <<>>, Tab) || {N, F} <- Filters],
_ = [M:insert(F, N, <<>>, Tab) || {N, F} <- Filters],
% Gather some basic statistics
_ = account(filters, length(Filters)),
_ = account(topics, NTopics = length(Topics)),
@ -233,7 +309,7 @@ topic_matches_prop() ->
% matching it against the list of filters one by one.
lists:all(
fun(Topic) ->
Ids1 = [id(M) || M <- emqx_topic_index:matches(Topic, Tab, [unique])],
Ids1 = [id(X) || X <- matches(M, Topic, Tab, [unique])],
Ids2 = lists:filtermap(
fun({N, F}) ->
case emqx_topic:match(Topic, F) of
@ -252,8 +328,9 @@ topic_matches_prop() ->
ct:pal(
"Topic name: ~p~n"
"Index results: ~p~n"
"Topic match results:: ~p~n",
[Topic, Ids1, Ids2]
"Topic match results: ~p~n"
"Filters: ~p~n",
[Topic, Ids1, Ids2, Filters]
),
false
end
@ -276,17 +353,20 @@ account(Counter) ->
%%
match(T, Tab) ->
emqx_topic_index:match(T, Tab).
match(M, T, Tab) ->
M:match(T, Tab).
matches(T, Tab) ->
lists:sort(emqx_topic_index:matches(T, Tab, [])).
matches(M, T, Tab) ->
lists:sort(M:matches(T, Tab, [])).
matches(M, T, Tab, Opts) ->
M:matches(T, Tab, Opts).
id(Match) ->
emqx_topic_index:get_id(Match).
emqx_trie_search:get_id(Match).
topic(Match) ->
emqx_topic_index:get_topic(Match).
emqx_trie_search:get_topic(Match).
%%

View File

@ -0,0 +1,32 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
-module(emqx_trie_search_tests).
-include_lib("eunit/include/eunit.hrl").
%% Topics to be matched must not contain wildcard levels.
topic_validation_test() ->
    %% An always-empty index: a valid topic simply yields no match.
    EmptyNextF = fun(_) -> '$end_of_table' end,
    Match = fun(Topic) -> emqx_trie_search:match(Topic, EmptyNextF) end,
    %% A level that is exactly `+` or `#` makes the topic invalid.
    lists:foreach(
        fun(Topic) -> ?assertError(badarg, Match(Topic)) end,
        [<<"+">>, <<"#">>, <<"a/+/b">>, <<"a/b/#">>]
    ),
    %% Wildcard characters inside a longer level are ordinary characters.
    lists:foreach(
        fun(Topic) -> ?assertEqual(false, Match(Topic)) end,
        [<<"a/b/b+">>, <<"a/b/c#">>]
    ),
    ok.

View File

@ -649,7 +649,7 @@ do_wait_for_emqx_ready(N) ->
ok ->
ok;
timeout ->
?SLOG(warning, #{msg => "stil_waiting_for_emqx_app_to_be_ready"}),
?SLOG(warning, #{msg => "still_waiting_for_emqx_app_to_be_ready"}),
do_wait_for_emqx_ready(N - 1)
end.

View File

@ -190,7 +190,8 @@ test_deps() ->
{meck, "0.9.2"},
{proper, "1.4.0"},
{er_coap_client, {git, "https://github.com/emqx/er_coap_client", {tag, "v1.0.5"}}},
{erl_csv, "0.2.0"}
{erl_csv, "0.2.0"},
{eministat, "0.10.1"}
].
common_compile_opts() ->