From ae094e363caf2a02edd586b0168a1006c81d7289 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 15 Aug 2023 16:27:19 +0200 Subject: [PATCH 01/12] chore: fix a typo in log message --- apps/emqx_conf/src/emqx_cluster_rpc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx_conf/src/emqx_cluster_rpc.erl b/apps/emqx_conf/src/emqx_cluster_rpc.erl index bb154f8b5..934d7ef7a 100644 --- a/apps/emqx_conf/src/emqx_cluster_rpc.erl +++ b/apps/emqx_conf/src/emqx_cluster_rpc.erl @@ -649,7 +649,7 @@ do_wait_for_emqx_ready(N) -> ok -> ok; timeout -> - ?SLOG(warning, #{msg => "stil_waiting_for_emqx_app_to_be_ready"}), + ?SLOG(warning, #{msg => "still_waiting_for_emqx_app_to_be_ready"}), do_wait_for_emqx_ready(N - 1) end. From 6b152b3cb70af21558be7bd8be050d73d33c51af Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Sun, 20 Aug 2023 20:21:45 +0200 Subject: [PATCH 02/12] test: add more debug output --- apps/emqx/test/emqx_topic_index_SUITE.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index ade98acec..9058f3597 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -252,8 +252,9 @@ topic_matches_prop() -> ct:pal( "Topic name: ~p~n" "Index results: ~p~n" - "Topic match results:: ~p~n", - [Topic, Ids1, Ids2] + "Topic match results: ~p~n" + "Filters: ~p~n", + [Topic, Ids1, Ids2, Filters] ), false end From f4c8c6be5573804203dcd5d7e66ede487cfd19bf Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 17 Aug 2023 10:13:22 +0200 Subject: [PATCH 03/12] refactor(topic_index): optimize trie-search performance --- apps/emqx/src/emqx_topic_gbt.erl | 116 ++++++++ apps/emqx/src/emqx_topic_index.erl | 191 ++---------- apps/emqx/src/emqx_trie_search.erl | 344 ++++++++++++++++++++++ apps/emqx/test/emqx_topic_index_SUITE.erl | 190 +++++++----- 4 files changed, 593 insertions(+), 248 deletions(-) create mode 100644 apps/emqx/src/emqx_topic_gbt.erl create mode 100644 apps/emqx/src/emqx_trie_search.erl diff --git a/apps/emqx/src/emqx_topic_gbt.erl b/apps/emqx/src/emqx_topic_gbt.erl new file mode 100644 index 000000000..da6320bc7 --- /dev/null +++ b/apps/emqx/src/emqx_topic_gbt.erl @@ -0,0 +1,116 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc Topic index implemetation with gb_trees stored in persistent_term. +%% This is only suitable for a static set of topic or topic-filters. + +-module(emqx_topic_gbt). + +-export([new/0, new/1]). +-export([insert/4]). +-export([delete/3]). +-export([match/2]). +-export([matches/3]). + +-export([get_id/1]). +-export([get_topic/1]). +-export([get_record/2]). + +-type word() :: binary() | '+' | '#'. +-type key(ID) :: {[word()], {ID}}. 
+-type match(ID) :: key(ID). +-type name() :: any(). + +%% @private Only for testing. +-spec new() -> name(). +new() -> + new(test). + +%% @doc Create a new gb_tree and store it in the persitent_term with the +%% given name. +-spec new(name()) -> name(). +new(Name) -> + T = gb_trees:from_orddict([]), + true = gbt_update(Name, T), + Name. + +%% @doc Insert a new entry into the index that associates given topic filter to given +%% record ID, and attaches arbitrary record to the entry. This allows users to choose +%% between regular and "materialized" indexes, for example. +-spec insert(emqx_types:topic(), _ID, _Record, name()) -> true. +insert(Filter, ID, Record, Name) -> + Tree = gbt(Name), + Key = key(Filter, ID), + NewTree = gb_trees:enter(Key, Record, Tree), + true = gbt_update(Name, NewTree). + +%% @doc Delete an entry from the index that associates given topic filter to given +%% record ID. Deleting non-existing entry is not an error. +-spec delete(emqx_types:topic(), _ID, name()) -> true. +delete(Filter, ID, Name) -> + Tree = gbt(Name), + Key = key(Filter, ID), + NewTree = gb_trees:delete_any(Key, Tree), + true = gbt_update(Name, NewTree). + +%% @doc Match given topic against the index and return the first match, or `false` if +%% no match is found. +-spec match(emqx_types:topic(), name()) -> match(_ID) | false. +match(Topic, Name) -> + emqx_trie_search:match(Topic, make_nextf(Name)). + +%% @doc Match given topic against the index and return _all_ matches. +%% If `unique` option is given, return only unique matches by record ID. +matches(Topic, Name, Opts) -> + emqx_trie_search:matches(Topic, make_nextf(Name), Opts). + +%% @doc Extract record ID from the match. +-spec get_id(match(ID)) -> ID. +get_id(Key) -> + emqx_trie_search:get_id(Key). + +%% @doc Extract topic (or topic filter) from the match. +-spec get_topic(match(_ID)) -> emqx_types:topic(). +get_topic(Key) -> + emqx_trie_search:get_topic(Key). + +%% @doc Fetch the record associated with the match. +-spec get_record(match(_ID), name()) -> _Record. +get_record(Key, Name) -> + Gbt = gbt(Name), + gb_trees:get(Key, Gbt). + +key(TopicOrFilter, ID) -> + emqx_trie_search:make_key(TopicOrFilter, ID). + +gbt(Name) -> + persistent_term:get({?MODULE, Name}). + +gbt_update(Name, Tree) -> + persistent_term:put({?MODULE, Name}, Tree), + true. + +gbt_next(nil, _Input) -> + emqx_trie_search:ceiling(); +gbt_next({P, _V, _Smaller, Bigger}, K) when K >= P -> + gbt_next(Bigger, K); +gbt_next({P, _V, Smaller, _Bigger}, K) -> + NextKey = gbt_next(Smaller, K), + min(P, NextKey). + +make_nextf(Name) -> + {_SizeWeDontCare, TheTree} = gbt(Name), + fun(Key) -> gbt_next(TheTree, Key) end. diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index a6f662f74..e1da5b3b7 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -14,18 +14,7 @@ %% limitations under the License. %%-------------------------------------------------------------------- -%% @doc Topic index for matching topics to topic filters. -%% -%% Works on top of ETS ordered_set table. Keys are tuples constructed from -%% parsed topic filters and record IDs, wrapped in a tuple to order them -%% strictly greater than unit tuple (`{}`). Existing table may be used if -%% existing keys will not collide with index keys. -%% -%% Designed to effectively answer questions like: -%% 1. Does any topic filter match given topic? -%% 2. Which records are associated with topic filters matching given topic? -%% 3. 
Which topic filters match given topic? -%% 4. Which record IDs are associated with topic filters matching given topic? +%% @doc Topic index implemetation with ETS table as ordered-set storage. -module(emqx_topic_index). @@ -39,186 +28,51 @@ -export([get_topic/1]). -export([get_record/2]). --type word() :: binary() | '+' | '#'. --type key(ID) :: {[word()], {ID}}. +-type key(ID) :: emqx_trie_search:key(ID). -type match(ID) :: key(ID). %% @doc Create a new ETS table suitable for topic index. %% Usable mostly for testing purposes. -spec new() -> ets:table(). new() -> - ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]). + T = ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]), + ets:insert(T, {emqx_trie_search:ceiling(), []}), + T. %% @doc Insert a new entry into the index that associates given topic filter to given %% record ID, and attaches arbitrary record to the entry. This allows users to choose %% between regular and "materialized" indexes, for example. -spec insert(emqx_types:topic(), _ID, _Record, ets:table()) -> true. insert(Filter, ID, Record, Tab) -> - ets:insert(Tab, {{words(Filter), {ID}}, Record}). + Key = key(Filter, ID), + true = ets:insert(Tab, {Key, Record}). %% @doc Delete an entry from the index that associates given topic filter to given %% record ID. Deleting non-existing entry is not an error. -spec delete(emqx_types:topic(), _ID, ets:table()) -> true. delete(Filter, ID, Tab) -> - ets:delete(Tab, {words(Filter), {ID}}). + true = ets:delete(Tab, key(Filter, ID)). %% @doc Match given topic against the index and return the first match, or `false` if %% no match is found. -spec match(emqx_types:topic(), ets:table()) -> match(_ID) | false. match(Topic, Tab) -> - {Words, RPrefix} = match_init(Topic), - match(Words, RPrefix, Tab). - -match(Words, RPrefix, Tab) -> - Prefix = lists:reverse(RPrefix), - match(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Tab). - -match(K, Prefix, Words, RPrefix, Tab) -> - case match_next(Prefix, K, Words) of - true -> - K; - skip -> - match(ets:next(Tab, K), Prefix, Words, RPrefix, Tab); - stop -> - false; - Matched -> - match_rest(Matched, Words, RPrefix, Tab) - end. - -match_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, Tab) -> - % NOTE - % Fast-forward through identical words in the topic and the last key suffixes. - % This should save us a few redundant `ets:next` calls at the cost of slightly - % more complex match patterns. - match_rest(SLast, Rest, [W1 | RPrefix], Tab); -match_rest(SLast, [W | Rest], RPrefix, Tab) when is_list(SLast) -> - match(Rest, [W | RPrefix], Tab); -match_rest(plus, [W | Rest], RPrefix, Tab) -> - % NOTE - % There's '+' in the key suffix, meaning we should consider 2 alternatives: - % 1. Match the rest of the topic as if there was '+' in the current position. - % 2. Skip this key and try to match the topic as it is. - case match(Rest, ['+' | RPrefix], Tab) of - Match = {_, _} -> - Match; - false -> - match(Rest, [W | RPrefix], Tab) - end; -match_rest(_, [], _RPrefix, _Tab) -> - false. + emqx_trie_search:match(Topic, make_nextf(Tab)). %% @doc Match given topic against the index and return _all_ matches. %% If `unique` option is given, return only unique matches by record ID. --spec matches(emqx_types:topic(), ets:table(), _Opts :: [unique]) -> [match(_ID)]. 
matches(Topic, Tab, Opts) -> - {Words, RPrefix} = match_init(Topic), - AccIn = - case Opts of - [unique | _] -> #{}; - [] -> [] - end, - Matches = matches(Words, RPrefix, AccIn, Tab), - case Matches of - #{} -> maps:values(Matches); - _ -> Matches - end. - -matches(Words, RPrefix, Acc, Tab) -> - Prefix = lists:reverse(RPrefix), - matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab). - -matches(Words, RPrefix, K = {Filter, _}, Acc, Tab) -> - Prefix = lists:reverse(RPrefix), - case Prefix > Filter of - true -> - % NOTE: Prefix already greater than the last key seen, need to `ets:next/2`. - matches(ets:next(Tab, {Prefix, {}}), Prefix, Words, RPrefix, Acc, Tab); - false -> - % NOTE: Prefix is still less than or equal to the last key seen, reuse it. - matches(K, Prefix, Words, RPrefix, Acc, Tab) - end. - -matches(K, Prefix, Words, RPrefix, Acc, Tab) -> - case match_next(Prefix, K, Words) of - true -> - matches(ets:next(Tab, K), Prefix, Words, RPrefix, match_add(K, Acc), Tab); - skip -> - matches(ets:next(Tab, K), Prefix, Words, RPrefix, Acc, Tab); - stop -> - Acc; - Matched -> - % NOTE: Prserve next key on the stack to save on `ets:next/2` calls. - matches_rest(Matched, Words, RPrefix, K, Acc, Tab) - end. - -matches_rest([W1 | [W2 | _] = SLast], [W1 | [W2 | _] = Rest], RPrefix, K, Acc, Tab) -> - % NOTE - % Fast-forward through identical words in the topic and the last key suffixes. - % This should save us a few redundant `ets:next` calls at the cost of slightly - % more complex match patterns. - matches_rest(SLast, Rest, [W1 | RPrefix], K, Acc, Tab); -matches_rest(SLast, [W | Rest], RPrefix, K, Acc, Tab) when is_list(SLast) -> - matches(Rest, [W | RPrefix], K, Acc, Tab); -matches_rest(plus, [W | Rest], RPrefix, K, Acc, Tab) -> - % NOTE - % There's '+' in the key suffix, meaning we should accumulate all matches from - % each of 2 branches: - % 1. Match the rest of the topic as if there was '+' in the current position. - % 2. Skip this key and try to match the topic as it is. - NAcc = matches(Rest, ['+' | RPrefix], K, Acc, Tab), - matches(Rest, [W | RPrefix], K, NAcc, Tab); -matches_rest(_, [], _RPrefix, _K, Acc, _Tab) -> - Acc. - -match_add(K = {_Filter, ID}, Acc = #{}) -> - % NOTE: ensuring uniqueness by record ID - Acc#{ID => K}; -match_add(K, Acc) -> - [K | Acc]. - -match_next(Prefix, {Filter, _ID}, Suffix) -> - match_filter(Prefix, Filter, Suffix); -match_next(_, '$end_of_table', _) -> - stop. - -match_filter([], [], []) -> - % NOTE: we matched the topic exactly - true; -match_filter([], [], _Suffix) -> - % NOTE: we matched the prefix, but there may be more matches next - skip; -match_filter([], ['#'], _Suffix) -> - % NOTE: naturally, '#' < '+', so this is already optimal for `match/2` - true; -match_filter([], ['+' | _], _Suffix) -> - plus; -match_filter([], [_H | _] = Rest, _Suffix) -> - Rest; -match_filter([H | T1], [H | T2], Suffix) -> - match_filter(T1, T2, Suffix); -match_filter([H1 | _], [H2 | _], _Suffix) when H2 > H1 -> - % NOTE: we're strictly past the prefix, no need to continue - stop. - -match_init(Topic) -> - case words(Topic) of - [W = <<"$", _/bytes>> | Rest] -> - % NOTE - % This will effectively skip attempts to match special topics to `#` or `+/...`. - {Rest, [W]}; - Words -> - {Words, []} - end. + emqx_trie_search:matches(Topic, make_nextf(Tab), Opts). %% @doc Extract record ID from the match. -spec get_id(match(ID)) -> ID. -get_id({_Filter, {ID}}) -> - ID. +get_id(Key) -> + emqx_trie_search:get_id(Key). 
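%% A minimal usage sketch of the API this module keeps after the refactoring
%% (the topic, filter and ID below are made-up examples):
%%
%%   Tab = emqx_topic_index:new(),
%%   true = emqx_topic_index:insert(<<"sensor/+/#">>, id1, <<>>, Tab),
%%   Key = emqx_topic_index:match(<<"sensor/1/temp">>, Tab),
%%   id1 = emqx_topic_index:get_id(Key),
%%   <<"sensor/+/#">> = emqx_topic_index:get_topic(Key).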
%% @doc Extract topic (or topic filter) from the match. -spec get_topic(match(_ID)) -> emqx_types:topic(). -get_topic({Filter, _ID}) -> - emqx_topic:join(Filter). +get_topic(Key) -> + emqx_trie_search:get_topic(Key). %% @doc Fetch the record associated with the match. %% NOTE: Only really useful for ETS tables where the record ID is the first element. @@ -226,17 +80,8 @@ get_topic({Filter, _ID}) -> get_record(K, Tab) -> ets:lookup_element(Tab, K, 2). -%% +key(TopicOrFilter, ID) -> + emqx_trie_search:make_key(TopicOrFilter, ID). --spec words(emqx_types:topic()) -> [word()]. -words(Topic) when is_binary(Topic) -> - % NOTE - % This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty - % tokens to ''. This is needed to keep ordering of words consistent with what - % `match_filter/3` expects. - [word(W) || W <- emqx_topic:tokens(Topic)]. - --spec word(binary()) -> word(). -word(<<"+">>) -> '+'; -word(<<"#">>) -> '#'; -word(Bin) -> Bin. +make_nextf(Tab) -> + fun(Key) -> ets:next(Tab, Key) end. diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl new file mode 100644 index 000000000..8d9463c26 --- /dev/null +++ b/apps/emqx/src/emqx_trie_search.erl @@ -0,0 +1,344 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +%% @doc Topic index for matching topics to topic filters. +%% +%% Works on top of ETS ordered_set table. Keys are tuples constructed from +%% parsed topic filters and record IDs, wrapped in a tuple to order them +%% strictly greater than unit tuple (`{}`). Existing table may be used if +%% existing keys will not collide with index keys. +%% +%% Designed to effectively answer questions like: +%% 1. Does any topic filter match given topic? +%% 2. Which records are associated with topic filters matching given topic? +%% 3. Which topic filters match given topic? +%% 4. Which record IDs are associated with topic filters matching given topic? +%% +%% Trie-search algorithm: +%% +%% Given a 3-level topic (e.g. a/b/c), if we leave out '#' for now, +%% all possible subscriptions of a/b/c can be enumerated as below: +%% +%% a/b/c +%% a/b/+ +%% a/+/c <--- subscribed +%% a/+/+ +%% +/b/c <--- subscribed +%% +/b/+ +%% +/+/c +%% +/+/+ <--- start searching upward from here +%% +%% Let's name this search space "Space1". +%% If we brute-force it, the scope would be 8 (2^3). +%% Meaning this has O(2^N) complexity (N being the level of topics). +%% +%% This clearly isn't going to work. +%% Should we then try to enumerate all subscribers instead? +%% If there are also other subscriptions, e.g. "+/x/y" and "+/b/0" +%% +%% a/+/c <--- match of a/b/c +%% +/x/n +%% ... +%% +/x/2 +%% +/x/1 +%% +/b/c <--- match of a/b/c +%% +/b/1 +%% +/b/0 +%% +%% Let's name it "Space2". 
+%% +%% This has O(M * L) complexity (M being the total number of subscriptions, +%% and L being the number of topic levels). +%% This is usually a lot smaller than "Space1", but still not very effective +%% if the collection size is e.g. 1 million. +%% +%% To make it more effective, we'll need to combine the two algorithms: +%% Use the ordered subscription topics' prefixes as starting points to make +%% guesses about whether or not the next word can be a '+', and skip-over +%% to the next possible match. +%% +%% NOTE: A prerequisite of the ordered collection is, it should be able +%% to find the *immediate-next* topic/filter with a given prefix. +%% +%% In the above example, we start from "+/b/0". When comparing "+/b/0" +%% with "a/b/c", we know the matching prefix is "+/b", meaning we can +%% start guessing if the next word is '+' or 'c': +%% * It can't be '+' because '+' < '0' +%% * It might be 'c' because 'c' > '0' +%% +%% So, we try to jump to the next topic which has a prefix of "+/b/c" +%% (this effectively means skipping over "+/b/1"). +%% +%% After "+/b/c" is found to be a matching filter, we move up: +%% * The next possible match is "a/+/+" according to Space1 +%% * The next subscription is "+/x/1" according to Space2 +%% +%% "a/+/+" is lexicographically greater than "+/x/+", so let's jump to +%% the immediate-next of 'a/+/+', which is "a/+/c", allowing us to skip +%% over all the ones starting with "+/x". +%% +%% If we take '#' into consideration, it's only one extra comparison to see +%% if a filter ends with '#'. +%% +%% In summary, the complexity of this algorithm is O(N * L) +%% N being the number of total matches, and L being the level of the topic. + +-module(emqx_trie_search). + +-export([ceiling/0, make_key/2]). +-export([match/2, matches/3, get_id/1, get_topic/1]). +-export_type([key/1, word/0, nextf/0, opts/0]). + +-type word() :: binary() | '+' | '#'. +-type base_key() :: {binary() | [word()], {}}. +-type key(ID) :: {binary() | [word()], {ID}}. +-type nextf() :: fun((key(_) | base_key()) -> key(_)). +-type opts() :: [unique | return_first]. + +%% Holds the constant values of each search. +-record(ctx, { + %% A function which can quickly find the immediate-next record of the given prefix + nextf :: nextf(), + %% The initial prefix to start searching from + %% if the input topic starts with a dollar-word, it's the first word like [<<"$SYS">>] + %% otherwise it's a [] + prefix0 :: [word()], + %% The initial words of a topic + words0 :: [word()], + %% Return as soon as there is one match found + return_first :: boolean() +}). + +%% Holds the variable parts of each search. +-record(acc, { + %% The current searching target topic/filter + target, + %% The number of moves. + %% This is used to check if the target has been moved + %% after attempting to append '+' to the searching prefix + moves = 0, + %% Search result accumulation + matches = [] +}). + +%% All valid utf8 bytes are less than 255. +-define(CEILING_TOPIC, <<255>>). +-define(CEILING, {?CEILING_TOPIC, {1}}). + +%% @doc Return a key which is greater than all other valid keys. +ceiling() -> + ?CEILING. + +%% @doc Make a search-key for the given topic. +-spec make_key(emqx_types:topic(), ID) -> key(ID). +make_key(Topic, ID) when is_binary(Topic) -> + Words = words(Topic), + Key = + case lists:any(fun erlang:is_atom/1, Words) of + true -> + %% it's a wildcard + {Words, {ID}}; + false -> + %% Not a wildcard. We do not split the topic + %% because they can be found with direct lookups. 
+ %% it is also more compact in memory. + {Topic, {ID}} + end, + Key > ceiling() andalso throw({invalid_topic, Topic}), + Key. + +%% @doc Extract record ID from the match. +-spec get_id(key(ID)) -> ID. +get_id({_Filter, {ID}}) -> + ID. + +%% @doc Extract topic (or topic filter) from the match. +-spec get_topic(key(_ID)) -> emqx_types:topic(). +get_topic({Filter, _ID}) when is_list(Filter) -> + emqx_topic:join(Filter); +get_topic({Topic, _ID}) -> + Topic. + +%% Make the base-key which can be used to locate the desired search target. +base(Prefix) -> + {Prefix, {}}. + +%% Move the search target to the key next to the given Base. +move_up(#ctx{nextf = NextF}, #acc{moves = Moves} = Acc, Base) -> + Acc#acc{target = NextF(Base), moves = Moves + 1}. + +%% The current target key is a match, add it to the accumulation. +add(C, #acc{target = Key} = Acc) -> + add(C, Acc, Key). + +%% Add the given key to the accumulation. +add(#ctx{return_first = true}, _Acc, Key) -> + throw({return_first, Key}); +add(_C, #acc{matches = Matches} = Acc, Key) -> + Acc#acc{matches = match_add(Key, Matches)}. + +%% @doc Match given topic against the index and return the first match, or `false` if +%% no match is found. +-spec match(emqx_types:topic(), nextf()) -> false | key(_). +match(Topic, NextF) -> + try search(Topic, NextF, [return_first]) of + [] -> + false + catch + throw:{return_first, Res} -> + Res + end. + +%% @doc Match given topic against the index and return _all_ matches. +%% If `unique` option is given, return only unique matches by record ID. +-spec matches(emqx_types:topic(), nextf(), opts()) -> [key(_)]. +matches(Topic, NextF, Opts) -> + search(Topic, NextF, Opts). + +%% @doc Entrypoint of the search for a given topic. +search(Topic, NextF, Opts) -> + {Words, Prefix} = match_init(Topic), + Context = #ctx{ + nextf = NextF, + prefix0 = Prefix, + words0 = Words, + return_first = proplists:get_bool(return_first, Opts) + }, + Matches0 = + case proplists:get_bool(unique, Opts) of + true -> + #{}; + false -> + [] + end, + Acc = search_new(Context, base(Prefix), #acc{matches = Matches0}), + #acc{matches = Matches} = match_non_wildcards(Context, base(Topic), Acc), + case is_map(Matches) of + true -> + maps:values(Matches); + false -> + Matches + end. + +%% The recursive entrypoint of the trie-search algorithm. +%% Always start from the initial prefix and words. +search_new(#ctx{prefix0 = Prefix, words0 = Words0} = C, NewBase, Acc0) -> + #acc{target = {Filter, _}} = Acc = move_up(C, Acc0, NewBase), + case Prefix of + [] -> + %% This is not a '$' topic, start from '+' + search_plus(C, Words0, Filter, [], Acc); + [DollarWord] -> + %% Start from the '$' word + search_up(C, DollarWord, Words0, Filter, [], Acc) + end. + +%% Search to the bigger end of ordered collection of topics and topic-filters. +search_up(C, Word, Words, Filter, RPrefix, #acc{target = Base} = Acc) -> + case compare(Word, Filter, Words) of + {match, full} -> + search_new(C, Base, add(C, Acc)); + {match, prefix} -> + search_new(C, Base, Acc); + lower -> + Acc; + higher -> + NewBase = base(lists:reverse([Word | RPrefix])), + search_new(C, NewBase, Acc); + shorter -> + search_plus(C, Words, tl(Filter), [Word | RPrefix], Acc) + end. + +%% Try to use '+' as the next word in the prefix. +search_plus(C, [W, X | Words], [W, X | Filter], RPrefix, Acc) -> + %% Directly append the current word to the matching prefix (RPrefix). + %% Micro optimization: try not to call the next clause because + %% it is not a continuation. 
+ search_plus(C, [X | Words], [X | Filter], [W | RPrefix], Acc); +search_plus(C, [W | Words], Filter, RPrefix, Acc) -> + M = Acc#acc.moves, + case search_up(C, '+', Words, Filter, RPrefix, Acc) of + #acc{moves = M1} = Acc1 when M1 =:= M -> + %% Keep searching for one which has W as the next word + search_up(C, W, Words, Filter, RPrefix, Acc1); + Acc1 -> + %% Already searched + Acc1 + end. + +%% Compare prefix word then the next words in suffix against the search-target +%% topic or topic-filter. +compare(_, NotFilter, _) when is_binary(NotFilter) -> + lower; +compare(H, [H | Filter], Words) -> + compare(Filter, Words); +compare(_, ['#'], _Words) -> + {match, full}; +compare(H1, [H2 | _T2], _Words) when H1 < H2 -> + lower; +compare(_H, [_ | _], _Words) -> + higher. + +%% Now compare the filter suffix and the topic suffix. +compare([], []) -> + {match, full}; +compare([], _Words) -> + {match, prefix}; +compare(['#'], _Words) -> + {match, full}; +compare([_ | _], []) -> + lower; +compare([_ | _], _Words) -> + %% cannot know if it's a match, lower, or higher, + %% must search with a longer prefix. + shorter. + +match_add(K = {_Filter, ID}, Acc = #{}) -> + % NOTE: ensuring uniqueness by record ID + Acc#{ID => K}; +match_add(K, Acc) -> + [K | Acc]. + +match_init(Topic) -> + case words(Topic) of + [W = <<"$", _/bytes>> | Rest] -> + % NOTE + % This will effectively skip attempts to match special topics to `#` or `+/...`. + {Rest, [W]}; + Words -> + {Words, []} + end. + +-spec words(emqx_types:topic()) -> [word()]. +words(Topic) when is_binary(Topic) -> + % NOTE + % This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty + % tokens to ''. This is needed to keep ordering of words consistent with what + % `match_filter/3` expects. + [word(W) || W <- emqx_topic:tokens(Topic)]. + +-spec word(binary()) -> word(). +word(<<"+">>) -> '+'; +word(<<"#">>) -> '#'; +word(Bin) -> Bin. + +match_non_wildcards(#ctx{nextf = NextF} = C, {Topic, _} = Base, Acc) -> + case NextF(Base) of + {Topic, _ID} = Key -> + match_non_wildcards(C, Key, add(C, Acc, Key)); + _Other -> + Acc + end. diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 9058f3597..dd1fc07f3 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -25,42 +25,71 @@ -import(emqx_proper_types, [scaled/2]). all() -> - emqx_common_test_helpers:all(?MODULE). + [ + {group, ets}, + {group, gb_tree} + ]. -t_insert(_) -> - Tab = emqx_topic_index:new(), - true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab), - true = emqx_topic_index:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab), - true = emqx_topic_index:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab), - ?assertEqual(<<"sensor/#">>, topic(match(<<"sensor">>, Tab))), - ?assertEqual(t_insert_3, id(match(<<"sensor">>, Tab))). +groups() -> + All = emqx_common_test_helpers:all(?MODULE), + [ + {ets, All}, + {gb_tree, All} + ]. -t_match(_) -> - Tab = emqx_topic_index:new(), - true = emqx_topic_index:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab), - true = emqx_topic_index:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab), - true = emqx_topic_index:insert(<<"sensor/#">>, t_match_3, <<>>, Tab), +init_per_group(ets, Config) -> + [{index_module, emqx_topic_index} | Config]; +init_per_group(gb_tree, Config) -> + [{index_module, emqx_topic_gbt} | Config]. + +end_per_group(_Group, _Config) -> + ok. + +get_module(Config) -> + proplists:get_value(index_module, Config). 
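%% For illustration (IDs made up): the keys both backends store are the ones
%% produced by emqx_trie_search:make_key/2, i.e. wildcard filters split into
%% words, plain topics kept as whole binaries:
%%
%%   {[<<"sensor">>, '+', '#'], {id1}}  %% from filter <<"sensor/+/#">>
%%   {<<"sensor/1/temp">>, {id2}}       %% from the plain topic <<"sensor/1/temp">>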
+ +t_insert(Config) -> + M = get_module(Config), + Tab = M:new(), + true = M:insert(<<"sensor/1/metric/2">>, t_insert_1, <<>>, Tab), + true = M:insert(<<"sensor/+/#">>, t_insert_2, <<>>, Tab), + true = M:insert(<<"sensor/#">>, t_insert_3, <<>>, Tab), + ?assertEqual(<<"sensor/#">>, topic(match(M, <<"sensor">>, Tab))), + ?assertEqual(t_insert_3, id(match(M, <<"sensor">>, Tab))). + +t_match(Config) -> + M = get_module(Config), + Tab = M:new(), + true = M:insert(<<"sensor/1/metric/2">>, t_match_1, <<>>, Tab), + true = M:insert(<<"sensor/+/#">>, t_match_2, <<>>, Tab), + true = M:insert(<<"sensor/#">>, t_match_3, <<>>, Tab), ?assertMatch( [<<"sensor/#">>, <<"sensor/+/#">>], - [topic(M) || M <- matches(<<"sensor/1">>, Tab)] + [topic(X) || X <- matches(M, <<"sensor/1">>, Tab)] ). -t_match2(_) -> - Tab = emqx_topic_index:new(), - true = emqx_topic_index:insert(<<"#">>, t_match2_1, <<>>, Tab), - true = emqx_topic_index:insert(<<"+/#">>, t_match2_2, <<>>, Tab), - true = emqx_topic_index:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab), +t_match2(Config) -> + M = get_module(Config), + Tab = M:new(), + true = M:insert(<<"#">>, t_match2_1, <<>>, Tab), + true = M:insert(<<"+/#">>, t_match2_2, <<>>, Tab), + true = M:insert(<<"+/+/#">>, t_match2_3, <<>>, Tab), ?assertEqual( [<<"#">>, <<"+/#">>, <<"+/+/#">>], - [topic(M) || M <- matches(<<"a/b/c">>, Tab)] + [topic(X) || X <- matches(M, <<"a/b/c">>, Tab)] ), ?assertEqual( false, - emqx_topic_index:match(<<"$SYS/broker/zenmq">>, Tab) + M:match(<<"$SYS/broker/zenmq">>, Tab) + ), + ?assertEqual( + [], + matches(M, <<"$SYS/broker/zenmq">>, Tab) ). -t_match3(_) -> - Tab = emqx_topic_index:new(), +t_match3(Config) -> + M = get_module(Config), + Tab = M:new(), Records = [ {<<"d/#">>, t_match3_1}, {<<"a/b/+">>, t_match3_2}, @@ -69,37 +98,39 @@ t_match3(_) -> {<<"$SYS/#">>, t_match3_sys} ], lists:foreach( - fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end, Records ), - Matched = matches(<<"a/b/c">>, Tab), + Matched = matches(M, <<"a/b/c">>, Tab), case length(Matched) of 3 -> ok; _ -> error({unexpected, Matched}) end, ?assertEqual( t_match3_sys, - id(match(<<"$SYS/a/b/c">>, Tab)) + id(match(M, <<"$SYS/a/b/c">>, Tab)) ). -t_match4(_) -> - Tab = emqx_topic_index:new(), +t_match4(Config) -> + M = get_module(Config), + Tab = M:new(), Records = [{<<"/#">>, t_match4_1}, {<<"/+">>, t_match4_2}, {<<"/+/a/b/c">>, t_match4_3}], lists:foreach( - fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end, Records ), ?assertEqual( [<<"/#">>, <<"/+">>], - [topic(M) || M <- matches(<<"/">>, Tab)] + [topic(X) || X <- matches(M, <<"/">>, Tab)] ), ?assertEqual( [<<"/#">>, <<"/+/a/b/c">>], - [topic(M) || M <- matches(<<"/0/a/b/c">>, Tab)] + [topic(X) || X <- matches(M, <<"/0/a/b/c">>, Tab)] ). -t_match5(_) -> - Tab = emqx_topic_index:new(), +t_match5(Config) -> + M = get_module(Config), + Tab = M:new(), T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, Records = [ {<<"#">>, t_match5_1}, @@ -107,58 +138,63 @@ t_match5(_) -> {<>, t_match5_3} ], lists:foreach( - fun({Topic, ID}) -> emqx_topic_index:insert(Topic, ID, <<>>, Tab) end, + fun({Topic, ID}) -> M:insert(Topic, ID, <<>>, Tab) end, Records ), ?assertEqual( [<<"#">>, <>], - [topic(M) || M <- matches(T, Tab)] + [topic(X) || X <- matches(M, T, Tab)] ), ?assertEqual( [<<"#">>, <>, <>], - [topic(M) || M <- matches(<>, Tab)] + [topic(X) || X <- matches(M, <>, Tab)] ). 
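%% The deep-topic cases around here exercise the O(N * L) bound from the
%% module doc with 26-level topics; a comparable all-plus filter can be built
%% ad hoc (sketch) as:
%%   AllPlus = iolist_to_binary(lists:join("/", lists:duplicate(26, "+") ++ ["#"])).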
-t_match6(_) -> - Tab = emqx_topic_index:new(), +t_match6(Config) -> + M = get_module(Config), + Tab = M:new(), T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, W = <<"+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/+/#">>, - emqx_topic_index:insert(W, ID = t_match6, <<>>, Tab), - ?assertEqual(ID, id(match(T, Tab))). + M:insert(W, ID = t_match6, <<>>, Tab), + ?assertEqual(ID, id(match(M, T, Tab))). -t_match7(_) -> - Tab = emqx_topic_index:new(), +t_match7(Config) -> + M = get_module(Config), + Tab = M:new(), T = <<"a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z">>, W = <<"a/+/c/+/e/+/g/+/i/+/k/+/m/+/o/+/q/+/s/+/u/+/w/+/y/+/#">>, - emqx_topic_index:insert(W, t_match7, <<>>, Tab), - ?assertEqual(W, topic(match(T, Tab))). + M:insert(W, t_match7, <<>>, Tab), + ?assertEqual(W, topic(match(M, T, Tab))). -t_match_fast_forward(_) -> - Tab = emqx_topic_index:new(), - emqx_topic_index:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab), - emqx_topic_index:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/c/+">>, id3, <<>>, Tab), +t_match_fast_forward(Config) -> + M = get_module(Config), + Tab = M:new(), + M:insert(<<"a/b/1/2/3/4/5/6/7/8/9/#">>, id1, <<>>, Tab), + M:insert(<<"z/y/x/+/+">>, id2, <<>>, Tab), + M:insert(<<"a/b/c/+">>, id3, <<>>, Tab), % dbg:tracer(), % dbg:p(all, c), % dbg:tpl({ets, next, '_'}, x), - ?assertEqual(id1, id(match(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))), - ?assertEqual([id1], [id(M) || M <- matches(<<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]). + ?assertEqual(id1, id(match(M, <<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab))), + ?assertEqual([id1], [id(X) || X <- matches(M, <<"a/b/1/2/3/4/5/6/7/8/9/0">>, Tab)]). -t_match_unique(_) -> - Tab = emqx_topic_index:new(), - emqx_topic_index:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab), - emqx_topic_index:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab), +t_match_unique(Config) -> + M = get_module(Config), + Tab = M:new(), + M:insert(<<"a/b/c">>, t_match_id1, <<>>, Tab), + M:insert(<<"a/b/+">>, t_match_id1, <<>>, Tab), + M:insert(<<"a/b/c/+">>, t_match_id2, <<>>, Tab), ?assertEqual( [t_match_id1, t_match_id1], - [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [])] + [id(X) || X <- matches(M, <<"a/b/c">>, Tab, [])] ), ?assertEqual( [t_match_id1], - [id(M) || M <- emqx_topic_index:matches(<<"a/b/c">>, Tab, [unique])] + [id(X) || X <- matches(M, <<"a/b/c">>, Tab, [unique])] ). -t_match_wildcard_edge_cases(_) -> +t_match_wildcard_edge_cases(Config) -> + M = get_module(Config), CommonTopics = [ <<"a/b">>, <<"a/b/#">>, @@ -179,32 +215,33 @@ t_match_wildcard_edge_cases(_) -> {[<<"/">>, <<"+">>], <<"a">>, [2]} ], F = fun({Topics, TopicName, Expected}) -> - Tab = emqx_topic_index:new(), - _ = [emqx_topic_index:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)], + Tab = M:new(), + _ = [M:insert(T, N, <<>>, Tab) || {N, T} <- lists:enumerate(Topics)], ?assertEqual( lists:last(Expected), - id(emqx_topic_index:match(TopicName, Tab)), + id(M:match(TopicName, Tab)), #{"Base topics" => Topics, "Topic name" => TopicName} ), ?assertEqual( Expected, - [id(M) || M <- emqx_topic_index:matches(TopicName, Tab, [unique])], + [id(X) || X <- matches(M, TopicName, Tab, [unique])], #{"Base topics" => Topics, "Topic name" => TopicName} ) end, lists:foreach(F, Datasets). 
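%% The property below cross-checks the index against a brute-force reference
%% matcher; conceptually (sketch):
%%
%%   Reference = fun(Topic, Filters) ->
%%       [N || {N, F} <- Filters, emqx_topic:match(Topic, F)]
%%   end.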
-t_prop_matches(_) -> +t_prop_matches(Config) -> + M = get_module(Config), ?assert( proper:quickcheck( - topic_matches_prop(), + topic_matches_prop(M), [{max_size, 100}, {numtests, 100}] ) ), Statistics = [{C, account(C)} || C <- [filters, topics, matches, maxhits]], ct:pal("Statistics: ~p", [maps:from_list(Statistics)]). -topic_matches_prop() -> +topic_matches_prop(M) -> ?FORALL( % Generate a longer list of topics and a shorter list of topic filter patterns. #{ @@ -219,12 +256,12 @@ topic_matches_prop() -> patterns => list(topic_filter_pattern_t()) }), begin - Tab = emqx_topic_index:new(), + Tab = M:new(), Topics = [emqx_topic:join(T) || T <- TTopics], % Produce topic filters from generated topics and patterns. % Number of filters is equal to the number of patterns, most of the time. Filters = lists:enumerate(mk_filters(Pats, TTopics)), - _ = [emqx_topic_index:insert(F, N, <<>>, Tab) || {N, F} <- Filters], + _ = [M:insert(F, N, <<>>, Tab) || {N, F} <- Filters], % Gather some basic statistics _ = account(filters, length(Filters)), _ = account(topics, NTopics = length(Topics)), @@ -233,7 +270,7 @@ topic_matches_prop() -> % matching it against the list of filters one by one. lists:all( fun(Topic) -> - Ids1 = [id(M) || M <- emqx_topic_index:matches(Topic, Tab, [unique])], + Ids1 = [id(X) || X <- matches(M, Topic, Tab, [unique])], Ids2 = lists:filtermap( fun({N, F}) -> case emqx_topic:match(Topic, F) of @@ -277,17 +314,20 @@ account(Counter) -> %% -match(T, Tab) -> - emqx_topic_index:match(T, Tab). +match(M, T, Tab) -> + M:match(T, Tab). -matches(T, Tab) -> - lists:sort(emqx_topic_index:matches(T, Tab, [])). +matches(M, T, Tab) -> + lists:sort(M:matches(T, Tab, [])). + +matches(M, T, Tab, Opts) -> + M:matches(T, Tab, Opts). id(Match) -> - emqx_topic_index:get_id(Match). + emqx_trie_search:get_id(Match). topic(Match) -> - emqx_topic_index:get_topic(Match). + emqx_trie_search:get_topic(Match). %% From a1e66356147245a57ba1fc58d888927cbfdd3093 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Wed, 23 Aug 2023 10:12:46 +0200 Subject: [PATCH 04/12] refactor(topic_index): no forced ceiling entry in index table --- apps/emqx/src/emqx_topic_gbt.erl | 10 +++++-- apps/emqx/src/emqx_topic_index.erl | 4 +-- apps/emqx/src/emqx_trie_search.erl | 45 ++++++++++++------------------ 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/apps/emqx/src/emqx_topic_gbt.erl b/apps/emqx/src/emqx_topic_gbt.erl index da6320bc7..dade70cee 100644 --- a/apps/emqx/src/emqx_topic_gbt.erl +++ b/apps/emqx/src/emqx_topic_gbt.erl @@ -104,12 +104,16 @@ gbt_update(Name, Tree) -> true. gbt_next(nil, _Input) -> - emqx_trie_search:ceiling(); + '$end_of_table'; gbt_next({P, _V, _Smaller, Bigger}, K) when K >= P -> gbt_next(Bigger, K); gbt_next({P, _V, Smaller, _Bigger}, K) -> - NextKey = gbt_next(Smaller, K), - min(P, NextKey). + case gbt_next(Smaller, K) of + '$end_of_table' -> + P; + NextKey -> + NextKey + end. make_nextf(Name) -> {_SizeWeDontCare, TheTree} = gbt(Name), diff --git a/apps/emqx/src/emqx_topic_index.erl b/apps/emqx/src/emqx_topic_index.erl index e1da5b3b7..acbf36627 100644 --- a/apps/emqx/src/emqx_topic_index.erl +++ b/apps/emqx/src/emqx_topic_index.erl @@ -35,9 +35,7 @@ %% Usable mostly for testing purposes. -spec new() -> ets:table(). new() -> - T = ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]), - ets:insert(T, {emqx_trie_search:ceiling(), []}), - T. + ets:new(?MODULE, [public, ordered_set, {read_concurrency, true}]). 
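%% Illustrative consequence of dropping the ceiling entry: a fresh table holds
%% no keys at all, so iteration simply runs into the native end marker, e.g.
%%   Tab = emqx_topic_index:new(),
%%   '$end_of_table' = ets:next(Tab, {[], {}}).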
%% @doc Insert a new entry into the index that associates given topic filter to given %% record ID, and attaches arbitrary record to the entry. This allows users to choose diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 8d9463c26..a25f3c9f4 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -98,14 +98,14 @@ -module(emqx_trie_search). --export([ceiling/0, make_key/2]). +-export([make_key/2]). -export([match/2, matches/3, get_id/1, get_topic/1]). -export_type([key/1, word/0, nextf/0, opts/0]). -type word() :: binary() | '+' | '#'. -type base_key() :: {binary() | [word()], {}}. -type key(ID) :: {binary() | [word()], {ID}}. --type nextf() :: fun((key(_) | base_key()) -> key(_)). +-type nextf() :: fun((key(_) | base_key()) -> '$end_of_table' | key(_)). -type opts() :: [unique | return_first]. %% Holds the constant values of each search. @@ -134,31 +134,20 @@ matches = [] }). -%% All valid utf8 bytes are less than 255. --define(CEILING_TOPIC, <<255>>). --define(CEILING, {?CEILING_TOPIC, {1}}). - -%% @doc Return a key which is greater than all other valid keys. -ceiling() -> - ?CEILING. - %% @doc Make a search-key for the given topic. -spec make_key(emqx_types:topic(), ID) -> key(ID). make_key(Topic, ID) when is_binary(Topic) -> Words = words(Topic), - Key = - case lists:any(fun erlang:is_atom/1, Words) of - true -> - %% it's a wildcard - {Words, {ID}}; - false -> - %% Not a wildcard. We do not split the topic - %% because they can be found with direct lookups. - %% it is also more compact in memory. - {Topic, {ID}} - end, - Key > ceiling() andalso throw({invalid_topic, Topic}), - Key. + case lists:any(fun erlang:is_atom/1, Words) of + true -> + %% it's a wildcard + {Words, {ID}}; + false -> + %% Not a wildcard. We do not split the topic + %% because they can be found with direct lookups. + %% it is also more compact in memory. + {Topic, {ID}} + end. %% @doc Extract record ID from the match. -spec get_id(key(ID)) -> ID. @@ -236,12 +225,14 @@ search(Topic, NextF, Opts) -> %% The recursive entrypoint of the trie-search algorithm. %% Always start from the initial prefix and words. search_new(#ctx{prefix0 = Prefix, words0 = Words0} = C, NewBase, Acc0) -> - #acc{target = {Filter, _}} = Acc = move_up(C, Acc0, NewBase), - case Prefix of - [] -> + case move_up(C, Acc0, NewBase) of + #acc{target = '$end_of_table'} = Acc -> + Acc; + #acc{target = {Filter, _}} = Acc when Prefix =:= [] -> %% This is not a '$' topic, start from '+' search_plus(C, Words0, Filter, [], Acc); - [DollarWord] -> + #acc{target = {Filter, _}} = Acc -> + [DollarWord] = Prefix, %% Start from the '$' word search_up(C, DollarWord, Words0, Filter, [], Acc) end. From a30d87e14f3ab6ce7635159c47df1275861d9598 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 24 Aug 2023 11:51:34 +0200 Subject: [PATCH 05/12] refactor(topic_index): remove more unnecessary next calls also avoid using records (setelement) for recursive return values --- apps/emqx/src/emqx_trie_search.erl | 119 ++++++++++------------ apps/emqx/test/emqx_topic_index_SUITE.erl | 15 +++ 2 files changed, 71 insertions(+), 63 deletions(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index a25f3c9f4..36ccd656b 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -102,12 +102,15 @@ -export([match/2, matches/3, get_id/1, get_topic/1]). -export_type([key/1, word/0, nextf/0, opts/0]). +-define(END, '$end_of_table'). 
+ -type word() :: binary() | '+' | '#'. -type base_key() :: {binary() | [word()], {}}. -type key(ID) :: {binary() | [word()], {ID}}. --type nextf() :: fun((key(_) | base_key()) -> '$end_of_table' | key(_)). +-type nextf() :: fun((key(_) | base_key()) -> ?END | key(_)). -type opts() :: [unique | return_first]. + %% Holds the constant values of each search. -record(ctx, { %% A function which can quickly find the immediate-next record of the given prefix @@ -122,18 +125,6 @@ return_first :: boolean() }). -%% Holds the variable parts of each search. --record(acc, { - %% The current searching target topic/filter - target, - %% The number of moves. - %% This is used to check if the target has been moved - %% after attempting to append '+' to the searching prefix - moves = 0, - %% Search result accumulation - matches = [] -}). - %% @doc Make a search-key for the given topic. -spec make_key(emqx_types:topic(), ID) -> key(ID). make_key(Topic, ID) when is_binary(Topic) -> @@ -166,18 +157,14 @@ base(Prefix) -> {Prefix, {}}. %% Move the search target to the key next to the given Base. -move_up(#ctx{nextf = NextF}, #acc{moves = Moves} = Acc, Base) -> - Acc#acc{target = NextF(Base), moves = Moves + 1}. - -%% The current target key is a match, add it to the accumulation. -add(C, #acc{target = Key} = Acc) -> - add(C, Acc, Key). +move_up(#ctx{nextf = NextF}, Base) -> + NextF(Base). %% Add the given key to the accumulation. add(#ctx{return_first = true}, _Acc, Key) -> throw({return_first, Key}); -add(_C, #acc{matches = Matches} = Acc, Key) -> - Acc#acc{matches = match_add(Key, Matches)}. +add(_C, Acc, Key) -> + match_add(Key, Acc). %% @doc Match given topic against the index and return the first match, or `false` if %% no match is found. @@ -206,69 +193,69 @@ search(Topic, NextF, Opts) -> words0 = Words, return_first = proplists:get_bool(return_first, Opts) }, - Matches0 = + Acc0 = case proplists:get_bool(unique, Opts) of true -> #{}; false -> [] end, - Acc = search_new(Context, base(Prefix), #acc{matches = Matches0}), - #acc{matches = Matches} = match_non_wildcards(Context, base(Topic), Acc), - case is_map(Matches) of + {MaybeEnd, Acc1} = search_new(Context, base(Prefix), Acc0), + Acc = match_topics(Context, Topic, MaybeEnd, Acc1), + case is_map(Acc) of true -> - maps:values(Matches); + maps:values(Acc); false -> - Matches + Acc end. %% The recursive entrypoint of the trie-search algorithm. %% Always start from the initial prefix and words. -search_new(#ctx{prefix0 = Prefix, words0 = Words0} = C, NewBase, Acc0) -> - case move_up(C, Acc0, NewBase) of - #acc{target = '$end_of_table'} = Acc -> - Acc; - #acc{target = {Filter, _}} = Acc when Prefix =:= [] -> - %% This is not a '$' topic, start from '+' - search_plus(C, Words0, Filter, [], Acc); - #acc{target = {Filter, _}} = Acc -> - [DollarWord] = Prefix, - %% Start from the '$' word - search_up(C, DollarWord, Words0, Filter, [], Acc) - end. +search_new(C, NewBase, Acc) -> + search_moved(C, move_up(C, NewBase), Acc). + +search_moved(_, ?END, Acc) -> + {?END, Acc}; +search_moved(#ctx{prefix0 = [], words0 = Words0} = C, {Filter, _} = T, Acc) -> + %% This is not a '$' topic, start from '+' + search_plus(C, Words0, Filter, [], T, Acc); +search_moved(#ctx{prefix0 = Prefix, words0 = Words0} = C, {Filter, _} = T, Acc) -> + [DollarWord] = Prefix, + %% Start from the '$' word + search_up(C, DollarWord, Words0, Filter, [], T, Acc). %% Search to the bigger end of ordered collection of topics and topic-filters. 
-search_up(C, Word, Words, Filter, RPrefix, #acc{target = Base} = Acc) -> +search_up(C, Word, Words, Filter, RPrefix, T, Acc) -> case compare(Word, Filter, Words) of {match, full} -> - search_new(C, Base, add(C, Acc)); + search_new(C, T, add(C, Acc, T)); {match, prefix} -> - search_new(C, Base, Acc); + search_new(C, T, Acc); lower -> - Acc; + {T, Acc}; higher -> NewBase = base(lists:reverse([Word | RPrefix])), search_new(C, NewBase, Acc); shorter -> - search_plus(C, Words, tl(Filter), [Word | RPrefix], Acc) + search_plus(C, Words, tl(Filter), [Word | RPrefix], T, Acc) end. %% Try to use '+' as the next word in the prefix. -search_plus(C, [W, X | Words], [W, X | Filter], RPrefix, Acc) -> +search_plus(C, [W, X | Words], [W, X | Filter], RPrefix, T, Acc) -> %% Directly append the current word to the matching prefix (RPrefix). %% Micro optimization: try not to call the next clause because %% it is not a continuation. - search_plus(C, [X | Words], [X | Filter], [W | RPrefix], Acc); -search_plus(C, [W | Words], Filter, RPrefix, Acc) -> - M = Acc#acc.moves, - case search_up(C, '+', Words, Filter, RPrefix, Acc) of - #acc{moves = M1} = Acc1 when M1 =:= M -> - %% Keep searching for one which has W as the next word - search_up(C, W, Words, Filter, RPrefix, Acc1); - Acc1 -> - %% Already searched - Acc1 - end. + search_plus(C, [X | Words], [X | Filter], [W | RPrefix], T, Acc); +search_plus(C, [W | Words], ['+' | _] = Filter, RPrefix, T, Acc) -> + case search_up(C, '+', Words, Filter, RPrefix, T, Acc) of + {T, Acc} -> + search_up(C, W, Words, Filter, RPrefix, T, Acc); + TargetMoved -> + TargetMoved + end; +search_plus(C, [W | Words], Filter, RPrefix, T, Acc) -> + %% not a plus + search_up(C, W, Words, Filter, RPrefix, T, Acc). %% Compare prefix word then the next words in suffix against the search-target %% topic or topic-filter. @@ -326,10 +313,16 @@ word(<<"+">>) -> '+'; word(<<"#">>) -> '#'; word(Bin) -> Bin. -match_non_wildcards(#ctx{nextf = NextF} = C, {Topic, _} = Base, Acc) -> - case NextF(Base) of - {Topic, _ID} = Key -> - match_non_wildcards(C, Key, add(C, Acc, Key)); - _Other -> - Acc - end. +%% match non-wildcard topics +match_topics(#ctx{nextf = NextF} = C, Topic, {Topic, _} = Key, Acc) -> + %% found a topic match + match_topics(C, Topic, NextF(Key), add(C, Acc, Key)); +match_topics(#ctx{nextf = NextF} = C, Topic, {F, _}, Acc) when F < Topic -> + %% the last key is a filter, try jump to the topic + match_topics(C, Topic, NextF(base(Topic)), Acc); +match_topics(#ctx{nextf = NextF} = C, Topic, continue, Acc) -> + %% the last key is a '+/...' wildcard + match_topics(C, Topic, NextF(base(Topic)), Acc); +match_topics(_C, _Topic, _Key, Acc) -> + %% gone pass the topic + Acc. diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index dd1fc07f3..eb0ac0e40 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -166,6 +166,21 @@ t_match7(Config) -> M:insert(W, t_match7, <<>>, Tab), ?assertEqual(W, topic(match(M, T, Tab))). 
+t_match8(Config) -> + M = get_module(Config), + Tab = M:new(), + Filters = [<<"+">>, <<"dev/global/sensor">>, <<"dev/+/sensor/#">>], + IDs = [1,2,3], + Keys = [{F, ID} || F <- Filters, ID <- IDs], + lists:foreach(fun({F, ID}) -> + M:insert(F, ID, <<>>, Tab) + end, Keys), + Topic = <<"dev/global/sensor">>, + Matches = lists:sort(matches(M, Topic, Tab)), + ?assertEqual([<<"dev/+/sensor/#">>, <<"dev/+/sensor/#">>, <<"dev/+/sensor/#">>, + <<"dev/global/sensor">>, <<"dev/global/sensor">>, <<"dev/global/sensor">>], + [emqx_topic_index:get_topic(Match) || Match <- Matches]). + t_match_fast_forward(Config) -> M = get_module(Config), Tab = M:new(), From 62423b0b12deac9641152a5ff01236660fbf3746 Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Thu, 24 Aug 2023 13:08:24 +0200 Subject: [PATCH 06/12] refactor(topic_index): less special handling for leading $ words --- apps/emqx/src/emqx_trie_search.erl | 92 ++++++++++------------- apps/emqx/test/emqx_topic_index_SUITE.erl | 25 ++++-- 2 files changed, 56 insertions(+), 61 deletions(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 36ccd656b..71bc8d8a0 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -16,10 +16,10 @@ %% @doc Topic index for matching topics to topic filters. %% -%% Works on top of ETS ordered_set table. Keys are tuples constructed from -%% parsed topic filters and record IDs, wrapped in a tuple to order them -%% strictly greater than unit tuple (`{}`). Existing table may be used if -%% existing keys will not collide with index keys. +%% Works on top of a ordered collection data set, such as ETS ordered_set table. +%% Keys are tuples constructed from parsed topic filters and record IDs, +%% wrapped in a tuple to order them strictly greater than unit tuple (`{}`). +%% Existing table may be used if existing keys will not collide with index keys. %% %% Designed to effectively answer questions like: %% 1. Does any topic filter match given topic? @@ -110,15 +110,10 @@ -type nextf() :: fun((key(_) | base_key()) -> ?END | key(_)). -type opts() :: [unique | return_first]. - %% Holds the constant values of each search. -record(ctx, { %% A function which can quickly find the immediate-next record of the given prefix nextf :: nextf(), - %% The initial prefix to start searching from - %% if the input topic starts with a dollar-word, it's the first word like [<<"$SYS">>] - %% otherwise it's a [] - prefix0 :: [word()], %% The initial words of a topic words0 :: [word()], %% Return as soon as there is one match found @@ -129,7 +124,7 @@ -spec make_key(emqx_types:topic(), ID) -> key(ID). make_key(Topic, ID) when is_binary(Topic) -> Words = words(Topic), - case lists:any(fun erlang:is_atom/1, Words) of + case emqx_topic:wildcard(Words) of true -> %% it's a wildcard {Words, {ID}}; @@ -186,10 +181,9 @@ matches(Topic, NextF, Opts) -> %% @doc Entrypoint of the search for a given topic. 
search(Topic, NextF, Opts) -> - {Words, Prefix} = match_init(Topic), + Words = words(Topic), Context = #ctx{ nextf = NextF, - prefix0 = Prefix, words0 = Words, return_first = proplists:get_bool(return_first, Opts) }, @@ -200,7 +194,15 @@ search(Topic, NextF, Opts) -> false -> [] end, - {MaybeEnd, Acc1} = search_new(Context, base(Prefix), Acc0), + Base = + case hd(Words) of + <<$$, _/binary>> -> + %% skip all filters starts with # or + + base([hd(Words)]); + _ -> + base([]) + end, + {MaybeEnd, Acc1} = search_new(Context, Base, Acc0), Acc = match_topics(Context, Topic, MaybeEnd, Acc1), case is_map(Acc) of true -> @@ -211,18 +213,30 @@ search(Topic, NextF, Opts) -> %% The recursive entrypoint of the trie-search algorithm. %% Always start from the initial prefix and words. -search_new(C, NewBase, Acc) -> - search_moved(C, move_up(C, NewBase), Acc). +search_new(#ctx{words0 = Words} = C, NewBase, Acc) -> + case move_up(C, NewBase) of + ?END -> + {?END, Acc}; + {Filter, _} = T -> + search_plus(C, Words, Filter, [], T, Acc) + end. -search_moved(_, ?END, Acc) -> - {?END, Acc}; -search_moved(#ctx{prefix0 = [], words0 = Words0} = C, {Filter, _} = T, Acc) -> - %% This is not a '$' topic, start from '+' - search_plus(C, Words0, Filter, [], T, Acc); -search_moved(#ctx{prefix0 = Prefix, words0 = Words0} = C, {Filter, _} = T, Acc) -> - [DollarWord] = Prefix, - %% Start from the '$' word - search_up(C, DollarWord, Words0, Filter, [], T, Acc). +%% Try to use '+' as the next word in the prefix. +search_plus(C, [W, X | Words], [W, X | Filter], RPrefix, T, Acc) -> + %% Directly append the current word to the matching prefix (RPrefix). + %% Micro optimization: try not to call the next clause because + %% it is not a continuation. + search_plus(C, [X | Words], [X | Filter], [W | RPrefix], T, Acc); +search_plus(C, [W | Words], ['+' | _] = Filter, RPrefix, T, Acc) -> + case search_up(C, '+', Words, Filter, RPrefix, T, Acc) of + {T, Acc1} -> + search_up(C, W, Words, Filter, RPrefix, T, Acc1); + TargetMoved -> + TargetMoved + end; +search_plus(C, [W | Words], Filter, RPrefix, T, Acc) -> + %% not a plus + search_up(C, W, Words, Filter, RPrefix, T, Acc). %% Search to the bigger end of ordered collection of topics and topic-filters. search_up(C, Word, Words, Filter, RPrefix, T, Acc) -> @@ -240,23 +254,6 @@ search_up(C, Word, Words, Filter, RPrefix, T, Acc) -> search_plus(C, Words, tl(Filter), [Word | RPrefix], T, Acc) end. -%% Try to use '+' as the next word in the prefix. -search_plus(C, [W, X | Words], [W, X | Filter], RPrefix, T, Acc) -> - %% Directly append the current word to the matching prefix (RPrefix). - %% Micro optimization: try not to call the next clause because - %% it is not a continuation. - search_plus(C, [X | Words], [X | Filter], [W | RPrefix], T, Acc); -search_plus(C, [W | Words], ['+' | _] = Filter, RPrefix, T, Acc) -> - case search_up(C, '+', Words, Filter, RPrefix, T, Acc) of - {T, Acc} -> - search_up(C, W, Words, Filter, RPrefix, T, Acc); - TargetMoved -> - TargetMoved - end; -search_plus(C, [W | Words], Filter, RPrefix, T, Acc) -> - %% not a plus - search_up(C, W, Words, Filter, RPrefix, T, Acc). - %% Compare prefix word then the next words in suffix against the search-target %% topic or topic-filter. compare(_, NotFilter, _) when is_binary(NotFilter) -> @@ -290,16 +287,6 @@ match_add(K = {_Filter, ID}, Acc = #{}) -> match_add(K, Acc) -> [K | Acc]. 
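%% Worked example of the '$'-topic handling above (the ID is made up): for
%% Topic = <<"$SYS/broker/x">> the scan starts from base([<<"$SYS">>]), i.e.
%% {[<<"$SYS">>], {}}. Since atoms sort before binaries in Erlang term order,
%% keys rooted at '#' or '+' are never visited:
%%   true = ({['#'], {id}} < {[<<"$SYS">>], {}}),
%% while e.g. {[<<"$SYS">>, '#'], {id}} still sorts after that base and is found.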
-match_init(Topic) -> - case words(Topic) of - [W = <<"$", _/bytes>> | Rest] -> - % NOTE - % This will effectively skip attempts to match special topics to `#` or `+/...`. - {Rest, [W]}; - Words -> - {Words, []} - end. - -spec words(emqx_types:topic()) -> [word()]. words(Topic) when is_binary(Topic) -> % NOTE @@ -320,9 +307,6 @@ match_topics(#ctx{nextf = NextF} = C, Topic, {Topic, _} = Key, Acc) -> match_topics(#ctx{nextf = NextF} = C, Topic, {F, _}, Acc) when F < Topic -> %% the last key is a filter, try jump to the topic match_topics(C, Topic, NextF(base(Topic)), Acc); -match_topics(#ctx{nextf = NextF} = C, Topic, continue, Acc) -> - %% the last key is a '+/...' wildcard - match_topics(C, Topic, NextF(base(Topic)), Acc); match_topics(_C, _Topic, _Key, Acc) -> %% gone pass the topic Acc. diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index eb0ac0e40..551cc2438 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -170,16 +170,27 @@ t_match8(Config) -> M = get_module(Config), Tab = M:new(), Filters = [<<"+">>, <<"dev/global/sensor">>, <<"dev/+/sensor/#">>], - IDs = [1,2,3], + IDs = [1, 2, 3], Keys = [{F, ID} || F <- Filters, ID <- IDs], - lists:foreach(fun({F, ID}) -> - M:insert(F, ID, <<>>, Tab) - end, Keys), + lists:foreach( + fun({F, ID}) -> + M:insert(F, ID, <<>>, Tab) + end, + Keys + ), Topic = <<"dev/global/sensor">>, Matches = lists:sort(matches(M, Topic, Tab)), - ?assertEqual([<<"dev/+/sensor/#">>, <<"dev/+/sensor/#">>, <<"dev/+/sensor/#">>, - <<"dev/global/sensor">>, <<"dev/global/sensor">>, <<"dev/global/sensor">>], - [emqx_topic_index:get_topic(Match) || Match <- Matches]). + ?assertEqual( + [ + <<"dev/+/sensor/#">>, + <<"dev/+/sensor/#">>, + <<"dev/+/sensor/#">>, + <<"dev/global/sensor">>, + <<"dev/global/sensor">>, + <<"dev/global/sensor">> + ], + [emqx_topic_index:get_topic(Match) || Match <- Matches] + ). t_match_fast_forward(Config) -> M = get_module(Config), From cf45e80c71c008e604a5f95072015d80fae86eeb Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 25 Aug 2023 01:47:11 +0400 Subject: [PATCH 07/12] feat(topicidx): iterate on trie search implementation This improves matching performance and decreases GC pressure on synthetic workloads. --- apps/emqx/src/emqx_trie_search.erl | 247 +++++++++++++--------- apps/emqx/test/emqx_topic_index_SUITE.erl | 13 ++ rebar.config.erl | 3 +- 3 files changed, 158 insertions(+), 105 deletions(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 71bc8d8a0..95174f292 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -110,16 +110,6 @@ -type nextf() :: fun((key(_) | base_key()) -> ?END | key(_)). -type opts() :: [unique | return_first]. -%% Holds the constant values of each search. --record(ctx, { - %% A function which can quickly find the immediate-next record of the given prefix - nextf :: nextf(), - %% The initial words of a topic - words0 :: [word()], - %% Return as soon as there is one match found - return_first :: boolean() -}). - %% @doc Make a search-key for the given topic. -spec make_key(emqx_types:topic(), ID) -> key(ID). make_key(Topic, ID) when is_binary(Topic) -> @@ -147,29 +137,29 @@ get_topic({Filter, _ID}) when is_list(Filter) -> get_topic({Topic, _ID}) -> Topic. +-compile({inline, [base/1, move_up/2, match_add/2, compare/3]}). + %% Make the base-key which can be used to locate the desired search target. base(Prefix) -> {Prefix, {}}. 
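%% For illustration (filter and ID made up): a base key orders strictly before
%% every stored key sharing its prefix, which is what lets nextf() land on the
%% first candidate under that prefix:
%%   true = ({[<<"a">>, '+'], {}} < {[<<"a">>, '+', <<"c">>], {id}}).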
-%% Move the search target to the key next to the given Base. -move_up(#ctx{nextf = NextF}, Base) -> - NextF(Base). +base_init([W = <<"$", _/bytes>> | _]) -> + base([W]); +base_init(_) -> + base([]). -%% Add the given key to the accumulation. -add(#ctx{return_first = true}, _Acc, Key) -> - throw({return_first, Key}); -add(_C, Acc, Key) -> - match_add(Key, Acc). +%% Move the search target to the key next to the given Base. +move_up(NextF, Base) -> + NextF(Base). %% @doc Match given topic against the index and return the first match, or `false` if %% no match is found. -spec match(emqx_types:topic(), nextf()) -> false | key(_). match(Topic, NextF) -> try search(Topic, NextF, [return_first]) of - [] -> - false + _ -> false catch - throw:{return_first, Res} -> + throw:{first, Res} -> Res end. @@ -182,110 +172,159 @@ matches(Topic, NextF, Opts) -> %% @doc Entrypoint of the search for a given topic. search(Topic, NextF, Opts) -> Words = words(Topic), - Context = #ctx{ - nextf = NextF, - words0 = Words, - return_first = proplists:get_bool(return_first, Opts) - }, + Base = base_init(Words), + ORetFirst = proplists:get_bool(return_first, Opts), + OUnique = proplists:get_bool(unique, Opts), Acc0 = - case proplists:get_bool(unique, Opts) of + case ORetFirst of true -> + first; + false when OUnique -> #{}; false -> [] end, - Base = - case hd(Words) of - <<$$, _/binary>> -> - %% skip all filters starts with # or + - base([hd(Words)]); - _ -> - base([]) + Matches = + case search_new(Words, Base, NextF, Acc0) of + {Cursor, Acc} -> + match_topics(Topic, Cursor, NextF, Acc); + Acc -> + Acc end, - {MaybeEnd, Acc1} = search_new(Context, Base, Acc0), - Acc = match_topics(Context, Topic, MaybeEnd, Acc1), - case is_map(Acc) of + case is_map(Matches) of true -> - maps:values(Acc); + maps:values(Matches); false -> - Acc + Matches end. %% The recursive entrypoint of the trie-search algorithm. %% Always start from the initial prefix and words. -search_new(#ctx{words0 = Words} = C, NewBase, Acc) -> - case move_up(C, NewBase) of +search_new(Words0, NewBase, NextF, Acc) -> + case move_up(NextF, NewBase) of ?END -> - {?END, Acc}; - {Filter, _} = T -> - search_plus(C, Words, Filter, [], T, Acc) + Acc; + Cursor -> + search_up(Words0, Cursor, NextF, Acc) end. -%% Try to use '+' as the next word in the prefix. -search_plus(C, [W, X | Words], [W, X | Filter], RPrefix, T, Acc) -> - %% Directly append the current word to the matching prefix (RPrefix). - %% Micro optimization: try not to call the next clause because - %% it is not a continuation. - search_plus(C, [X | Words], [X | Filter], [W | RPrefix], T, Acc); -search_plus(C, [W | Words], ['+' | _] = Filter, RPrefix, T, Acc) -> - case search_up(C, '+', Words, Filter, RPrefix, T, Acc) of - {T, Acc1} -> - search_up(C, W, Words, Filter, RPrefix, T, Acc1); - TargetMoved -> - TargetMoved - end; -search_plus(C, [W | Words], Filter, RPrefix, T, Acc) -> - %% not a plus - search_up(C, W, Words, Filter, RPrefix, T, Acc). - %% Search to the bigger end of ordered collection of topics and topic-filters. 
-search_up(C, Word, Words, Filter, RPrefix, T, Acc) -> - case compare(Word, Filter, Words) of - {match, full} -> - search_new(C, T, add(C, Acc, T)); - {match, prefix} -> - search_new(C, T, Acc); +search_up(Words, {Filter, _} = Cursor, NextF, Acc) -> + case compare(Filter, Words, false) of + match_full -> + search_new(Words, Cursor, NextF, match_add(Cursor, Acc)); + match_prefix -> + search_new(Words, Cursor, NextF, Acc); lower -> - {T, Acc}; - higher -> - NewBase = base(lists:reverse([Word | RPrefix])), - search_new(C, NewBase, Acc); - shorter -> - search_plus(C, Words, tl(Filter), [Word | RPrefix], T, Acc) + {Cursor, Acc}; + [SeekWord | FilterTail] -> + % NOTE + % This is a seek instruction. + % If we visualize the `Filter` as `FilterHead ++ [_] ++ FilterTail`, we need to + % seek to `FilterHead ++ [SeekWord]`. It carries the `FilterTail` because it's + % much cheaper to return it from `compare/3` than anything more usable. + NewBase = base(seek(SeekWord, Filter, FilterTail)), + search_new(Words, NewBase, NextF, Acc) end. -%% Compare prefix word then the next words in suffix against the search-target -%% topic or topic-filter. -compare(_, NotFilter, _) when is_binary(NotFilter) -> - lower; -compare(H, [H | Filter], Words) -> - compare(Filter, Words); -compare(_, ['#'], _Words) -> - {match, full}; -compare(H1, [H2 | _T2], _Words) when H1 < H2 -> - lower; -compare(_H, [_ | _], _Words) -> - higher. +seek(SeekWord, [_ | FilterTail], FilterTail) -> + [SeekWord]; +seek(SeekWord, [FilterWord | Rest], FilterTail) -> + [FilterWord | seek(SeekWord, Rest, FilterTail)]. -%% Now compare the filter suffix and the topic suffix. -compare([], []) -> - {match, full}; -compare([], _Words) -> - {match, prefix}; -compare(['#'], _Words) -> - {match, full}; -compare([_ | _], []) -> +compare(NotFilter, _, _) when is_binary(NotFilter) -> lower; -compare([_ | _], _Words) -> - %% cannot know if it's a match, lower, or higher, - %% must search with a longer prefix. - shorter. +compare([], [], _) -> + % NOTE + % Topic: a/b/c/d + % Filter: a/+/+/d + % We matched the topic to a topic filter exactly (possibly with pluses). + % We include it in the result set, and now need to try next entry in the table. + % Closest possible next entries that we must not miss: + % * a/+/+/d (same topic but a different ID) + % * a/+/+/d/# (also a match) + match_full; +compare([], _Words, _) -> + % NOTE + % Topic: a/b/c/d + % Filter: a/+/c + % We found out that a topic filter is a prefix of the topic (possibly with pluses). + % We discard it, and now need to try next entry in the table. + % Closest possible next entries that we must not miss: + % * a/+/c/# (which is a match) + % * a/+/c/+ (also a match) + % + % TODO + % We might probably instead seek to a/+/c/# right away. + match_prefix; +compare(['#'], _Words, _) -> + % NOTE + % Topic: a/b/c/d + % Filter: a/+/+/d/# + % We matched the topic to a topic filter with wildcard (possibly with pluses). + % We include it in the result set, and now need to try next entry in the table. + % Closest possible next entries that we must not miss: + % * a/+/+/d/# (same topic but a different ID) + match_full; +compare(['+' | TF], [HW | TW], _PrevBacktrack) -> + % NOTE + % We need to keep backtrack point each time we encounter a plus. To safely skip over + % parts of the search space, we may need last backtrack point when recursion terminates. + % See next clauses for examples. 
+ compare(TF, TW, [HW | TF]); +compare([HW | TF], [HW | TW], Backtrack) -> + % NOTE + % Skip over the same word in both topic and filter, keeping the last backtrack point. + compare(TF, TW, Backtrack); +compare([HF | _], [HW | _], false) when HF > HW -> + % NOTE + % Topic: a/b/c/d + % Filter: a/b/c/e/1 + % The topic is lower than a topic filter. There's nowhere to backtrackto, we're out of + % the search space. We should stop the search. + lower; +compare([HF | _], [HW | _], Backtrack) when HF > HW -> + % NOTE + % Topic: a/b/c/d + % Filter: a/+/+/e/1 + % The topic is lower than a topic filter. There was a plus, last time at the 3rd level, + % we have a backtrack point to seek to: + % Seek: [c | e/1] + % We need to skip over part of search space, and seek to the next possible match: + % Next: a/+/c + Backtrack; +compare([_ | _], [], false) -> + % NOTE + % Topic: a/b/c/d + % Filter: a/b/c/d/1 + % The topic is lower than a topic filter. (since it's shorter). There's nowhere to + % backtrack to, we're out of the search space. We should stop the search. + lower; +compare([_ | _], [], Backtrack) -> + % NOTE + % Topic: a/b/c/d + % Filter: a/+/c/d/1 + % The topic is lower than a topic filter. There was a plus, last and only time at the + % 3rd level, we have a backtrack point: + % Seek: [b | c/d/1] + % Next: a/b + Backtrack; +compare([_HF | TF], [HW | _], _) -> + % NOTE + % Topic: a/b/c/d + % Filter: a/+/+/0/1/2 + % Topic is higher than the filter, we need to skip over to the next possible filter. + % Seek: [d | 0/1/2] + % Next: a/+/+/d + [HW | TF]. match_add(K = {_Filter, ID}, Acc = #{}) -> % NOTE: ensuring uniqueness by record ID Acc#{ID => K}; -match_add(K, Acc) -> - [K | Acc]. +match_add(K, Acc) when is_list(Acc) -> + [K | Acc]; +match_add(K, first) -> + throw({first, K}). -spec words(emqx_types:topic()) -> [word()]. words(Topic) when is_binary(Topic) -> @@ -301,12 +340,12 @@ word(<<"#">>) -> '#'; word(Bin) -> Bin. %% match non-wildcard topics -match_topics(#ctx{nextf = NextF} = C, Topic, {Topic, _} = Key, Acc) -> +match_topics(Topic, {Topic, _} = Key, NextF, Acc) -> %% found a topic match - match_topics(C, Topic, NextF(Key), add(C, Acc, Key)); -match_topics(#ctx{nextf = NextF} = C, Topic, {F, _}, Acc) when F < Topic -> + match_topics(Topic, NextF(Key), NextF, match_add(Key, Acc)); +match_topics(Topic, {F, _}, NextF, Acc) when F < Topic -> %% the last key is a filter, try jump to the topic - match_topics(C, Topic, NextF(base(Topic)), Acc); -match_topics(_C, _Topic, _Key, Acc) -> + match_topics(Topic, NextF(base(Topic)), NextF, Acc); +match_topics(_Topic, _Key, _NextF, Acc) -> %% gone pass the topic Acc. diff --git a/apps/emqx/test/emqx_topic_index_SUITE.erl b/apps/emqx/test/emqx_topic_index_SUITE.erl index 551cc2438..08056a16f 100644 --- a/apps/emqx/test/emqx_topic_index_SUITE.erl +++ b/apps/emqx/test/emqx_topic_index_SUITE.erl @@ -256,6 +256,19 @@ t_match_wildcard_edge_cases(Config) -> end, lists:foreach(F, Datasets). +t_prop_edgecase(Config) -> + M = get_module(Config), + Tab = M:new(), + Topic = <<"01/01">>, + Filters = [ + {1, <<>>}, + {2, <<"+/01">>}, + {3, <<>>}, + {4, <<"+/+/01">>} + ], + _ = [M:insert(F, N, <<>>, Tab) || {N, F} <- Filters], + ?assertMatch([2], [id(X) || X <- matches(M, Topic, Tab, [unique])]). 
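A note on why this particular input exercises the backtracking logic (a reading of the test data, not an account of the exact failure the property test caught): in term order the three-level filter +/+/01 sorts before the two-level +/01, because '+' is an atom and atoms sort before binary words. The cursor therefore reaches the longer, non-matching filter first and has to back out of it without seeking past the shorter filter that does match. Illustrative check (shell expression):

%% '+' sorts before any binary word, so +/+/01 precedes +/01 in the key order.
true = emqx_trie_search:make_key(<<"+/+/01">>, 4) < emqx_trie_search:make_key(<<"+/01">>, 2).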
+ t_prop_matches(Config) -> M = get_module(Config), ?assert( diff --git a/rebar.config.erl b/rebar.config.erl index 02e946e15..8f26d11d8 100644 --- a/rebar.config.erl +++ b/rebar.config.erl @@ -190,7 +190,8 @@ test_deps() -> {meck, "0.9.2"}, {proper, "1.4.0"}, {er_coap_client, {git, "https://github.com/emqx/er_coap_client", {tag, "v1.0.5"}}}, - {erl_csv, "0.2.0"} + {erl_csv, "0.2.0"}, + {eministat, "0.10.1"} ]. common_compile_opts() -> From fc37d235c7cfbfbfbfc45957f98a4bd227475213 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 25 Aug 2023 15:24:06 +0400 Subject: [PATCH 08/12] refactor(topicidx): simplify seek instructions Which also avoids comparing filter tail repeatedly when evaluating it. --- apps/emqx/src/emqx_trie_search.erl | 56 ++++++++++++++---------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 95174f292..445425cb8 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -137,7 +137,7 @@ get_topic({Filter, _ID}) when is_list(Filter) -> get_topic({Topic, _ID}) -> Topic. --compile({inline, [base/1, move_up/2, match_add/2, compare/3]}). +-compile({inline, [base/1, move_up/2, match_add/2, compare/4]}). %% Make the base-key which can be used to locate the desired search target. base(Prefix) -> @@ -210,31 +210,29 @@ search_new(Words0, NewBase, NextF, Acc) -> %% Search to the bigger end of ordered collection of topics and topic-filters. search_up(Words, {Filter, _} = Cursor, NextF, Acc) -> - case compare(Filter, Words, false) of + case compare(Filter, Words, 0, false) of match_full -> search_new(Words, Cursor, NextF, match_add(Cursor, Acc)); match_prefix -> search_new(Words, Cursor, NextF, Acc); lower -> {Cursor, Acc}; - [SeekWord | FilterTail] -> + {Pos, SeekWord} -> % NOTE - % This is a seek instruction. - % If we visualize the `Filter` as `FilterHead ++ [_] ++ FilterTail`, we need to - % seek to `FilterHead ++ [SeekWord]`. It carries the `FilterTail` because it's - % much cheaper to return it from `compare/3` than anything more usable. - NewBase = base(seek(SeekWord, Filter, FilterTail)), + % This is a seek instruction. It means we need to take `Pos` words + % from the current topic filter and attach `SeekWord` to the end of it. + NewBase = base(seek(Pos, SeekWord, Filter)), search_new(Words, NewBase, NextF, Acc) end. -seek(SeekWord, [_ | FilterTail], FilterTail) -> +seek(_Pos = 0, SeekWord, _FilterTail) -> [SeekWord]; -seek(SeekWord, [FilterWord | Rest], FilterTail) -> - [FilterWord | seek(SeekWord, Rest, FilterTail)]. +seek(Pos, SeekWord, [FilterWord | Rest]) -> + [FilterWord | seek(Pos - 1, SeekWord, Rest)]. -compare(NotFilter, _, _) when is_binary(NotFilter) -> +compare(NotFilter, _, _, _) when is_binary(NotFilter) -> lower; -compare([], [], _) -> +compare([], [], _, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/d @@ -244,7 +242,7 @@ compare([], [], _) -> % * a/+/+/d (same topic but a different ID) % * a/+/+/d/# (also a match) match_full; -compare([], _Words, _) -> +compare([], _Words, _, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/c @@ -257,7 +255,7 @@ compare([], _Words, _) -> % TODO % We might probably instead seek to a/+/c/# right away. 
match_prefix; -compare(['#'], _Words, _) -> +compare(['#'], _Words, _, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/d/# @@ -266,57 +264,57 @@ compare(['#'], _Words, _) -> % Closest possible next entries that we must not miss: % * a/+/+/d/# (same topic but a different ID) match_full; -compare(['+' | TF], [HW | TW], _PrevBacktrack) -> +compare(['+' | TF], [HW | TW], Pos, _PrevBacktrack) -> % NOTE % We need to keep backtrack point each time we encounter a plus. To safely skip over % parts of the search space, we may need last backtrack point when recursion terminates. % See next clauses for examples. - compare(TF, TW, [HW | TF]); -compare([HW | TF], [HW | TW], Backtrack) -> + compare(TF, TW, Pos + 1, {Pos, HW}); +compare([HW | TF], [HW | TW], Pos, Backtrack) -> % NOTE % Skip over the same word in both topic and filter, keeping the last backtrack point. - compare(TF, TW, Backtrack); -compare([HF | _], [HW | _], false) when HF > HW -> + compare(TF, TW, Pos + 1, Backtrack); +compare([HF | _], [HW | _], _, false) when HF > HW -> % NOTE % Topic: a/b/c/d % Filter: a/b/c/e/1 % The topic is lower than a topic filter. There's nowhere to backtrackto, we're out of % the search space. We should stop the search. lower; -compare([HF | _], [HW | _], Backtrack) when HF > HW -> +compare([HF | _], [HW | _], _, Backtrack) when HF > HW -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/e/1 % The topic is lower than a topic filter. There was a plus, last time at the 3rd level, % we have a backtrack point to seek to: - % Seek: [c | e/1] + % Seek: {2, c} % We need to skip over part of search space, and seek to the next possible match: % Next: a/+/c Backtrack; -compare([_ | _], [], false) -> +compare([_ | _], [], _, false) -> % NOTE % Topic: a/b/c/d % Filter: a/b/c/d/1 - % The topic is lower than a topic filter. (since it's shorter). There's nowhere to + % The topic is lower than a topic filter (since it's shorter). There's nowhere to % backtrack to, we're out of the search space. We should stop the search. lower; -compare([_ | _], [], Backtrack) -> +compare([_ | _], [], _, Backtrack) -> % NOTE % Topic: a/b/c/d % Filter: a/+/c/d/1 % The topic is lower than a topic filter. There was a plus, last and only time at the % 3rd level, we have a backtrack point: - % Seek: [b | c/d/1] + % Seek: {1, b} % Next: a/b Backtrack; -compare([_HF | TF], [HW | _], _) -> +compare([_ | _], [HW | _], Pos, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/0/1/2 % Topic is higher than the filter, we need to skip over to the next possible filter. - % Seek: [d | 0/1/2] + % Seek: {3, d} % Next: a/+/+/d - [HW | TF]. + {Pos, HW}. match_add(K = {_Filter, ID}, Acc = #{}) -> % NOTE: ensuring uniqueness by record ID From 33ed53bb6aeab659c3d075838d24b7a04fcdfee7 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 25 Aug 2023 16:26:39 +0400 Subject: [PATCH 09/12] refactor(topicidx): simplify `compare/3` further It's possible to emit seek instruction just once, on the way back out of the `compare/3` stack. --- apps/emqx/src/emqx_trie_search.erl | 80 ++++++++++++++---------------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 445425cb8..4acee979f 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -137,7 +137,7 @@ get_topic({Filter, _ID}) when is_list(Filter) -> get_topic({Topic, _ID}) -> Topic. --compile({inline, [base/1, move_up/2, match_add/2, compare/4]}). +-compile({inline, [base/1, move_up/2, match_add/2, compare/3]}). 
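This commit keeps the {Pos, SeekWord} seek instruction introduced in the previous one; for reference, here is how such an instruction maps back to a base key, reusing the a/b/c/d vs a/+/+/e/1 example from the comments. This is a sketch as if evaluated inside emqx_trie_search, since seek/3 and base/1 are internal:

%% Illustrative: {2, <<"c">>} means keep the first 2 filter words, then continue from "c".
seek_example() ->
    Filter = [<<"a">>, '+', '+', <<"e">>, <<"1">>],
    [<<"a">>, '+', <<"c">>] = seek(2, <<"c">>, Filter),
    %% ...which search_up/4 wraps into the next base key to jump to:
    {[<<"a">>, '+', <<"c">>], {}} = base(seek(2, <<"c">>, Filter)).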
%% Make the base-key which can be used to locate the desired search target. base(Prefix) -> @@ -210,7 +210,7 @@ search_new(Words0, NewBase, NextF, Acc) -> %% Search to the bigger end of ordered collection of topics and topic-filters. search_up(Words, {Filter, _} = Cursor, NextF, Acc) -> - case compare(Filter, Words, 0, false) of + case compare(Filter, Words, 0) of match_full -> search_new(Words, Cursor, NextF, match_add(Cursor, Acc)); match_prefix -> @@ -230,9 +230,9 @@ seek(_Pos = 0, SeekWord, _FilterTail) -> seek(Pos, SeekWord, [FilterWord | Rest]) -> [FilterWord | seek(Pos - 1, SeekWord, Rest)]. -compare(NotFilter, _, _, _) when is_binary(NotFilter) -> +compare(NotFilter, _, _) when is_binary(NotFilter) -> lower; -compare([], [], _, _) -> +compare([], [], _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/d @@ -242,7 +242,7 @@ compare([], [], _, _) -> % * a/+/+/d (same topic but a different ID) % * a/+/+/d/# (also a match) match_full; -compare([], _Words, _, _) -> +compare([], _Words, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/c @@ -255,7 +255,7 @@ compare([], _Words, _, _) -> % TODO % We might probably instead seek to a/+/c/# right away. match_prefix; -compare(['#'], _Words, _, _) -> +compare(['#'], _Words, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/d/# @@ -264,50 +264,46 @@ compare(['#'], _Words, _, _) -> % Closest possible next entries that we must not miss: % * a/+/+/d/# (same topic but a different ID) match_full; -compare(['+' | TF], [HW | TW], Pos, _PrevBacktrack) -> - % NOTE - % We need to keep backtrack point each time we encounter a plus. To safely skip over - % parts of the search space, we may need last backtrack point when recursion terminates. - % See next clauses for examples. - compare(TF, TW, Pos + 1, {Pos, HW}); -compare([HW | TF], [HW | TW], Pos, Backtrack) -> +compare(['+' | TF], [HW | TW], Pos) -> + case compare(TF, TW, Pos + 1) of + lower -> + % NOTE + % Topic: a/b/c/d + % Filter: a/+/+/e/1 or a/b/+/d/1 + % The topic is lower than a topic filter. But we're at the `+` position, + % so we emit a backtrack point to seek to: + % Seek: {2, c} + % We skip over part of search space, and seek to the next possible match: + % Next: a/+/c + {Pos, HW}; + Other -> + % NOTE + % It's either already a backtrack point, emitted from the last `+` + % position or just a seek / match. In both cases we just pass it + % through. + Other + end; +compare([HW | TF], [HW | TW], Pos) -> % NOTE % Skip over the same word in both topic and filter, keeping the last backtrack point. - compare(TF, TW, Pos + 1, Backtrack); -compare([HF | _], [HW | _], _, false) when HF > HW -> + compare(TF, TW, Pos + 1); +compare([HF | _], [HW | _], _) when HF > HW -> % NOTE % Topic: a/b/c/d - % Filter: a/b/c/e/1 - % The topic is lower than a topic filter. There's nowhere to backtrackto, we're out of - % the search space. We should stop the search. + % Filter: a/b/c/e/1 or a/b/+/e + % The topic is lower than a topic filter. In the first case there's nowhere to + % backtrack to, we're out of the search space. In the second case there's a `+` + % on 3rd level, we'll seek up from there. lower; -compare([HF | _], [HW | _], _, Backtrack) when HF > HW -> +compare([_ | _], [], _) -> % NOTE % Topic: a/b/c/d - % Filter: a/+/+/e/1 - % The topic is lower than a topic filter. 
There was a plus, last time at the 3rd level, - % we have a backtrack point to seek to: - % Seek: {2, c} - % We need to skip over part of search space, and seek to the next possible match: - % Next: a/+/c - Backtrack; -compare([_ | _], [], _, false) -> - % NOTE - % Topic: a/b/c/d - % Filter: a/b/c/d/1 - % The topic is lower than a topic filter (since it's shorter). There's nowhere to - % backtrack to, we're out of the search space. We should stop the search. + % Filter: a/b/c/d/1 or a/+/c/d/1 + % The topic is lower than a topic filter (since it's shorter). In the first case + % there's nowhere to backtrack to, we're out of the search space. In the second case + % there's a `+` on 2nd level, we'll seek up from there. lower; -compare([_ | _], [], _, Backtrack) -> - % NOTE - % Topic: a/b/c/d - % Filter: a/+/c/d/1 - % The topic is lower than a topic filter. There was a plus, last and only time at the - % 3rd level, we have a backtrack point: - % Seek: {1, b} - % Next: a/b - Backtrack; -compare([_ | _], [HW | _], Pos, _) -> +compare([_ | _], [HW | _], Pos) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/0/1/2 From 558402a68e6cf244c7217578796daddd1f49639f Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Fri, 25 Aug 2023 10:49:23 +0200 Subject: [PATCH 10/12] chore(topic_index): add topic validation --- apps/emqx/src/emqx_trie_search.erl | 22 +++++++++------- apps/emqx/test/emqx_trie_search_tests.erl | 32 +++++++++++++++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 apps/emqx/test/emqx_trie_search_tests.erl diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 4acee979f..ddeef966b 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -113,7 +113,7 @@ %% @doc Make a search-key for the given topic. -spec make_key(emqx_types:topic(), ID) -> key(ID). make_key(Topic, ID) when is_binary(Topic) -> - Words = words(Topic), + Words = filter_words(Topic), case emqx_topic:wildcard(Words) of true -> %% it's a wildcard @@ -171,7 +171,7 @@ matches(Topic, NextF, Opts) -> %% @doc Entrypoint of the search for a given topic. search(Topic, NextF, Opts) -> - Words = words(Topic), + Words = topic_words(Topic), Base = base_init(Words), ORetFirst = proplists:get_bool(return_first, Opts), OUnique = proplists:get_bool(unique, Opts), @@ -320,18 +320,22 @@ match_add(K, Acc) when is_list(Acc) -> match_add(K, first) -> throw({first, K}). --spec words(emqx_types:topic()) -> [word()]. -words(Topic) when is_binary(Topic) -> +-spec filter_words(emqx_types:topic()) -> [word()]. +filter_words(Topic) when is_binary(Topic) -> % NOTE % This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty % tokens to ''. This is needed to keep ordering of words consistent with what % `match_filter/3` expects. - [word(W) || W <- emqx_topic:tokens(Topic)]. + [word(W, filter) || W <- emqx_topic:tokens(Topic)]. --spec word(binary()) -> word(). -word(<<"+">>) -> '+'; -word(<<"#">>) -> '#'; -word(Bin) -> Bin. +topic_words(Topic) when is_binary(Topic) -> + [word(W, topic) || W <- emqx_topic:tokens(Topic)]. + +word(<<"+">>, topic) -> error(badarg); +word(<<"#">>, topic) -> error(badarg); +word(<<"+">>, filter) -> '+'; +word(<<"#">>, filter) -> '#'; +word(Bin, _) -> Bin. 
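The net effect of the topic/filter split in word/2: keys for stored filters keep their wildcard words, while a wildcard in the topic being matched is rejected up front. A small sketch mirroring the eunit cases added below (both calls are used the same way by emqx_topic_gbt earlier in the series, so they are part of the module's exported API):

%% Illustrative: filters still parse wildcards into atoms...
word_validation_example() ->
    {['+', <<"b">>], {id}} = emqx_trie_search:make_key(<<"+/b">>, id),
    %% ...but a wildcard in the searched topic is now a badarg.
    {'EXIT', {badarg, _}} = (catch emqx_trie_search:match(<<"+/b">>, fun(_) -> '$end_of_table' end)).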
%% match non-wildcard topics match_topics(Topic, {Topic, _} = Key, NextF, Acc) -> diff --git a/apps/emqx/test/emqx_trie_search_tests.erl b/apps/emqx/test/emqx_trie_search_tests.erl new file mode 100644 index 000000000..75994131d --- /dev/null +++ b/apps/emqx/test/emqx_trie_search_tests.erl @@ -0,0 +1,32 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_trie_search_tests). + +-include_lib("eunit/include/eunit.hrl"). + +topic_validation_test() -> + NextF = fun(_) -> '$end_of_table' end, + Call = fun(Topic) -> + emqx_trie_search:match(Topic, NextF) + end, + ?assertError(badarg, Call(<<"+">>)), + ?assertError(badarg, Call(<<"#">>)), + ?assertError(badarg, Call(<<"a/+/b">>)), + ?assertError(badarg, Call(<<"a/b/#">>)), + ?assertEqual(false, Call(<<"a/b/b+">>)), + ?assertEqual(false, Call(<<"a/b/c#">>)), + ok. From ca59a87d47afacfba013e9e5c3b10e902422be78 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 25 Aug 2023 16:40:23 +0400 Subject: [PATCH 11/12] chore(topicidx): refine example of wildcard compare --- apps/emqx/src/emqx_trie_search.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index ddeef966b..6285eb4eb 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -258,7 +258,7 @@ compare([], _Words, _) -> compare(['#'], _Words, _) -> % NOTE % Topic: a/b/c/d - % Filter: a/+/+/d/# + % Filter: a/+/+/d/# or just a/# % We matched the topic to a topic filter with wildcard (possibly with pluses). % We include it in the result set, and now need to try next entry in the table. % Closest possible next entries that we must not miss: From d5cff533e3e3128bc36def697a217a0cc16f4a04 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 25 Aug 2023 16:41:21 +0400 Subject: [PATCH 12/12] chore(topicidx): drop TODO comment --- apps/emqx/src/emqx_trie_search.erl | 3 --- 1 file changed, 3 deletions(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 6285eb4eb..b774e1459 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -251,9 +251,6 @@ compare([], _Words, _) -> % Closest possible next entries that we must not miss: % * a/+/c/# (which is a match) % * a/+/c/+ (also a match) - % - % TODO - % We might probably instead seek to a/+/c/# right away. match_prefix; compare(['#'], _Words, _) -> % NOTE
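Taken together, the series leaves two interchangeable front-ends over the same emqx_trie_search core: the ETS-backed emqx_topic_index and the gb_trees-in-persistent_term emqx_topic_gbt. A minimal usage sketch, assuming the API exercised by the test suite above (new/0,1, insert/4, matches/3, get_id/1); the filters, IDs and the example_index name are made up for illustration:

%% Illustrative end-to-end usage of both front-ends.
index_sketch() ->
    %% ETS ordered_set based index, fine for a changing set of filters.
    Tab = emqx_topic_index:new(),
    _ = emqx_topic_index:insert(<<"dev/+/sensor/#">>, id1, <<>>, Tab),
    _ = emqx_topic_index:insert(<<"dev/global/sensor">>, id2, <<>>, Tab),
    [id1, id2] = lists:sort([
        emqx_topic_index:get_id(M)
     || M <- emqx_topic_index:matches(<<"dev/global/sensor">>, Tab, [unique])
    ]),
    %% gb_trees in persistent_term, intended for a static set of filters.
    Name = emqx_topic_gbt:new(example_index),
    true = emqx_topic_gbt:insert(<<"dev/+/sensor/#">>, id1, <<>>, Name),
    [id1] = [emqx_topic_gbt:get_id(M) || M <- emqx_topic_gbt:matches(<<"dev/global/sensor">>, Name, [unique])].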