From fc37d235c7cfbfbfbfc45957f98a4bd227475213 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 25 Aug 2023 15:24:06 +0400 Subject: [PATCH] refactor(topicidx): simplify seek instructions Which also avoids comparing filter tail repeatedly when evaluating it. --- apps/emqx/src/emqx_trie_search.erl | 56 ++++++++++++++---------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/apps/emqx/src/emqx_trie_search.erl b/apps/emqx/src/emqx_trie_search.erl index 95174f292..445425cb8 100644 --- a/apps/emqx/src/emqx_trie_search.erl +++ b/apps/emqx/src/emqx_trie_search.erl @@ -137,7 +137,7 @@ get_topic({Filter, _ID}) when is_list(Filter) -> get_topic({Topic, _ID}) -> Topic. --compile({inline, [base/1, move_up/2, match_add/2, compare/3]}). +-compile({inline, [base/1, move_up/2, match_add/2, compare/4]}). %% Make the base-key which can be used to locate the desired search target. base(Prefix) -> @@ -210,31 +210,29 @@ search_new(Words0, NewBase, NextF, Acc) -> %% Search to the bigger end of ordered collection of topics and topic-filters. search_up(Words, {Filter, _} = Cursor, NextF, Acc) -> - case compare(Filter, Words, false) of + case compare(Filter, Words, 0, false) of match_full -> search_new(Words, Cursor, NextF, match_add(Cursor, Acc)); match_prefix -> search_new(Words, Cursor, NextF, Acc); lower -> {Cursor, Acc}; - [SeekWord | FilterTail] -> + {Pos, SeekWord} -> % NOTE - % This is a seek instruction. - % If we visualize the `Filter` as `FilterHead ++ [_] ++ FilterTail`, we need to - % seek to `FilterHead ++ [SeekWord]`. It carries the `FilterTail` because it's - % much cheaper to return it from `compare/3` than anything more usable. - NewBase = base(seek(SeekWord, Filter, FilterTail)), + % This is a seek instruction. It means we need to take `Pos` words + % from the current topic filter and attach `SeekWord` to the end of it. + NewBase = base(seek(Pos, SeekWord, Filter)), search_new(Words, NewBase, NextF, Acc) end. 
-seek(SeekWord, [_ | FilterTail], FilterTail) -> +seek(_Pos = 0, SeekWord, _FilterTail) -> [SeekWord]; -seek(SeekWord, [FilterWord | Rest], FilterTail) -> - [FilterWord | seek(SeekWord, Rest, FilterTail)]. +seek(Pos, SeekWord, [FilterWord | Rest]) -> + [FilterWord | seek(Pos - 1, SeekWord, Rest)]. -compare(NotFilter, _, _) when is_binary(NotFilter) -> +compare(NotFilter, _, _, _) when is_binary(NotFilter) -> lower; -compare([], [], _) -> +compare([], [], _, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/d @@ -244,7 +242,7 @@ compare([], [], _) -> % * a/+/+/d (same topic but a different ID) % * a/+/+/d/# (also a match) match_full; -compare([], _Words, _) -> +compare([], _Words, _, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/c @@ -257,7 +255,7 @@ compare([], _Words, _) -> % TODO % We might probably instead seek to a/+/c/# right away. match_prefix; -compare(['#'], _Words, _) -> +compare(['#'], _Words, _, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/d/# @@ -266,57 +264,57 @@ compare(['#'], _Words, _) -> % Closest possible next entries that we must not miss: % * a/+/+/d/# (same topic but a different ID) match_full; -compare(['+' | TF], [HW | TW], _PrevBacktrack) -> +compare(['+' | TF], [HW | TW], Pos, _PrevBacktrack) -> % NOTE % We need to keep backtrack point each time we encounter a plus. To safely skip over % parts of the search space, we may need last backtrack point when recursion terminates. % See next clauses for examples. - compare(TF, TW, [HW | TF]); -compare([HW | TF], [HW | TW], Backtrack) -> + compare(TF, TW, Pos + 1, {Pos, HW}); +compare([HW | TF], [HW | TW], Pos, Backtrack) -> % NOTE % Skip over the same word in both topic and filter, keeping the last backtrack point. - compare(TF, TW, Backtrack); -compare([HF | _], [HW | _], false) when HF > HW -> + compare(TF, TW, Pos + 1, Backtrack); +compare([HF | _], [HW | _], _, false) when HF > HW -> % NOTE % Topic: a/b/c/d % Filter: a/b/c/e/1 % The topic is lower than a topic filter. 
There's nowhere to backtrack to, we're out of % the search space. We should stop the search. lower; -compare([HF | _], [HW | _], Backtrack) when HF > HW -> +compare([HF | _], [HW | _], _, Backtrack) when HF > HW -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/e/1 % The topic is lower than a topic filter. There was a plus, last time at the 3rd level, % we have a backtrack point to seek to: - % Seek: [c | e/1] + % Seek: {2, c} % We need to skip over part of search space, and seek to the next possible match: % Next: a/+/c Backtrack; -compare([_ | _], [], false) -> +compare([_ | _], [], _, false) -> % NOTE % Topic: a/b/c/d % Filter: a/b/c/d/1 - % The topic is lower than a topic filter. (since it's shorter). There's nowhere to + % The topic is lower than a topic filter (since it's shorter). There's nowhere to % backtrack to, we're out of the search space. We should stop the search. lower; -compare([_ | _], [], Backtrack) -> +compare([_ | _], [], _, Backtrack) -> % NOTE % Topic: a/b/c/d % Filter: a/+/c/d/1 % The topic is lower than a topic filter. There was a plus, last and only time at the % 3rd level, we have a backtrack point: - % Seek: [b | c/d/1] + % Seek: {1, b} % Next: a/b Backtrack; -compare([_HF | TF], [HW | _], _) -> +compare([_ | _], [HW | _], Pos, _) -> % NOTE % Topic: a/b/c/d % Filter: a/+/+/0/1/2 % Topic is higher than the filter, we need to skip over to the next possible filter. - % Seek: [d | 0/1/2] + % Seek: {3, d} % Next: a/+/+/d - [HW | TF]. + {Pos, HW}. match_add(K = {_Filter, ID}, Acc = #{}) -> % NOTE: ensuring uniqueness by record ID